From 2d2f180dd9a41a7368da9227ea7234e1f767a3cd Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Mon, 18 Mar 2024 10:32:04 -0700 Subject: [PATCH 1/3] WIP: a residence proposal for tensors --- wit/wasi-nn.wit | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/wit/wasi-nn.wit b/wit/wasi-nn.wit index 3e54249..1749c14 100644 --- a/wit/wasi-nn.wit +++ b/wit/wasi-nn.wit @@ -44,7 +44,8 @@ interface tensor { type tensor-data = list; resource tensor { - constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data); + constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data, + location: option); // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To represent a tensor // containing a single value, use `[1]` for the tensor dimensions. @@ -53,9 +54,20 @@ interface tensor { // Describe the type of element in the tensor (e.g., `f32`). ty: func() -> tensor-type; - // Return the tensor data. + // Describe where the tensor is currently located (e.g., `cpu`, `gpu`, `tpu`). + location: func() -> execution-target; + + // Return the tensor data. If the tensor is located on a device other than the CPU, this + // operation may result in an expensive data copy operation. data: func() -> tensor-data; } + + /// Alternately, construct a tensor that lives exclusively on a specific device. + create_on_device: func(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data, + location: execution-target, backend: graph-encoding) -> result; + + // TODO: rename exection-target to... device? + // TODO: rename graph-encoding to... backend? } /// A `graph` is a loaded instance of a specific ML model (e.g., MobileNet) for a specific ML From b2594a9f3bdb50fc262f5d72f64e90da4d3b8749 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Fri, 29 Mar 2024 08:53:59 -0700 Subject: [PATCH 2/3] Apply feedback from ML meeting --- wit/wasi-nn.wit | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/wit/wasi-nn.wit b/wit/wasi-nn.wit index 1749c14..edc3d6b 100644 --- a/wit/wasi-nn.wit +++ b/wit/wasi-nn.wit @@ -15,8 +15,20 @@ world ml { import errors; } +/// Inference is performed on a specific `device`. +interface device { + /// Define where tensors reside and graphs execute. + enum location { + cpu, + gpu, + tpu + } +} + /// All inputs and outputs to an ML inference are represented as `tensor`s. interface tensor { + use device.{location}; + /// The dimensions of a tensor. /// /// The array length matches the tensor rank and each element in the array describes the size of @@ -44,8 +56,8 @@ interface tensor { type tensor-data = list; resource tensor { - constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data, - location: option); + /// Construct a tensor that lives on the host CPU. + constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data); // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To represent a tensor // containing a single value, use `[1]` for the tensor dimensions. @@ -55,7 +67,7 @@ interface tensor { ty: func() -> tensor-type; // Describe where the tensor is currently located (e.g., `cpu`, `gpu`, `tpu`). - location: func() -> execution-target; + location: func() -> location; // Return the tensor data. If the tensor is located on a device other than the CPU, this // operation may result in an expensive data copy operation. @@ -74,8 +86,9 @@ interface tensor { /// framework (e.g., TensorFlow): interface graph { use errors.{error}; - use tensor.{tensor}; + use device.{location}; use inference.{graph-execution-context}; + use tensor.{tensor}; /// An execution graph for performing inference (i.e., a model). resource graph { @@ -93,21 +106,15 @@ interface graph { autodetect, } - /// Define where the graph should be executed. - enum execution-target { - cpu, - gpu, - tpu - } - /// The graph initialization data. /// /// This gets bundled up into an array of buffers because implementing backends may encode their /// graph IR in parts (e.g., OpenVINO stores its IR and weights separately). type graph-builder = list; - /// Load a `graph` from an opaque sequence of bytes to use for inference. - load: func(builder: list, encoding: graph-encoding, target: execution-target) -> result; + /// Load a `graph` from an opaque sequence of bytes to use for inference on the specified device + /// `location`. + load: func(builder: list, encoding: graph-encoding, location: location) -> result; /// Load a `graph` by name. /// @@ -128,6 +135,11 @@ interface inference { /// TODO: this may no longer be necessary in WIT /// (https://github.com/WebAssembly/wasi-nn/issues/43) resource graph-execution-context { + /// Load a tensor using the graph context. Unlike the `tensor` constructor, this function + /// will co-locate the tensor data on a specific device using the graph's underlying + /// backend; this may avoid some copies, improving performance. + load-tensor: func(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data) -> result; + /// Define the inputs to use for inference. set-input: func(name: string, tensor: tensor) -> result<_, error>; From af4495e1b3429aa6f475ef52f0fe7e7a382f5dcc Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Mon, 1 Apr 2024 08:50:04 -0700 Subject: [PATCH 3/3] Apply MTR's feedback --- wit/wasi-nn.wit | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/wit/wasi-nn.wit b/wit/wasi-nn.wit index edc3d6b..fa13f2f 100644 --- a/wit/wasi-nn.wit +++ b/wit/wasi-nn.wit @@ -18,16 +18,17 @@ world ml { /// Inference is performed on a specific `device`. interface device { /// Define where tensors reside and graphs execute. - enum location { - cpu, - gpu, - tpu + record device { + name: string } + + /// List the available devices for a given backend. + available-devices: func(backend: backend) -> list; } /// All inputs and outputs to an ML inference are represented as `tensor`s. interface tensor { - use device.{location}; + use device.{device}; /// The dimensions of a tensor. /// @@ -67,26 +68,23 @@ interface tensor { ty: func() -> tensor-type; // Describe where the tensor is currently located (e.g., `cpu`, `gpu`, `tpu`). - location: func() -> location; + location: func() -> device; + + // Move the tensor to a different device. This operation may result in an expensive data + // copy. + move-to: func(device: device) -> result; // Return the tensor data. If the tensor is located on a device other than the CPU, this // operation may result in an expensive data copy operation. data: func() -> tensor-data; } - - /// Alternately, construct a tensor that lives exclusively on a specific device. - create_on_device: func(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data, - location: execution-target, backend: graph-encoding) -> result; - - // TODO: rename exection-target to... device? - // TODO: rename graph-encoding to... backend? } /// A `graph` is a loaded instance of a specific ML model (e.g., MobileNet) for a specific ML /// framework (e.g., TensorFlow): interface graph { use errors.{error}; - use device.{location}; + use device.{device}; use inference.{graph-execution-context}; use tensor.{tensor}; @@ -114,7 +112,7 @@ interface graph { /// Load a `graph` from an opaque sequence of bytes to use for inference on the specified device /// `location`. - load: func(builder: list, encoding: graph-encoding, location: location) -> result; + load: func(builder: list, encoding: graph-encoding, location: device) -> result; /// Load a `graph` by name. ///