From 2d2f180dd9a41a7368da9227ea7234e1f767a3cd Mon Sep 17 00:00:00 2001
From: Andrew Brown <andrew.brown@intel.com>
Date: Mon, 18 Mar 2024 10:32:04 -0700
Subject: [PATCH 1/3] WIP: a residence proposal for tensors

---
 wit/wasi-nn.wit | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/wit/wasi-nn.wit b/wit/wasi-nn.wit
index 3e54249..1749c14 100644
--- a/wit/wasi-nn.wit
+++ b/wit/wasi-nn.wit
@@ -44,7 +44,8 @@ interface tensor {
     type tensor-data = list<u8>;
 
     resource tensor {
-        constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data);
+        constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data,
+            location: option<execution-target>);
 
         // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To represent a tensor
         // containing a single value, use `[1]` for the tensor dimensions.
@@ -53,9 +54,20 @@ interface tensor {
         // Describe the type of element in the tensor (e.g., `f32`).
         ty: func() -> tensor-type;
 
-        // Return the tensor data.
+        // Describe where the tensor is currently located (e.g., `cpu`, `gpu`, `tpu`).
+        location: func() -> execution-target;
+
+        // Return the tensor data. If the tensor is located on a device other than the CPU, this
+        // operation may result in an expensive data copy operation.
         data: func() -> tensor-data;
     }
+
+    /// Alternately, construct a tensor that lives exclusively on a specific device.
+    create_on_device: func(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data,
+        location: execution-target, backend: graph-encoding) -> result<tensor, error>;
+
+    // TODO: rename exection-target to... device?
+    // TODO: rename graph-encoding to... backend?
 }
 
 /// A `graph` is a loaded instance of a specific ML model (e.g., MobileNet) for a specific ML

From b2594a9f3bdb50fc262f5d72f64e90da4d3b8749 Mon Sep 17 00:00:00 2001
From: Andrew Brown <andrew.brown@intel.com>
Date: Fri, 29 Mar 2024 08:53:59 -0700
Subject: [PATCH 2/3] Apply feedback from ML meeting

---
 wit/wasi-nn.wit | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/wit/wasi-nn.wit b/wit/wasi-nn.wit
index 1749c14..edc3d6b 100644
--- a/wit/wasi-nn.wit
+++ b/wit/wasi-nn.wit
@@ -15,8 +15,20 @@ world ml {
     import errors;
 }
 
+/// Inference is performed on a specific `device`.
+interface device {
+    /// Define where tensors reside and graphs execute.
+    enum location {
+        cpu,
+        gpu,
+        tpu
+    }
+}
+
 /// All inputs and outputs to an ML inference are represented as `tensor`s.
 interface tensor {
+    use device.{location};
+
     /// The dimensions of a tensor.
     ///
     /// The array length matches the tensor rank and each element in the array describes the size of
@@ -44,8 +56,8 @@ interface tensor {
     type tensor-data = list<u8>;
 
     resource tensor {
-        constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data,
-            location: option<execution-target>);
+        /// Construct a tensor that lives on the host CPU.
+        constructor(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data);
 
         // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To represent a tensor
         // containing a single value, use `[1]` for the tensor dimensions.
@@ -55,7 +67,7 @@ interface tensor {
         ty: func() -> tensor-type;
 
         // Describe where the tensor is currently located (e.g., `cpu`, `gpu`, `tpu`).
-        location: func() -> execution-target;
+        location: func() -> location;
 
         // Return the tensor data. If the tensor is located on a device other than the CPU, this
         // operation may result in an expensive data copy operation.
@@ -74,8 +86,9 @@ interface tensor {
 /// framework (e.g., TensorFlow):
 interface graph {
     use errors.{error};
-    use tensor.{tensor};
+    use device.{location};
     use inference.{graph-execution-context};
+    use tensor.{tensor};
 
     /// An execution graph for performing inference (i.e., a model).
     resource graph {
@@ -93,21 +106,15 @@ interface graph {
         autodetect,
     }
 
-    /// Define where the graph should be executed.
-    enum execution-target {
-        cpu,
-        gpu,
-        tpu
-    }
-
     /// The graph initialization data.
     ///
     /// This gets bundled up into an array of buffers because implementing backends may encode their
     /// graph IR in parts (e.g., OpenVINO stores its IR and weights separately).
     type graph-builder = list<u8>;
 
-    /// Load a `graph` from an opaque sequence of bytes to use for inference.
-    load: func(builder: list<graph-builder>, encoding: graph-encoding, target: execution-target) -> result<graph, error>;
+    /// Load a `graph` from an opaque sequence of bytes to use for inference on the specified device
+    /// `location`.
+    load: func(builder: list<graph-builder>, encoding: graph-encoding, location: location) -> result<graph, error>;
 
     /// Load a `graph` by name.
     ///
@@ -128,6 +135,11 @@ interface inference {
     /// TODO: this may no longer be necessary in WIT
     /// (https://github.com/WebAssembly/wasi-nn/issues/43)
     resource graph-execution-context {
+        /// Load a tensor using the graph context. Unlike the `tensor` constructor, this function
+        /// will co-locate the tensor data on a specific device using the graph's underlying
+        /// backend; this may avoid some copies, improving performance.
+        load-tensor: func(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data) -> result<tensor, error>;
+
         /// Define the inputs to use for inference.
         set-input: func(name: string, tensor: tensor) -> result<_, error>;
 

From af4495e1b3429aa6f475ef52f0fe7e7a382f5dcc Mon Sep 17 00:00:00 2001
From: Andrew Brown <andrew.brown@intel.com>
Date: Mon, 1 Apr 2024 08:50:04 -0700
Subject: [PATCH 3/3] Apply MTR's feedback

---
 wit/wasi-nn.wit | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/wit/wasi-nn.wit b/wit/wasi-nn.wit
index edc3d6b..fa13f2f 100644
--- a/wit/wasi-nn.wit
+++ b/wit/wasi-nn.wit
@@ -18,16 +18,17 @@ world ml {
 /// Inference is performed on a specific `device`.
 interface device {
     /// Define where tensors reside and graphs execute.
-    enum location {
-        cpu,
-        gpu,
-        tpu
+    record device {
+        name: string
     }
+
+    /// List the available devices for a given backend.
+    available-devices: func(backend: backend) -> list<device>;
 }
 
 /// All inputs and outputs to an ML inference are represented as `tensor`s.
 interface tensor {
-    use device.{location};
+    use device.{device};
 
     /// The dimensions of a tensor.
     ///
@@ -67,26 +68,23 @@ interface tensor {
         ty: func() -> tensor-type;
 
         // Describe where the tensor is currently located (e.g., `cpu`, `gpu`, `tpu`).
-        location: func() -> location;
+        location: func() -> device;
+
+        // Move the tensor to a different device. This operation may result in an expensive data
+        // copy.
+        move-to: func(device: device) -> result<tensor, error>;
 
         // Return the tensor data. If the tensor is located on a device other than the CPU, this
         // operation may result in an expensive data copy operation.
         data: func() -> tensor-data;
     }
-
-    /// Alternately, construct a tensor that lives exclusively on a specific device.
-    create_on_device: func(dimensions: tensor-dimensions, ty: tensor-type, data: tensor-data,
-        location: execution-target, backend: graph-encoding) -> result<tensor, error>;
-
-    // TODO: rename exection-target to... device?
-    // TODO: rename graph-encoding to... backend?
 }
 
 /// A `graph` is a loaded instance of a specific ML model (e.g., MobileNet) for a specific ML
 /// framework (e.g., TensorFlow):
 interface graph {
     use errors.{error};
-    use device.{location};
+    use device.{device};
     use inference.{graph-execution-context};
     use tensor.{tensor};
 
@@ -114,7 +112,7 @@ interface graph {
 
     /// Load a `graph` from an opaque sequence of bytes to use for inference on the specified device
     /// `location`.
-    load: func(builder: list<graph-builder>, encoding: graph-encoding, location: location) -> result<graph, error>;
+    load: func(builder: list<graph-builder>, encoding: graph-encoding, location: device) -> result<graph, error>;
 
     /// Load a `graph` by name.
     ///