mathieuripert · mathieuripert · Jun 5, 2025 · Jun 5, 2025 · Jun 5, 2025
diff --git a/lib/chat_models/chat_google_ai.ex b/lib/chat_models/chat_google_ai.ex
@@ -603,9 +603,11 @@ defmodule LangChain.ChatModels.ChatGoogleAI do
     # Google is odd in that it returns token usage for each MessageDelta as it
     # goes, incrementing the number of generated tokens. I haven't seen anyone
     # else do this. For now, we fire each and every TokenUsage we receive.
-    case get_token_usage(data) do
-      %TokenUsage{} = token_usage ->
-        Callbacks.fire(model.callbacks, :on_llm_token_usage, [token_usage])
+    token_usage = get_token_usage(data)
+
+    case token_usage do
+      %TokenUsage{} = usage ->
+        Callbacks.fire(model.callbacks, :on_llm_token_usage, [usage])
         :ok
 
       nil ->
@@ -614,6 +616,7 @@ defmodule LangChain.ChatModels.ChatGoogleAI do
 
     candidates
     |> Enum.map(&do_process_response(model, &1, message_type))
+    |> Enum.map(&TokenUsage.set(&1, token_usage))
   end
 
   # Function Call in a Message

diff --git a/test/chat_models/chat_google_ai_test.exs b/test/chat_models/chat_google_ai_test.exs
@@ -619,6 +619,79 @@ defmodule ChatModels.ChatGoogleAITest do
       assert error.type == "unexpected_response"
       assert error.message == "Unexpected response"
     end
+
+    test "handles receiving a message with token usage", %{model: model} do
+      response = %{
+        "candidates" => [
+          %{
+            "content" => %{"role" => "model", "parts" => [%{"text" => "Hello User!"}]},
+            "finishReason" => "STOP",
+            "index" => 0
+          }
+        ],
+        "usageMetadata" => %{
+          "promptTokenCount" => 10,
+          "candidatesTokenCount" => 5,
+          "totalTokenCount" => 15
+        }
+      }
+
+      assert [%Message{} = struct] = ChatGoogleAI.do_process_response(model, response)
+      assert struct.role == :assistant
+      assert [%ContentPart{type: :text, content: "Hello User!"}] = struct.content
+      assert struct.index == 0
+      assert struct.status == :complete
+
+      # The response-level "usageMetadata" must be attached to the message as a
+      # TokenUsage under metadata.usage, keeping the original map in `raw`.
+      assert %TokenUsage{} = struct.metadata.usage
+      assert struct.metadata.usage.input == 10
+      assert struct.metadata.usage.output == 5
+      assert struct.metadata.usage.raw == %{
+               "promptTokenCount" => 10,
+               "candidatesTokenCount" => 5,
+               "totalTokenCount" => 15
+             }
+    end
+
+    test "handles receiving MessageDelta with token usage", %{model: model} do
+      # Usage payload sent next to the candidates; kept in a variable so the
+      # `raw` assertion below compares against exactly what was sent.
+      usage_metadata = %{
+        "promptTokenCount" => 8,
+        "candidatesTokenCount" => 3,
+        "totalTokenCount" => 11
+      }
+
+      candidate = %{
+        "content" => %{
+          "role" => "model",
+          "parts" => [%{"text" => "This is a partial message"}]
+        },
+        "finishReason" => "STOP",
+        "index" => 0
+      }
+
+      payload = %{
+        "candidates" => [candidate],
+        "usageMetadata" => usage_metadata
+      }
+
+      assert [%MessageDelta{} = delta] =
+               ChatGoogleAI.do_process_response(model, payload, MessageDelta)
+
+      assert delta.role == :assistant
+      assert delta.content == "This is a partial message"
+      assert delta.index == 0
+      assert delta.status == :incomplete
+
+      # The delta's metadata must carry the parsed usage plus the raw payload.
+      usage = delta.metadata.usage
+      assert %TokenUsage{} = usage
+      assert usage.input == 8
+      assert usage.output == 3
+      assert usage.raw == usage_metadata
+    end
   end
 
   describe "filter_parts_for_types/2" do