From 745be501b4c68e2d6212132fd84ed2746d118529 Mon Sep 17 00:00:00 2001
From: Andrii Manzhola <103326291+Andrew1326@users.noreply.github.com>
Date: Wed, 14 Jan 2026 12:19:51 +0100
Subject: [PATCH] Fix assertion failure when duplicate client_id encountered

The assertion "We should not be processing a client id twice per update"
can fail when a process has multiple file descriptors referencing the
same DRM client (e.g., via dup(), fork(), or DRM master operations).

The kcmp syscall filters duplicate file descriptions but not distinct
file descriptions that report the same underlying DRM client_id.

This change converts the debug assertion into a runtime check that
gracefully skips duplicate entries and frees any newly allocated
cache entries to prevent memory leaks.

Fixes the crash:
  nvtop: ./src/extract_gpuinfo_amdgpu.c:964: parse_drm_fdinfo_amd:
  Assertion `!cache_entry_check && "We should not be processing a
  client id twice per update"' failed.

Applied to all affected drivers:
- AMDGPU
- Intel i915
- Intel Xe
- Qualcomm MSM (also fixed incorrect hash key usage)
- ARM Mali
---
 src/extract_gpuinfo_amdgpu.c      | 13 +++++++++----
 src/extract_gpuinfo_intel_i915.c  | 13 +++++++++----
 src/extract_gpuinfo_intel_xe.c    | 13 +++++++++----
 src/extract_gpuinfo_mali_common.c | 13 +++++++++----
 src/extract_gpuinfo_msm.c         | 15 ++++++++++-----
 5 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/src/extract_gpuinfo_amdgpu.c b/src/extract_gpuinfo_amdgpu.c
index 5c8a9bbb..075ad546 100644
--- a/src/extract_gpuinfo_amdgpu.c
+++ b/src/extract_gpuinfo_amdgpu.c
@@ -975,12 +975,17 @@ static bool parse_drm_fdinfo_amd(struct gpu_info *info, FILE *fdinfo_file, struc
     if (static_info->encode_decode_shared)
       SET_GPUINFO_PROCESS(process_info, decode_usage, process_info->decode_usage + process_info->encode_usage);
 
-#ifndef NDEBUG
-    // We should only process one fdinfo entry per client id per update
+    // Check if we already processed this client_id in the current update cycle.
+    // This can happen when a process has multiple file descriptors referencing
+    // the same DRM client (e.g., via DRM master operations).
     struct amdgpu_process_info_cache *cache_entry_check;
     HASH_FIND_CLIENT(gpu_info->current_update_process_cache, &cache_entry->client_id, cache_entry_check);
-    assert(!cache_entry_check && "We should not be processing a client id twice per update");
-#endif
+    if (cache_entry_check) {
+      // Already processed this client_id, free the entry if we allocated it
+      if (cache_entry != cache_entry_check)
+        free(cache_entry);
+      goto parse_fdinfo_exit;
+    }
 
     // Store this measurement data
     RESET_ALL(cache_entry->valid);
diff --git a/src/extract_gpuinfo_intel_i915.c b/src/extract_gpuinfo_intel_i915.c
index 1d6c13a9..52f7aab6 100644
--- a/src/extract_gpuinfo_intel_i915.c
+++ b/src/extract_gpuinfo_intel_i915.c
@@ -303,12 +303,17 @@ bool parse_drm_fdinfo_intel_i915(struct gpu_info *info, FILE *fdinfo_file, struc
     cache_entry->client_id.pdev = gpu_info->base.pdev;
   }
 
-#ifndef NDEBUG
-  // We should only process one fdinfo entry per client id per update
+  // Check if we already processed this client_id in the current update cycle.
+  // This can happen when a process has multiple file descriptors referencing
+  // the same DRM client (e.g., via DRM master operations).
   struct intel_process_info_cache *cache_entry_check;
   HASH_FIND_CLIENT(gpu_info->current_update_process_cache, &cache_entry->client_id, cache_entry_check);
-  assert(!cache_entry_check && "We should not be processing a client id twice per update");
-#endif
+  if (cache_entry_check) {
+    // Already processed this client_id, free the entry if we allocated it
+    if (cache_entry != cache_entry_check)
+      free(cache_entry);
+    goto parse_fdinfo_exit;
+  }
 
   RESET_ALL(cache_entry->valid);
   if (GPUINFO_PROCESS_FIELD_VALID(process_info, gfx_engine_used))
diff --git a/src/extract_gpuinfo_intel_xe.c b/src/extract_gpuinfo_intel_xe.c
index 4ac15567..07975f9e 100644
--- a/src/extract_gpuinfo_intel_xe.c
+++ b/src/extract_gpuinfo_intel_xe.c
@@ -254,12 +254,17 @@ bool parse_drm_fdinfo_intel_xe(struct gpu_info *info, FILE *fdinfo_file, struct
     cache_entry->client_id.pdev = gpu_info->base.pdev;
   }
 
-#ifndef NDEBUG
-  // We should only process one fdinfo entry per client id per update
+  // Check if we already processed this client_id in the current update cycle.
+  // This can happen when a process has multiple file descriptors referencing
+  // the same DRM client (e.g., via DRM master operations).
   struct intel_process_info_cache *cache_entry_check;
   HASH_FIND_CLIENT(gpu_info->current_update_process_cache, &cache_entry->client_id, cache_entry_check);
-  assert(!cache_entry_check && "We should not be processing a client id twice per update");
-#endif
+  if (cache_entry_check) {
+    // Already processed this client_id, free the entry if we allocated it
+    if (cache_entry != cache_entry_check)
+      free(cache_entry);
+    goto parse_fdinfo_exit;
+  }
 
   RESET_ALL(cache_entry->valid);
   SET_INTEL_CACHE(cache_entry, gpu_cycles, gpu_cycles);
diff --git a/src/extract_gpuinfo_mali_common.c b/src/extract_gpuinfo_mali_common.c
index 6a767da6..b9b843be 100644
--- a/src/extract_gpuinfo_mali_common.c
+++ b/src/extract_gpuinfo_mali_common.c
@@ -435,12 +435,17 @@ void mali_common_parse_fdinfo_handle_cache(struct gpu_info_mali *gpu_info,
     cache_entry->last_cycles = total_cycles;
   }
 
-#ifndef NDEBUG
-  // We should only process one fdinfo entry per client id per update
+  // Check if we already processed this client_id in the current update cycle.
+  // This can happen when a process has multiple file descriptors referencing
+  // the same DRM client (e.g., via DRM master operations).
   struct mali_process_info_cache *cache_entry_check;
   HASH_FIND_CLIENT(gpu_info->current_update_process_cache, &cache_entry->client_id, cache_entry_check);
-  assert(!cache_entry_check && "We should not be processing a client id twice per update");
-#endif
+  if (cache_entry_check) {
+    // Already processed this client_id, free the entry if we allocated it
+    if (cache_entry != cache_entry_check)
+      free(cache_entry);
+    return;
+  }
 
   RESET_ALL(cache_entry->valid);
   if (GPUINFO_PROCESS_FIELD_VALID(process_info, gfx_engine_used))
diff --git a/src/extract_gpuinfo_msm.c b/src/extract_gpuinfo_msm.c
index f05b7fe8..83c2e26b 100644
--- a/src/extract_gpuinfo_msm.c
+++ b/src/extract_gpuinfo_msm.c
@@ -357,12 +357,17 @@ static bool parse_drm_fdinfo_msm(struct gpu_info *info, FILE *fdinfo_file, struc
     cache_entry->client_id.pid = process_info->pid;
   }
 
-#ifndef NDEBUG
-  // We should only process one fdinfo entry per client id per update
+  // Check if we already processed this client_id in the current update cycle.
+  // This can happen when a process has multiple file descriptors referencing
+  // the same DRM client (e.g., via DRM master operations).
   struct msm_process_info_cache *cache_entry_check;
-  HASH_FIND_CLIENT(gpu_info->current_update_process_cache, &cid, cache_entry_check);
-  assert(!cache_entry_check && "We should not be processing a client id twice per update");
-#endif
+  HASH_FIND_CLIENT(gpu_info->current_update_process_cache, &cache_entry->client_id, cache_entry_check);
+  if (cache_entry_check) {
+    // Already processed this client_id, free the entry if we allocated it
+    if (cache_entry != cache_entry_check)
+      free(cache_entry);
+    goto parse_fdinfo_exit;
+  }
 
   RESET_ALL(cache_entry->valid);
   if (GPUINFO_PROCESS_FIELD_VALID(process_info, gfx_engine_used))