From 6156d774e7e6269e7a5697d6a1d7e8d27195c71d Mon Sep 17 00:00:00 2001 From: "Peter B. Robinson" Date: Tue, 3 Mar 2026 13:03:07 -0800 Subject: [PATCH] fix behavior inconsistency between CPU and GPU algorithm for IntersectKeyValueSorters --- src/care/KeyValueSorter_decl.h | 4 +--- src/care/KeyValueSorter_impl.h | 12 +++++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/care/KeyValueSorter_decl.h b/src/care/KeyValueSorter_decl.h index 9595ced8..29b886b9 100644 --- a/src/care/KeyValueSorter_decl.h +++ b/src/care/KeyValueSorter_decl.h @@ -1491,9 +1491,7 @@ void IntersectKeyValueSorters(RAJADeviceExec exec, KeyValueSorter void IntersectKeyValueSorters(RAJA::seq_exec exec, diff --git a/src/care/KeyValueSorter_impl.h b/src/care/KeyValueSorter_impl.h index bd2b1a43..7c7b4ec4 100644 --- a/src/care/KeyValueSorter_impl.h +++ b/src/care/KeyValueSorter_impl.h @@ -468,7 +468,17 @@ CARE_INLINE void IntersectKeyValueSorters(RAJADeviceExec exec, host_device_ptr searches{smaller+1}; host_device_ptr matched{smaller+1}; CARE_STREAM_LOOP(i, 0, smaller+1) { - searches[i] = i != smaller ? care::BinarySearch(largerArray, largeStart, larger, smallerArray[i+smallStart]) : -1; + if (i == smaller) { + searches[i] = -1; + } + else { + // to be consistent with CPU algorithm, find the first match + int match = care::BinarySearch(largerArray, largeStart, larger, smallerArray[i+smallStart]); + while (match > largeStart && largerArray[match-1] == largerArray[match]) { + --match; + } + searches[i] = match; + } matched[i] = i != smaller && searches[i] > -1; } CARE_STREAM_LOOP_END