Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,19 @@ inline std::string bandwidth(pti_view_memory_record_type* activity) {
return duration == 0 ? "\"N/A\"" : fmt::format("{}", bytes * 1.0 / duration);
}

void XpuptiActivityProfilerSession::addResouceInfo(
int32_t device_id,
int32_t sycl_queue_id) {
if (std::find_if(
resourceInfo_.begin(),
resourceInfo_.end(),
[device_id, sycl_queue_id](std::pair<int32_t, int32_t> pair) {
return (pair.first == device_id) && (pair.second == sycl_queue_id);
}) == resourceInfo_.end()) {
resourceInfo_.emplace_back(device_id, sycl_queue_id);
}
}

template <class pti_view_memory_record_type>
void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(
ActivityType activityType,
Expand Down Expand Up @@ -191,6 +204,8 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities(
trace_activity->device = getDeviceIdxFromUUID(activity->_device_uuid);
trace_activity->resource = getMappedQueueId(activity->_sycl_queue_id);
trace_activity->flow.start = 0;

addResouceInfo(trace_activity->device, trace_activity->resource);
}

if constexpr (handleMemcpyActivities || handleMemsetActivities) {
Expand Down
39 changes: 26 additions & 13 deletions libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,8 @@ std::string getXpuDeviceProperties() {
const auto& device_list = platform.get_devices();
for (size_t i = 0; i < device_list.size(); i++) {
const auto& device = device_list[i];
jsonProps.push_back(
fmt::format(
R"JSON(
jsonProps.push_back(fmt::format(
R"JSON(
{{
"id": {},
"name": "{}",
Expand All @@ -108,16 +107,16 @@ std::string getXpuDeviceProperties() {
"vendor": "{}",
"driverVersion": "{}"
}})JSON",
i,
device.get_info<sycl::info::device::name>(),
device.get_info<sycl::info::device::global_mem_size>(),
device.get_info<sycl::info::device::max_compute_units>(),
device.get_info<sycl::info::device::max_work_group_size>(),
device.get_info<sycl::info::device::max_clock_frequency>(),
device.get_info<sycl::info::device::max_mem_alloc_size>(),
device.get_info<sycl::info::device::local_mem_size>(),
device.get_info<sycl::info::device::vendor>(),
device.get_info<sycl::info::device::driver_version>()));
i,
device.get_info<sycl::info::device::name>(),
device.get_info<sycl::info::device::global_mem_size>(),
device.get_info<sycl::info::device::max_compute_units>(),
device.get_info<sycl::info::device::max_work_group_size>(),
device.get_info<sycl::info::device::max_clock_frequency>(),
device.get_info<sycl::info::device::max_mem_alloc_size>(),
device.get_info<sycl::info::device::local_mem_size>(),
device.get_info<sycl::info::device::vendor>(),
device.get_info<sycl::info::device::driver_version>()));
}
}

Expand Down Expand Up @@ -154,6 +153,20 @@ XpuptiActivityProfilerSession::getTraceBuffer() {
return std::make_unique<libkineto::CpuTraceBuffer>(std::move(traceBuffer_));
}

std::vector<libkineto::ResourceInfo>
XpuptiActivityProfilerSession::getResourceInfos() {
std::vector<libkineto::ResourceInfo> result;
for (const auto [device_id, sycl_queue_id] : resourceInfo_) {
result.emplace_back(
device_id,
sycl_queue_id,
sycl_queue_id,
fmt::format("Stream {}", sycl_queue_id));
}
resourceInfo_.clear();
return result;
}

void XpuptiActivityProfilerSession::pushCorrelationId(uint64_t id) {
xpti_.pushCorrelationID(id, XpuptiActivityApi::CorrelationFlowType::Default);
}
Expand Down
7 changes: 4 additions & 3 deletions libkineto/src/plugin/xpupti/XpuptiActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ class XpuptiActivityProfilerSession
std::unique_ptr<libkineto::DeviceInfo> getDeviceInfo() override {
return {};
}
std::vector<libkineto::ResourceInfo> getResourceInfos() override {
return {};
}
std::vector<libkineto::ResourceInfo> getResourceInfos() override;
std::unique_ptr<libkineto::CpuTraceBuffer> getTraceBuffer() override;

void pushCorrelationId(uint64_t id) override;
Expand Down Expand Up @@ -106,6 +104,8 @@ class XpuptiActivityProfilerSession
// for profiling activity creation
DeviceIndex_t getDeviceIdxFromUUID(const uint8_t deviceUUID[16]);

void addResouceInfo(int32_t device_id, int32_t sycl_queue_id);

private:
static uint32_t iterationCount_;
static std::vector<DeviceUUIDsT> deviceUUIDs_;
Expand All @@ -124,6 +124,7 @@ class XpuptiActivityProfilerSession

XpuptiActivityApi& xpti_;
libkineto::CpuTraceBuffer traceBuffer_;
std::vector<std::pair<uint32_t, uint32_t>> resourceInfo_;
std::unordered_map<uint64_t, uint64_t> sycl_queue_pool_;
std::unique_ptr<const libkineto::Config> config_{nullptr};
const std::set<ActivityType>& activity_types_;
Expand Down
69 changes: 69 additions & 0 deletions libkineto/test/xpupti/XpuptiProfilerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,72 @@ TEST(XpuptiProfilerTest, XpuDriverEvents) {
std::move(expectedActivities),
std::move(expectedTypes));
}

// Runs the profiler over memcpy / memset / kernel / external-correlation
// activity types, then cross-checks the session's resource infos against the
// recorded activities:
//   * every activity must have a resource info with the same id and device id;
//   * every resource info must be referenced by at least one activity.
TEST(XpuptiProfilerTest, TestEvents) {
  KN::Config cfg;

  std::vector<std::string_view> metrics;

  std::set<KN::ActivityType> activities{
      KN::ActivityType::GPU_MEMCPY,
      KN::ActivityType::GPU_MEMSET,
      KN::ActivityType::CONCURRENT_KERNEL,
      KN::ActivityType::EXTERNAL_CORRELATION};

  std::vector<std::string_view> expectedActivities = {
      "Memcpy M2D",
      "Memcpy M2D",
      "Memcpy M2D",
      "Run(sycl::_V1::queue, ...)",
      "Memcpy D2M"};

  std::vector<std::string_view> expectedTypes = {
      "gpu_memcpy", "gpu_memcpy", "gpu_memcpy", "kernel", "gpu_memcpy"};

  constexpr unsigned repeatCount = 1;
  auto [pSession, pBuffer] = RunProfilerTest(
      metrics,
      activities,
      cfg,
      repeatCount,
      std::move(expectedActivities),
      std::move(expectedTypes));

  static bool isVerbose = IsEnvVerbose();

  auto resourceInfos = pSession->getResourceInfos();

  // Optional dump of every resource info, one "<field> = <value>" per line.
  if (isVerbose) {
    for (auto&& ri : resourceInfos) {
      std::cout << "id = " << ri.id << std::endl;
      std::cout << "sortIndex = " << ri.sortIndex << std::endl;
      std::cout << "deviceId = " << ri.deviceId << std::endl;
      std::cout << "name = " << ri.name << std::endl;
    }
  }

  // irUseCount[k] counts the activities that resolved to resourceInfos[k].
  std::vector<unsigned> irUseCount(resourceInfos.size(), 0);
  for (auto&& pActivity : pBuffer->activities) {
    auto resourceId = pActivity->resourceId();
    auto deviceId = pActivity->deviceId();

    // Advance to the first resource info matching this activity, if any.
    unsigned slot = 0;
    while (slot < resourceInfos.size()) {
      const auto& candidate = resourceInfos[slot];
      if ((candidate.id == resourceId) && (candidate.deviceId == deviceId)) {
        break;
      }
      ++slot;
    }

    const bool found = slot < resourceInfos.size();
    if (found) {
      ++irUseCount[slot];
    }

    EXPECT_TRUE(found) << "resourceInfo for deviceId = " << deviceId
                       << ", resourceId=" << resourceId << " not found.";
  }

  // No resource info may be left without a referencing activity.
  for (unsigned i = 0; i < resourceInfos.size(); ++i) {
    EXPECT_TRUE(irUseCount[i] > 0)
        << "resourceInfo for deviceId = " << resourceInfos[i].deviceId
        << ", resourceId=" << resourceInfos[i].id << " never used.";
  }
}