diff --git a/libkineto/include/ActivityType.h b/libkineto/include/ActivityType.h index fd08efdac..935047755 100644 --- a/libkineto/include/ActivityType.h +++ b/libkineto/include/ActivityType.h @@ -42,15 +42,16 @@ enum class ActivityType { HPU_OP = 19, // HPU host side runtime event XPU_RUNTIME = 20, // host side xpu runtime events XPU_DRIVER = 21, // host side xpu driver events - COLLECTIVE_COMM = 22, // collective communication + XPU_SCOPE_PROFILER = 22, // XPUPTI Profiler scope for performance metrics + COLLECTIVE_COMM = 23, // collective communication // PRIVATEUSE1 Activity types are used for custom backends. // The corresponding device type is `DeviceType::PrivateUse1` in PyTorch. - PRIVATEUSE1_RUNTIME = 23, // host side privateUse1 runtime events - PRIVATEUSE1_DRIVER = 24, // host side privateUse1 driver events + PRIVATEUSE1_RUNTIME = 24, // host side privateUse1 runtime events + PRIVATEUSE1_DRIVER = 25, // host side privateUse1 driver events ENUM_COUNT = - 25, // This is to add buffer and not used for any profiling logic. Add + 26, // This is to add buffer and not used for any profiling logic. Add // your new type before it. OPTIONAL_ACTIVITY_TYPE_START = GLOW_RUNTIME, }; diff --git a/libkineto/libkineto_defs.bzl b/libkineto/libkineto_defs.bzl index 17327b912..ffdbbdb67 100644 --- a/libkineto/libkineto_defs.bzl +++ b/libkineto/libkineto_defs.bzl @@ -38,8 +38,13 @@ def get_libkineto_roctracer_srcs(with_api = True): def get_libkineto_xpupti_srcs(with_api = True): return [ "src/plugin/xpupti/XpuptiActivityApi.cpp", - "src/plugin/xpupti/XpuptiActivityProfiler.cpp", + "src/plugin/xpupti/XpuptiActivityApiV2.cpp", "src/plugin/xpupti/XpuptiActivityHandlers.cpp", + "src/plugin/xpupti/XpuptiActivityHandlersV2.cpp", + "src/plugin/xpupti/XpuptiActivityProfiler.cpp", + "src/plugin/xpupti/XpuptiActivityProfilerSession.cpp", + "src/plugin/xpupti/XpuptiActivityProfilerSessionV1.cpp", + "src/plugin/xpupti/XpuptiProfilerMacros.cpp", "src/plugin/xpupti/XpuptiScopeProfilerConfig.cpp", ] + (get_libkineto_cpu_only_srcs(with_api)) diff --git a/libkineto/src/ActivityType.cpp b/libkineto/src/ActivityType.cpp index 8cff187f4..af8858c94 100644 --- a/libkineto/src/ActivityType.cpp +++ b/libkineto/src/ActivityType.cpp @@ -40,6 +40,7 @@ static constexpr std::array map{ {"hpu_op", ActivityType::HPU_OP}, {"xpu_runtime", ActivityType::XPU_RUNTIME}, {"xpu_driver", ActivityType::XPU_DRIVER}, + {"xpu_scope_profiler", ActivityType::XPU_SCOPE_PROFILER}, {"collective_comm", ActivityType::COLLECTIVE_COMM}, {"privateuse1_runtime", ActivityType::PRIVATEUSE1_RUNTIME}, {"privateuse1_driver", ActivityType::PRIVATEUSE1_DRIVER}, diff --git a/libkineto/src/CuptiActivityProfiler.cpp b/libkineto/src/CuptiActivityProfiler.cpp index d237e9113..42476092d 100644 --- a/libkineto/src/CuptiActivityProfiler.cpp +++ b/libkineto/src/CuptiActivityProfiler.cpp @@ -24,6 +24,9 @@ #include "DeviceUtil.h" #include "KernelRegistry.h" #include "Logger.h" +#ifdef HAS_XPUPTI +#include "plugin/xpupti/XpuptiActivityProfilerSession.h" +#endif using namespace std::chrono; using std::string; diff --git a/libkineto/src/init.cpp b/libkineto/src/init.cpp index 2c14b99bd..73143cfe1 100644 --- a/libkineto/src/init.cpp +++ b/libkineto/src/init.cpp @@ -24,7 +24,7 @@ #include "EventProfilerController.h" #endif #ifdef HAS_XPUPTI -#include "plugin/xpupti/XpuptiActivityApi.h" +#include "plugin/xpupti/XpuptiActivityApiV2.h" #include "plugin/xpupti/XpuptiActivityProfiler.h" #include "plugin/xpupti/XpuptiScopeProfilerConfig.h" #endif @@ -182,19 +182,9 @@ void libkineto_init(bool cpuOnly, bool logOnError) { []() -> std::unique_ptr { auto returnCode = ptiViewGPULocalAvailable(); if (returnCode != PTI_SUCCESS) { - std::string errPrefixMsg( - "Fail to enable Kineto Profiler on XPU due to error code: "); - errPrefixMsg = errPrefixMsg + std::to_string(returnCode); -#if PTI_VERSION_AT_LEAST(0, 10) - std::string errMsg(ptiResultTypeToString(returnCode)); - throw std::runtime_error( - errPrefixMsg + std::string(". The detailed error message is: ") + - errMsg); -#else - throw std::runtime_error(errPrefixMsg); -#endif + throwXpuRuntimeError( + "Fail to enable Kineto Profiler on XPU.", returnCode); } - XpuptiScopeProfilerConfig::registerFactory(); return std::make_unique(); }); diff --git a/libkineto/src/output_json.cpp b/libkineto/src/output_json.cpp index 891813d1e..a002065f9 100644 --- a/libkineto/src/output_json.cpp +++ b/libkineto/src/output_json.cpp @@ -468,19 +468,18 @@ void ChromeTraceLogger::handleActivity(const libkineto::ITraceActivity& op) { if (!arg_values.empty()) { arg_values.append(","); } - arg_values.append( - fmt::format( - R"( "{}": {}, "{}": {}, "{}": {}, "{}": {}, "{}": {})", - kCollectiveName, - collectiveName, - kInMsgNelems, - inMsgSize, - kOutMsgNelems, - outMsgSize, - kGroupSize, - groupSize, - kDtype, - dtype)); + arg_values.append(fmt::format( + R"( "{}": {}, "{}": {}, "{}": {}, "{}": {}, "{}": {})", + kCollectiveName, + collectiveName, + kInMsgNelems, + inMsgSize, + kOutMsgNelems, + outMsgSize, + kGroupSize, + groupSize, + kDtype, + dtype)); } const auto& input_tensor_starts = collectiveRecord->getMetadataValue(std::string(kInTensorsStart)); @@ -509,13 +508,12 @@ void ChromeTraceLogger::handleActivity(const libkineto::ITraceActivity& op) { if (!arg_values.empty()) { arg_values.append(","); } - arg_values.append( - fmt::format( - R"( "{}": {}, "{}": {})", - kInSplit, - inSplitSize, - kOutSplit, - outSplitSize)); + arg_values.append(fmt::format( + R"( "{}": {}, "{}": {})", + kInSplit, + inSplitSize, + kOutSplit, + outSplitSize)); } const auto& processGroupName = collectiveRecord->getMetadataValue(std::string(kProcessGroupName)); @@ -591,16 +589,51 @@ void ChromeTraceLogger::handleActivity(const libkineto::ITraceActivity& op) { sanitizeStrForJSON(op_name); sanitizeForNonReadableChars(op_name); - // clang-format off ts = transToRelativeTime(ts); - fmt::print(traceOf_, R"JSON( + + if (op.type() == ActivityType::XPU_SCOPE_PROFILER) { + std::string metricsStr = op.metadataJson(); + std::string activityName = toString(op.type()); + fmt::print( + traceOf_, + // clang-format off + R"JSON( + {{ + "name": "{}", + "ph": "C", + "ts": {}.{:03}, + "pid": {}, + "tid": {}, + "args": {{ {} }} + }},)JSON", + // clang-format on + activityName.substr(0, activityName.find('_')), + ts / 1000, + ts % 1000, + device, + sanitizeTid(resource), + metricsStr); + } else { + fmt::print( + traceOf_, + // clang-format off + R"JSON( {{ "ph": "X", "cat": "{}", "name": "{}", "pid": {}, "tid": {}, "ts": {}.{:03}, "dur": {}.{:03}{} }},)JSON", - toString(op.type()), op_name, device, sanitizeTid(resource), - ts/1000, ts %1000, duration/1000, duration %1000, args); - // clang-format on + // clang-format on + toString(op.type()), + op_name, + device, + sanitizeTid(resource), + ts / 1000, + ts % 1000, + duration / 1000, + duration % 1000, + args); + } + if (op.flowId() > 0) { handleGenericLink(op); } diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp index e0f6437c3..15aea1502 100644 --- a/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp +++ b/libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp @@ -8,20 +8,13 @@ #include "XpuptiActivityApi.h" -#include -#include -#include +#include namespace KINETO_NAMESPACE { constexpr size_t kBufSize(4 * 1024 * 1024); -XpuptiActivityApi& XpuptiActivityApi::singleton() { - static XpuptiActivityApi instance; - return instance; -} - -void XpuptiActivityApi::pushCorrelationID(int id, CorrelationFlowType type) { +void XpuptiActivityApiV1::pushCorrelationID(int id, CorrelationFlowType type) { #ifdef HAS_XPUPTI if (!singleton().externalCorrelationEnabled_) { return; @@ -38,7 +31,7 @@ void XpuptiActivityApi::pushCorrelationID(int id, CorrelationFlowType type) { #endif } -void XpuptiActivityApi::popCorrelationID(CorrelationFlowType type) { +void XpuptiActivityApiV1::popCorrelationID(CorrelationFlowType type) { #ifdef HAS_XPUPTI if (!singleton().externalCorrelationEnabled_) { return; @@ -68,13 +61,13 @@ static bool nextActivityRecord( return record != nullptr; } -void XpuptiActivityApi::bufferRequestedTrampoline( +void XpuptiActivityApiV1::bufferRequestedTrampoline( uint8_t** buffer, size_t* size) { singleton().bufferRequested(buffer, size); } -void XpuptiActivityApi::bufferRequested(uint8_t** buffer, size_t* size) { +void XpuptiActivityApiV1::bufferRequested(uint8_t** buffer, size_t* size) { std::lock_guard guard(mutex_); auto buf = std::make_unique(kBufSize); @@ -84,7 +77,8 @@ void XpuptiActivityApi::bufferRequested(uint8_t** buffer, size_t* size) { allocatedGpuTraceBuffers_[*buffer] = std::move(buf); } -std::unique_ptr XpuptiActivityApi::activityBuffers() { +std::unique_ptr +XpuptiActivityApiV1::activityBuffers() { { std::lock_guard guard(mutex_); if (allocatedGpuTraceBuffers_.empty()) { @@ -102,7 +96,7 @@ std::unique_ptr XpuptiActivityApi::activityBuffers() { } #ifdef HAS_XPUPTI -int XpuptiActivityApi::processActivitiesForBuffer( +int XpuptiActivityApiV1::processActivitiesForBuffer( uint8_t* buf, size_t validSize, std::function handler) { @@ -118,7 +112,7 @@ int XpuptiActivityApi::processActivitiesForBuffer( } #endif -const std::pair XpuptiActivityApi::processActivities( +const std::pair XpuptiActivityApiV1::processActivities( XpuptiActivityBufferMap& buffers, std::function handler) { std::pair res{0, 0}; @@ -132,13 +126,13 @@ const std::pair XpuptiActivityApi::processActivities( return res; } -void XpuptiActivityApi::flushActivities() { +void XpuptiActivityApiV1::flushActivities() { #ifdef HAS_XPUPTI XPUPTI_CALL(ptiFlushAllViews()); #endif } -void XpuptiActivityApi::clearActivities() { +void XpuptiActivityApiV1::clearActivities() { { std::lock_guard guard(mutex_); if (allocatedGpuTraceBuffers_.empty()) { @@ -153,14 +147,14 @@ void XpuptiActivityApi::clearActivities() { } #ifdef HAS_XPUPTI -void XpuptiActivityApi::bufferCompletedTrampoline( +void XpuptiActivityApiV1::bufferCompletedTrampoline( uint8_t* buffer, size_t size, size_t validSize) { singleton().bufferCompleted(buffer, size, validSize); } -void XpuptiActivityApi::bufferCompleted( +void XpuptiActivityApiV1::bufferCompleted( uint8_t* buffer, size_t size, size_t validSize) { @@ -202,8 +196,9 @@ static void enableSpecifcRuntimeAPIsTracing() { } #endif -void XpuptiActivityApi::enableXpuptiActivities( - const std::set& selected_activities) { +void XpuptiActivityApiV1::enableXpuptiActivities( + const std::set& selected_activities, + bool scopeProfilerActivityAccepted) { #ifdef HAS_XPUPTI XPUPTI_CALL(ptiViewSetCallbacks( bufferRequestedTrampoline, bufferCompletedTrampoline)); @@ -245,6 +240,14 @@ void XpuptiActivityApi::enableXpuptiActivities( XPUPTI_CALL(ptiViewEnable(PTI_VIEW_DRIVER_API)); break; + case ActivityType::XPU_SCOPE_PROFILER: + if (!scopeProfilerActivityAccepted) { + throw std::runtime_error( + "IntelĀ® PTI version required to use scope profiler is at least 0.15 " + "(available with IntelĀ® oneAPI in version at least 2025.3.1)."); + } + break; + case ActivityType::OVERHEAD: XPUPTI_CALL(ptiViewEnable(PTI_VIEW_COLLECTION_OVERHEAD)); break; @@ -253,7 +256,7 @@ void XpuptiActivityApi::enableXpuptiActivities( #endif } -void XpuptiActivityApi::disablePtiActivities( +void XpuptiActivityApiV1::disablePtiActivities( const std::set& selected_activities) { #ifdef HAS_XPUPTI for (const auto& activity : selected_activities) { diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityApi.h b/libkineto/src/plugin/xpupti/XpuptiActivityApi.h index 1d8f970b3..19ae73360 100644 --- a/libkineto/src/plugin/xpupti/XpuptiActivityApi.h +++ b/libkineto/src/plugin/xpupti/XpuptiActivityApi.h @@ -12,35 +12,32 @@ #include "XpuptiProfilerMacros.h" #include "ActivityType.h" -#include "Config.h" #include #include -#include #include -#include -#include namespace KINETO_NAMESPACE { -class XpuptiActivityApi { +class XpuptiActivityApiV1 { public: enum CorrelationFlowType { Default, User }; - XpuptiActivityApi() = default; - XpuptiActivityApi(const XpuptiActivityApi&) = delete; - XpuptiActivityApi& operator=(const XpuptiActivityApi&) = delete; + XpuptiActivityApiV1() = default; + XpuptiActivityApiV1(const XpuptiActivityApiV1&) = delete; + XpuptiActivityApiV1& operator=(const XpuptiActivityApiV1&) = delete; - virtual ~XpuptiActivityApi() {} + virtual ~XpuptiActivityApiV1() {} - static XpuptiActivityApi& singleton(); + static XpuptiActivityApiV1& singleton(); static void pushCorrelationID(int id, CorrelationFlowType type); static void popCorrelationID(CorrelationFlowType type); void enableXpuptiActivities( - const std::set& selected_activities); + const std::set& selected_activities, + bool scopeProfilerActivityAccepted = false); void disablePtiActivities(const std::set& selected_activities); void clearActivities(); void flushActivities(); diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityApiV2.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityApiV2.cpp new file mode 100644 index 000000000..822d30c5b --- /dev/null +++ b/libkineto/src/plugin/xpupti/XpuptiActivityApiV2.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "XpuptiActivityApiV2.h" + +#if PTI_VERSION_AT_LEAST(0, 15) + +#include "XpuptiScopeProfilerConfig.h" + +#include +#include +#include + +#endif + +namespace KINETO_NAMESPACE { + +XpuptiActivityApi& XpuptiActivityApi::singleton() { + static XpuptiActivityApi instance; + return instance; +} + +XpuptiActivityApiV1& XpuptiActivityApiV1::singleton() { + return XpuptiActivityApi::singleton(); +} + +} // namespace KINETO_NAMESPACE + +#if PTI_VERSION_AT_LEAST(0, 15) + +namespace KINETO_NAMESPACE { + +XpuptiActivityApi::safe_pti_scope_collection_handle_t:: + safe_pti_scope_collection_handle_t(std::exception_ptr& exceptFromDestructor) + : exceptFromDestructor_(exceptFromDestructor) { + XPUPTI_CALL(ptiMetricsScopeEnable(&handle_)); +} + +XpuptiActivityApi::safe_pti_scope_collection_handle_t:: + ~safe_pti_scope_collection_handle_t() noexcept { + try { + XPUPTI_CALL(ptiMetricsScopeDisable(handle_)); + } catch (...) { + exceptFromDestructor_ = std::current_exception(); + } +} + +void XpuptiActivityApi::enableScopeProfiler(const Config& cfg) { +#ifdef HAS_XPUPTI + uint32_t deviceCount = 0; + XPUPTI_CALL(ptiMetricsGetDevices(nullptr, &deviceCount)); + + if (deviceCount == 0) { + throw std::runtime_error("No XPU devices available"); + } + + auto devices = std::make_unique(deviceCount); + XPUPTI_CALL(ptiMetricsGetDevices(devices.get(), &deviceCount)); + + auto devicesHandles = std::make_unique(deviceCount); + for (uint32_t i = 0; i < deviceCount; ++i) { + devicesHandles[i] = devices[i]._handle; + } + + const auto& spcfg = XpuptiScopeProfilerConfig::get(cfg); + const auto& activitiesXpuptiMetrics = spcfg.activitiesXpuptiMetrics(); + + std::vector metricNames; + metricNames.reserve(activitiesXpuptiMetrics.size()); + std::transform( + activitiesXpuptiMetrics.begin(), + activitiesXpuptiMetrics.end(), + std::back_inserter(metricNames), + [](const std::string& s) { return s.c_str(); }); + + pti_metrics_scope_mode_t collectionMode = spcfg.xpuptiProfilerPerKernel() + ? PTI_METRICS_SCOPE_AUTO_KERNEL + : PTI_METRICS_SCOPE_USER; + + if (collectionMode == PTI_METRICS_SCOPE_USER) { + throw std::runtime_error( + "XPUPTI_PROFILER_ENABLE_PER_KERNEL has to be set to 1. Other variants are currently not supported."); + } + + scopeHandleOpt_.emplace(exceptFromScopeHandleDestructor_); + XPUPTI_CALL(ptiMetricsScopeConfigure( + *scopeHandleOpt_, + collectionMode, + devicesHandles.get(), + (deviceCount, 1), // Only 1 device is currently supported + metricNames.data(), + metricNames.size())); + + uint64_t expectedKernels = spcfg.xpuptiProfilerMaxScopes(); + size_t estimatedCollectionBufferSize = 0; + XPUPTI_CALL(ptiMetricsScopeQueryCollectionBufferSize( + *scopeHandleOpt_, expectedKernels, &estimatedCollectionBufferSize)); + + XPUPTI_CALL(ptiMetricsScopeSetCollectionBufferSize( + *scopeHandleOpt_, estimatedCollectionBufferSize)); +#endif +} + +void XpuptiActivityApi::disableScopeProfiler() { +#ifdef HAS_XPUPTI + scopeHandleOpt_.reset(); + if (exceptFromScopeHandleDestructor_) { + std::rethrow_exception(exceptFromScopeHandleDestructor_); + } +#endif +} + +void XpuptiActivityApi::startScopeActivity() { +#ifdef HAS_XPUPTI + if (scopeHandleOpt_) { + XPUPTI_CALL(ptiMetricsScopeStartCollection(*scopeHandleOpt_)); + } +#endif +} + +void XpuptiActivityApi::stopScopeActivity() { +#ifdef HAS_XPUPTI + if (scopeHandleOpt_) { + XPUPTI_CALL(ptiMetricsScopeStopCollection(*scopeHandleOpt_)); + } +#endif +} + +static size_t IntDivRoundUp(size_t a, size_t b) { + return (a + b - 1) / b; +} + +void XpuptiActivityApi::processScopeTrace( + std::function handler) { +#ifdef HAS_XPUPTI + if (scopeHandleOpt_) { + pti_metrics_scope_record_metadata_t metadata; + metadata._struct_size = sizeof(pti_metrics_scope_record_metadata_t); + + XPUPTI_CALL(ptiMetricsScopeGetMetricsMetadata(*scopeHandleOpt_, &metadata)); + + uint64_t collectionBuffersCount = 0; + XPUPTI_CALL(ptiMetricsScopeGetCollectionBuffersCount( + *scopeHandleOpt_, &collectionBuffersCount)); + + for (uint64_t bufferId = 0; bufferId < collectionBuffersCount; ++bufferId) { + void* collectionBuffer = nullptr; + size_t actualCollectionBufferSize = 0; + XPUPTI_CALL(ptiMetricsScopeGetCollectionBuffer( + *scopeHandleOpt_, + bufferId, + &collectionBuffer, + &actualCollectionBufferSize)); + + pti_metrics_scope_collection_buffer_properties_t metricsBufferProps; + metricsBufferProps._struct_size = + sizeof(pti_metrics_scope_collection_buffer_properties_t); + XPUPTI_CALL(ptiMetricsScopeGetCollectionBufferProperties( + *scopeHandleOpt_, collectionBuffer, &metricsBufferProps)); + + size_t requiredMetricsBufferSize = 0; + size_t recordsCount = 0; + XPUPTI_CALL(ptiMetricsScopeQueryMetricsBufferSize( + *scopeHandleOpt_, + collectionBuffer, + &requiredMetricsBufferSize, + &recordsCount)); + + if (recordsCount > 0) { + auto metricsBuffer = + std::make_unique(IntDivRoundUp( + requiredMetricsBufferSize, sizeof(pti_metrics_scope_record_t))); + + size_t actualRecordsCount = 0; + XPUPTI_CALL(ptiMetricsScopeCalculateMetrics( + *scopeHandleOpt_, + collectionBuffer, + metricsBuffer.get(), + requiredMetricsBufferSize, + &actualRecordsCount)); + + for (size_t recordId = 0; recordId < actualRecordsCount; ++recordId) { + auto record = metricsBuffer.get() + recordId; + handler(record, metadata); + } + } + } + } +#endif +} + +} // namespace KINETO_NAMESPACE + +#endif diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityApiV2.h b/libkineto/src/plugin/xpupti/XpuptiActivityApiV2.h new file mode 100644 index 000000000..ec65b3098 --- /dev/null +++ b/libkineto/src/plugin/xpupti/XpuptiActivityApiV2.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "XpuptiActivityApi.h" + +#if PTI_VERSION_AT_LEAST(0, 15) + +#include + +#include + +namespace KINETO_NAMESPACE { + +class Config; + +class XpuptiActivityApi : public XpuptiActivityApiV1 { + public: + XpuptiActivityApi() = default; + XpuptiActivityApi(const XpuptiActivityApi&) = delete; + XpuptiActivityApi& operator=(const XpuptiActivityApi&) = delete; + + virtual ~XpuptiActivityApi() {} + + static XpuptiActivityApi& singleton(); + + void enableXpuptiActivities( + const std::set& selected_activities) { + return XpuptiActivityApiV1::enableXpuptiActivities( + selected_activities, true); + } + + void enableScopeProfiler(const Config&); + void disableScopeProfiler(); + void startScopeActivity(); + void stopScopeActivity(); + + void processScopeTrace( + std::function handler); + + private: + struct safe_pti_scope_collection_handle_t { + safe_pti_scope_collection_handle_t( + std::exception_ptr& exceptFromDestructor); + ~safe_pti_scope_collection_handle_t() noexcept; + + operator pti_scope_collection_handle_t() { + return handle_; + } + + pti_scope_collection_handle_t handle_{}; + std::exception_ptr& exceptFromDestructor_; + }; + + std::optional scopeHandleOpt_; + std::exception_ptr exceptFromScopeHandleDestructor_; +}; + +} // namespace KINETO_NAMESPACE + +#else + +namespace KINETO_NAMESPACE { + +struct XpuptiActivityApi : public XpuptiActivityApiV1 { + using XpuptiActivityApiV1::XpuptiActivityApiV1; + + static XpuptiActivityApi& singleton(); +}; + +} // namespace KINETO_NAMESPACE + +#endif diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp index 5809dd92b..24b0c59f8 100644 --- a/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp +++ b/libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "XpuptiActivityProfiler.h" +#include "XpuptiActivityProfilerSessionV1.h" #include #include @@ -17,12 +17,12 @@ namespace KINETO_NAMESPACE { // =========== Session Private Methods ============= // -void XpuptiActivityProfilerSession::removeCorrelatedPtiActivities( +void XpuptiActivityProfilerSessionV1::removeCorrelatedPtiActivities( const ITraceActivity* act1) { correlatedPtiActivities_.erase(act1->correlationId()); } -void XpuptiActivityProfilerSession::checkTimestampOrder( +void XpuptiActivityProfilerSessionV1::checkTimestampOrder( const ITraceActivity* act1) { auto [it, inserted] = correlatedPtiActivities_.insert({act1->correlationId(), act1}); @@ -46,7 +46,7 @@ void XpuptiActivityProfilerSession::checkTimestampOrder( } } -inline bool XpuptiActivityProfilerSession::outOfRange( +inline bool XpuptiActivityProfilerSessionV1::outOfRange( const ITraceActivity* act) { bool outOfRange = act->timestamp() < captureWindowStartTime_ || (act->timestamp() + act->duration()) > captureWindowEndTime_; @@ -62,7 +62,7 @@ inline bool XpuptiActivityProfilerSession::outOfRange( return outOfRange; } -const ITraceActivity* XpuptiActivityProfilerSession::linkedActivity( +const ITraceActivity* XpuptiActivityProfilerSessionV1::linkedActivity( int32_t correlationId, const std::unordered_map& correlationMap) { const auto& it = correlationMap.find(correlationId); @@ -79,14 +79,14 @@ inline std::string handleToHexString(ze_handle_type handle) { // FIXME: Deprecate this method while activity._sycl_queue_id got correct IDs // from PTI -inline int64_t XpuptiActivityProfilerSession::getMappedQueueId( +inline int64_t XpuptiActivityProfilerSessionV1::getMappedQueueId( uint64_t sycl_queue_id) { auto [it, inserted] = sycl_queue_pool_.insert({sycl_queue_id, sycl_queue_pool_.size()}); return it->second; } -inline void XpuptiActivityProfilerSession::handleCorrelationActivity( +inline void XpuptiActivityProfilerSessionV1::handleCorrelationActivity( const pti_view_record_external_correlation* correlation) { switch (correlation->_external_kind) { case PTI_VIEW_EXTERNAL_KIND_CUSTOM_0: @@ -103,7 +103,7 @@ inline void XpuptiActivityProfilerSession::handleCorrelationActivity( } } -std::string XpuptiActivityProfilerSession::getApiName( +std::string XpuptiActivityProfilerSessionV1::getApiName( const pti_view_record_api_t* activity) { #if PTI_VERSION_AT_LEAST(0, 11) const char* api_name = nullptr; @@ -134,7 +134,7 @@ inline std::string bandwidth(pti_view_memory_record_type* activity) { } template -void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities( +void XpuptiActivityProfilerSessionV1::handleRuntimeKernelMemcpyMemsetActivities( ActivityType activityType, const pti_view_memory_record_type* activity, ActivityLogger& logger) { @@ -191,6 +191,14 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities( trace_activity->device = getDeviceIdxFromUUID(activity->_device_uuid); trace_activity->resource = getMappedQueueId(activity->_sycl_queue_id); trace_activity->flow.start = 0; + + if constexpr (handleKernelActivities) { + kernelActivities_[activity->_kernel_id].emplace( + trace_activity->startTime, + trace_activity->endTime, + trace_activity->device, + trace_activity->resource); + } } if constexpr (handleMemcpyActivities || handleMemsetActivities) { @@ -229,7 +237,7 @@ void XpuptiActivityProfilerSession::handleRuntimeKernelMemcpyMemsetActivities( trace_activity->log(logger); } -void XpuptiActivityProfilerSession::handleOverheadActivity( +void XpuptiActivityProfilerSessionV1::handleOverheadActivity( const pti_view_record_overhead* activity, ActivityLogger& logger) { traceBuffer_.emplace_activity( @@ -256,7 +264,7 @@ void XpuptiActivityProfilerSession::handleOverheadActivity( } } -void XpuptiActivityProfilerSession::handlePtiActivity( +void XpuptiActivityProfilerSessionV1::handlePtiActivity( const pti_view_record_base* record, ActivityLogger& logger) { switch (record->_view_kind) { diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityHandlersV2.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityHandlersV2.cpp new file mode 100644 index 000000000..54336eac6 --- /dev/null +++ b/libkineto/src/plugin/xpupti/XpuptiActivityHandlersV2.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "XpuptiProfilerMacros.h" + +#if PTI_VERSION_AT_LEAST(0, 15) + +#include "XpuptiActivityProfilerSession.h" + +namespace KINETO_NAMESPACE { + +static void AddPtiValueToMetadata( + GenericTraceActivity* scopeActivity, + const std::string& metricName, + pti_metric_value_type valueType, + const pti_value_t& value) { + switch (valueType) { +#define CASE(T, FIELD) \ + case PTI_METRIC_VALUE_TYPE_##T: \ + scopeActivity->addMetadata(metricName, value.FIELD); \ + return; + + CASE(UINT32, ui32); + CASE(UINT64, ui64); + CASE(FLOAT32, fp32); + CASE(FLOAT64, fp64); + +#undef CASE + + case PTI_METRIC_VALUE_TYPE_BOOL8: + scopeActivity->addMetadata(metricName, value.b8 ? "true" : "false"); + return; + + default: + break; + } +} + +void XpuptiActivityProfilerSession::handleScopeRecord( + const pti_metrics_scope_record_t* record, + const pti_metrics_scope_record_metadata_t& metadata, + ActivityLogger& logger) { + std::array scopeActivities{}; + + traceBuffer_.emplace_activity( + traceBuffer_.span, + ActivityType::CONCURRENT_KERNEL, + record->_kernel_name + ? fmt::format("metrics: {}", record->_kernel_name) + : fmt::format("metrics: kernel_{}", record->_kernel_id)); + + scopeActivities[0] = traceBuffer_.activities.back().get(); + + for (auto itSa = scopeActivities.begin() + 1; itSa != scopeActivities.end(); + ++itSa) { + traceBuffer_.emplace_activity( + traceBuffer_.span, ActivityType::XPU_SCOPE_PROFILER, "metrics"); + + *itSa = traceBuffer_.activities.back().get(); + } + + std::function FillActivityRecord{}; + auto it = kernelActivities_.find(record->_kernel_id); + if (it != kernelActivities_.end()) { + FillActivityRecord = [it](GenericTraceActivity* act) { + act->startTime = it->second.startTime_ - 1; + act->endTime = it->second.endTime_ + 1; + act->device = it->second.device_; + act->resource = it->second.resource_; + }; + } else { + FillActivityRecord = [this](GenericTraceActivity* act) { + act->startTime = lastKernelActivityEndTime_ + 1; + act->endTime = act->startTime + 1; + act->device = 0; + act->resource = 0; + }; + } + for (auto sa : scopeActivities) { + FillActivityRecord(sa); + } + scopeActivities[2]->startTime = scopeActivities[2]->endTime; + + if (it != kernelActivities_.end()) { + kernelActivities_.erase(it); + } + lastKernelActivityEndTime_ = scopeActivities[0]->endTime; + + scopeActivities[0]->addMetadata("kernel_id", record->_kernel_id); + scopeActivities[0]->addMetadataQuoted( + "queue", fmt::format("{}", record->_queue)); + + for (uint32_t m = 0; m < metadata._metrics_count; ++m) { + const auto& unit = metadata._metric_units[m]; + std::string unitSuffix = unit ? fmt::format(" [{}]", unit) : ""; + std::string metricName = + fmt::format("{}{}", metadata._metric_names[m], unitSuffix); + + for (auto itSa = scopeActivities.begin(); itSa != scopeActivities.end() - 1; + ++itSa) { + AddPtiValueToMetadata( + *itSa, + metricName, + metadata._value_types[m], + record->_metrics_values[m]); + } + + scopeActivities[2]->addMetadata(metricName, 0); + } + + for (auto sa : scopeActivities) { + sa->log(logger); + } +} + +} // namespace KINETO_NAMESPACE + +#endif diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp index 4f10cc5f8..613f36034 100644 --- a/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp +++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.cpp @@ -7,82 +7,14 @@ */ #include "XpuptiActivityProfiler.h" -#include -#include -#include -#include "XpuptiActivityApi.h" - -#include "time_since_epoch.h" +#include "XpuptiActivityApiV2.h" +#include "XpuptiActivityProfilerSession.h" +#include #include -#include -#include -#include - namespace KINETO_NAMESPACE { -using namespace std::literals::string_view_literals; - -uint32_t XpuptiActivityProfilerSession::iterationCount_ = 0; -std::vector XpuptiActivityProfilerSession::deviceUUIDs_ = {}; -std::unordered_set - XpuptiActivityProfilerSession::correlateRuntimeOps_ = { - "piextUSMEnqueueFill"sv, - "urEnqueueUSMFill"sv, - "piextUSMEnqueueFill2D"sv, - "urEnqueueUSMFill2D"sv, - "piextUSMEnqueueMemcpy"sv, - "urEnqueueUSMMemcpy"sv, - "piextUSMEnqueueMemset"sv, - "piextUSMEnqueueMemcpy2D"sv, - "urEnqueueUSMMemcpy2D"sv, - "piextUSMEnqueueMemset2D"sv, - "piEnqueueKernelLaunch"sv, - "urEnqueueKernelLaunch"sv, - "piextEnqueueKernelLaunchCustom"sv, - "urEnqueueKernelLaunchCustomExp"sv, - "piextEnqueueCooperativeKernelLaunch"sv, - "urEnqueueCooperativeKernelLaunchExp"sv}; - -// =========== Session Constructor ============= // -XpuptiActivityProfilerSession::XpuptiActivityProfilerSession( - XpuptiActivityApi& xpti, - const std::string& name, - const libkineto::Config& config, - const std::set& activity_types) - : xpti_(xpti), - name_(name), - config_(config.clone()), - activity_types_(activity_types) { - enumDeviceUUIDs(); - xpti_.enableXpuptiActivities(activity_types_); -} - -XpuptiActivityProfilerSession::~XpuptiActivityProfilerSession() { - xpti_.clearActivities(); -} - -// =========== Session Public Methods ============= // -void XpuptiActivityProfilerSession::start() { - profilerStartTs_ = - libkineto::timeSinceEpoch(std::chrono::high_resolution_clock::now()); -} - -void XpuptiActivityProfilerSession::stop() { - xpti_.disablePtiActivities(activity_types_); - profilerEndTs_ = - libkineto::timeSinceEpoch(std::chrono::high_resolution_clock::now()); -} - -void XpuptiActivityProfilerSession::toggleCollectionDynamic(const bool enable) { - if (enable) { - xpti_.enableXpuptiActivities(activity_types_); - } else { - xpti_.disablePtiActivities(activity_types_); - } -} - std::string getXpuDeviceProperties() { std::vector jsonProps; // Enumerated GPU devices from the specific platform @@ -93,9 +25,8 @@ std::string getXpuDeviceProperties() { const auto& device_list = platform.get_devices(); for (size_t i = 0; i < device_list.size(); i++) { const auto& device = device_list[i]; - jsonProps.push_back( - fmt::format( - R"JSON( + jsonProps.push_back(fmt::format( + R"JSON( {{ "id": {}, "name": "{}", @@ -108,117 +39,22 @@ std::string getXpuDeviceProperties() { "vendor": "{}", "driverVersion": "{}" }})JSON", - i, - device.get_info(), - device.get_info(), - device.get_info(), - device.get_info(), - device.get_info(), - device.get_info(), - device.get_info(), - device.get_info(), - device.get_info())); + i, + device.get_info(), + device.get_info(), + device.get_info(), + device.get_info(), + device.get_info(), + device.get_info(), + device.get_info(), + device.get_info(), + device.get_info())); } } return fmt::format("{}", fmt::join(jsonProps, ",")); } -void XpuptiActivityProfilerSession::processTrace(ActivityLogger& logger) { - traceBuffer_.span = - libkineto::TraceSpan(profilerStartTs_, profilerEndTs_, name_); - traceBuffer_.span.iteration = iterationCount_++; - auto gpuBuffer = xpti_.activityBuffers(); - if (gpuBuffer) { - xpti_.processActivities( - *gpuBuffer, - [this, &logger](const pti_view_record_base* record) -> void { - handlePtiActivity(record, logger); - }); - } -} - -void XpuptiActivityProfilerSession::processTrace( - ActivityLogger& logger, - libkineto::getLinkedActivityCallback get_linked_activity, - int64_t captureWindowStartTime, - int64_t captureWindowEndTime) { - captureWindowStartTime_ = captureWindowStartTime; - captureWindowEndTime_ = captureWindowEndTime; - cpuActivity_ = get_linked_activity; - processTrace(logger); -} - -std::unique_ptr -XpuptiActivityProfilerSession::getTraceBuffer() { - return std::make_unique(std::move(traceBuffer_)); -} - -void XpuptiActivityProfilerSession::pushCorrelationId(uint64_t id) { - xpti_.pushCorrelationID(id, XpuptiActivityApi::CorrelationFlowType::Default); -} - -void XpuptiActivityProfilerSession::popCorrelationId() { - xpti_.popCorrelationID(XpuptiActivityApi::CorrelationFlowType::Default); -} - -void XpuptiActivityProfilerSession::pushUserCorrelationId(uint64_t id) { - xpti_.pushCorrelationID(id, XpuptiActivityApi::CorrelationFlowType::User); -} - -void XpuptiActivityProfilerSession::popUserCorrelationId() { - xpti_.popCorrelationID(XpuptiActivityApi::CorrelationFlowType::User); -} - -void XpuptiActivityProfilerSession::enumDeviceUUIDs() { - if (!deviceUUIDs_.empty()) { - return; - } - auto platform_list = sycl::platform::get_platforms(); - // Enumerated GPU devices from the specific platform. - for (const auto& platform : platform_list) { - if (platform.get_backend() != sycl::backend::ext_oneapi_level_zero) { - continue; - } - auto device_list = platform.get_devices(); - for (const auto& device : device_list) { - if (device.is_gpu()) { - if (device.has(sycl::aspect::ext_intel_device_info_uuid)) { - deviceUUIDs_.push_back( - device.get_info()); - } else { - std::cerr - << "Warnings: UUID is not supported for this XPU device. The device index of records will be 0." - << std::endl; - deviceUUIDs_.push_back(DeviceUUIDsT{}); - } - } - } - } -} - -DeviceIndex_t XpuptiActivityProfilerSession::getDeviceIdxFromUUID( - const uint8_t deviceUUID[16]) { - auto it = std::find_if( - deviceUUIDs_.begin(), - deviceUUIDs_.end(), - [deviceUUID](const DeviceUUIDsT& deviceUUIDinVec) { - return std::equal( - deviceUUIDinVec.begin(), - deviceUUIDinVec.end(), - deviceUUID, - deviceUUID + 16); - }); - if (it == deviceUUIDs_.end()) { - std::cerr - << "Warnings: Can't find the legal XPU device from the given UUID." - << std::endl; - return static_cast(0); - } - return static_cast(std::distance(deviceUUIDs_.begin(), it)); -} - -// =========== ActivityProfiler Public Methods ============= // [[noreturn]] const std::set& XPUActivityProfiler::availableActivities() const { throw std::runtime_error( @@ -241,4 +77,5 @@ XPUActivityProfiler::configure( const libkineto::Config& config) { return configure(activity_types, config); } + } // namespace KINETO_NAMESPACE diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.h b/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.h index e40bc9856..475b41a28 100644 --- a/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.h +++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfiler.h @@ -8,128 +8,12 @@ #pragma once -#include "XpuptiProfilerMacros.h" - #include "IActivityProfiler.h" -#include "libkineto.h" - -#include - -#include -#include -#include -#include -#include namespace KINETO_NAMESPACE { std::string getXpuDeviceProperties(); -using DeviceUUIDsT = std::array; - -class XpuptiActivityProfilerSession - : public libkineto::IActivityProfilerSession { - public: - XpuptiActivityProfilerSession() = delete; - XpuptiActivityProfilerSession( - XpuptiActivityApi& xpti, - const std::string& name, - const libkineto::Config& config, - const std::set& activity_types); - XpuptiActivityProfilerSession(const XpuptiActivityProfilerSession&) = delete; - XpuptiActivityProfilerSession& operator=( - const XpuptiActivityProfilerSession&) = delete; - - ~XpuptiActivityProfilerSession(); - - void start() override; - void stop() override; - void toggleCollectionDynamic(const bool); - std::vector errors() override { - return errors_; - }; - void processTrace(ActivityLogger& logger) override; - void processTrace( - ActivityLogger& logger, - libkineto::getLinkedActivityCallback get_linked_activity, - int64_t captureWindowStartTime, - int64_t captureWindowEndTime) override; - std::unique_ptr getDeviceInfo() override { - return {}; - } - std::vector getResourceInfos() override { - return {}; - } - std::unique_ptr getTraceBuffer() override; - - void pushCorrelationId(uint64_t id) override; - void popCorrelationId() override; - void pushUserCorrelationId(uint64_t id) override; - void popUserCorrelationId() override; - - private: - void checkTimestampOrder(const ITraceActivity* act1); - void removeCorrelatedPtiActivities(const ITraceActivity* act1); - bool outOfRange(const ITraceActivity* act); - int64_t getMappedQueueId(uint64_t sycl_queue_id); - const ITraceActivity* linkedActivity( - int32_t correlationId, - const std::unordered_map& correlationMap); - void handleCorrelationActivity( - const pti_view_record_external_correlation* correlation); - -#if PTI_VERSION_AT_LEAST(0, 11) - using pti_view_record_api_t = pti_view_record_api; -#else - using pti_view_record_api_t = pti_view_record_sycl_runtime; -#endif - - std::string getApiName(const pti_view_record_api_t* activity); - - template - void handleRuntimeKernelMemcpyMemsetActivities( - ActivityType activityType, - const pti_view_memory_record_type* activity, - ActivityLogger& logger); - - void handleOverheadActivity( - const pti_view_record_overhead* activity, - ActivityLogger& logger); - void handlePtiActivity( - const pti_view_record_base* record, - ActivityLogger& logger); - - // enumerate XPU Device UUIDs from runtime for once - void enumDeviceUUIDs(); - - // get logical device index(int8) from the given UUID from runtime - // for profiling activity creation - DeviceIndex_t getDeviceIdxFromUUID(const uint8_t deviceUUID[16]); - - private: - static uint32_t iterationCount_; - static std::vector deviceUUIDs_; - static std::unordered_set correlateRuntimeOps_; - - int64_t captureWindowStartTime_{0}; - int64_t captureWindowEndTime_{0}; - int64_t profilerStartTs_{0}; - int64_t profilerEndTs_{0}; - std::unordered_map cpuCorrelationMap_; - std::unordered_map userCorrelationMap_; - std::unordered_map correlatedPtiActivities_; - std::vector errors_; - - libkineto::getLinkedActivityCallback cpuActivity_; - - XpuptiActivityApi& xpti_; - libkineto::CpuTraceBuffer traceBuffer_; - std::unordered_map sycl_queue_pool_; - std::unique_ptr config_{nullptr}; - const std::set& activity_types_; - std::string name_; -}; - class XPUActivityProfiler : public libkineto::IActivityProfiler { public: XPUActivityProfiler() = default; diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.cpp new file mode 100644 index 000000000..cb9a80ff5 --- /dev/null +++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "XpuptiActivityProfilerSession.h" + +#if PTI_VERSION_AT_LEAST(0, 15) + +#include "XpuptiActivityApiV2.h" + +namespace KINETO_NAMESPACE { + +XpuptiActivityProfilerSession::XpuptiActivityProfilerSession( + XpuptiActivityApi& xpti, + const std::string& name, + const libkineto::Config& config, + const std::set& activity_types) + : XpuptiActivityProfilerSessionV1(xpti, name, config, activity_types) { + scopeProfilerEnabled_ = + activity_types.count(ActivityType::XPU_SCOPE_PROFILER) > 0; + if (scopeProfilerEnabled_) { + xpti_.enableScopeProfiler(*config_); + } +} + +XpuptiActivityProfilerSession::~XpuptiActivityProfilerSession() { + if (scopeProfilerEnabled_) { + xpti_.disableScopeProfiler(); + } +} + +void XpuptiActivityProfilerSession::start() { + XpuptiActivityProfilerSessionV1::start(); + if (scopeProfilerEnabled_) { + xpti_.startScopeActivity(); + } +} + +void XpuptiActivityProfilerSession::stop() { + if (scopeProfilerEnabled_) { + xpti_.stopScopeActivity(); + } + XpuptiActivityProfilerSessionV1::stop(); +} + +void XpuptiActivityProfilerSession::toggleCollectionDynamic(const bool enable) { + XpuptiActivityProfilerSessionV1::toggleCollectionDynamic(enable); + if (scopeProfilerEnabled_) { + if (enable) { + xpti_.startScopeActivity(); + } else { + xpti_.stopScopeActivity(); + } + } +} + +void XpuptiActivityProfilerSession::processTrace(ActivityLogger& logger) { + XpuptiActivityProfilerSessionV1::processTrace(logger); + if (scopeProfilerEnabled_) { + xpti_.processScopeTrace( + [this, &logger]( + const pti_metrics_scope_record_t* record, + const pti_metrics_scope_record_metadata_t& metadata) -> void { + handleScopeRecord(record, metadata, logger); + }); + } +} + +} // namespace KINETO_NAMESPACE + +#endif diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h new file mode 100644 index 000000000..0b0f9fb35 --- /dev/null +++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "XpuptiActivityProfilerSessionV1.h" + +#if PTI_VERSION_AT_LEAST(0, 15) + +#include + +namespace KINETO_NAMESPACE { + +class XpuptiActivityProfilerSession : public XpuptiActivityProfilerSessionV1 { + public: + XpuptiActivityProfilerSession( + XpuptiActivityApi& xpti, + const std::string& name, + const libkineto::Config& config, + const std::set& activity_types); + + XpuptiActivityProfilerSession(const XpuptiActivityProfilerSession&) = delete; + XpuptiActivityProfilerSession& operator=( + const XpuptiActivityProfilerSession&) = delete; + + ~XpuptiActivityProfilerSession(); + void start(); + void stop(); + void toggleCollectionDynamic(const bool enable); + + void processTrace(ActivityLogger& logger) override; + + void handleScopeRecord( + const pti_metrics_scope_record_t* record, + const pti_metrics_scope_record_metadata_t& metadata, + ActivityLogger& logger); + + private: + bool scopeProfilerEnabled_{false}; +}; + +} // namespace KINETO_NAMESPACE + +#else + +namespace KINETO_NAMESPACE { +using XpuptiActivityProfilerSession = XpuptiActivityProfilerSessionV1; +} // namespace KINETO_NAMESPACE + +#endif diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSessionV1.cpp b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSessionV1.cpp new file mode 100644 index 000000000..4679a55a4 --- /dev/null +++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSessionV1.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "XpuptiActivityProfilerSessionV1.h" +#include "XpuptiActivityApiV2.h" + +#include "time_since_epoch.h" + +#include + +#include +#include +#include + +namespace KINETO_NAMESPACE { + +using namespace std::literals::string_view_literals; + +uint32_t XpuptiActivityProfilerSessionV1::iterationCount_ = 0; +std::vector XpuptiActivityProfilerSessionV1::deviceUUIDs_ = {}; +std::unordered_set + XpuptiActivityProfilerSessionV1::correlateRuntimeOps_ = { + "piextUSMEnqueueFill"sv, + "urEnqueueUSMFill"sv, + "piextUSMEnqueueFill2D"sv, + "urEnqueueUSMFill2D"sv, + "piextUSMEnqueueMemcpy"sv, + "urEnqueueUSMMemcpy"sv, + "piextUSMEnqueueMemset"sv, + "piextUSMEnqueueMemcpy2D"sv, + "urEnqueueUSMMemcpy2D"sv, + "piextUSMEnqueueMemset2D"sv, + "piEnqueueKernelLaunch"sv, + "urEnqueueKernelLaunch"sv, + "piextEnqueueKernelLaunchCustom"sv, + "urEnqueueKernelLaunchCustomExp"sv, + "piextEnqueueCooperativeKernelLaunch"sv, + "urEnqueueCooperativeKernelLaunchExp"sv}; + +// =========== Session Constructor ============= // +XpuptiActivityProfilerSessionV1::XpuptiActivityProfilerSessionV1( + XpuptiActivityApi& xpti, + const std::string& name, + const libkineto::Config& config, + const std::set& activity_types) + : xpti_(xpti), + name_(name), + config_(config.clone()), + activity_types_(activity_types) { + enumDeviceUUIDs(); + xpti_.enableXpuptiActivities(activity_types_); +} + +XpuptiActivityProfilerSessionV1::~XpuptiActivityProfilerSessionV1() { + xpti_.clearActivities(); +} + +// =========== Session Public Methods ============= // +void XpuptiActivityProfilerSessionV1::start() { + profilerStartTs_ = + libkineto::timeSinceEpoch(std::chrono::high_resolution_clock::now()); +} + +void XpuptiActivityProfilerSessionV1::stop() { + xpti_.disablePtiActivities(activity_types_); + profilerEndTs_ = + libkineto::timeSinceEpoch(std::chrono::high_resolution_clock::now()); +} + +void XpuptiActivityProfilerSessionV1::toggleCollectionDynamic( + const bool enable) { + if (enable) { + xpti_.enableXpuptiActivities(activity_types_); + } else { + xpti_.disablePtiActivities(activity_types_); + } +} + +void XpuptiActivityProfilerSessionV1::processTrace(ActivityLogger& logger) { + traceBuffer_.span = + libkineto::TraceSpan(profilerStartTs_, profilerEndTs_, name_); + traceBuffer_.span.iteration = iterationCount_++; + auto gpuBuffer = xpti_.activityBuffers(); + if (gpuBuffer) { + xpti_.processActivities( + *gpuBuffer, + [this, &logger](const pti_view_record_base* record) -> void { + handlePtiActivity(record, logger); + }); + } +} + +void XpuptiActivityProfilerSessionV1::processTrace( + ActivityLogger& logger, + libkineto::getLinkedActivityCallback get_linked_activity, + int64_t captureWindowStartTime, + int64_t captureWindowEndTime) { + captureWindowStartTime_ = captureWindowStartTime; + captureWindowEndTime_ = captureWindowEndTime; + cpuActivity_ = get_linked_activity; + processTrace(logger); +} + +std::unique_ptr +XpuptiActivityProfilerSessionV1::getTraceBuffer() { + return std::make_unique(std::move(traceBuffer_)); +} + +void XpuptiActivityProfilerSessionV1::pushCorrelationId(uint64_t id) { + xpti_.pushCorrelationID(id, XpuptiActivityApi::CorrelationFlowType::Default); +} + +void XpuptiActivityProfilerSessionV1::popCorrelationId() { + xpti_.popCorrelationID(XpuptiActivityApi::CorrelationFlowType::Default); +} + +void XpuptiActivityProfilerSessionV1::pushUserCorrelationId(uint64_t id) { + xpti_.pushCorrelationID(id, XpuptiActivityApi::CorrelationFlowType::User); +} + +void XpuptiActivityProfilerSessionV1::popUserCorrelationId() { + xpti_.popCorrelationID(XpuptiActivityApi::CorrelationFlowType::User); +} + +void XpuptiActivityProfilerSessionV1::enumDeviceUUIDs() { + if (!deviceUUIDs_.empty()) { + return; + } + auto platform_list = sycl::platform::get_platforms(); + // Enumerated GPU devices from the specific platform. + for (const auto& platform : platform_list) { + if (platform.get_backend() != sycl::backend::ext_oneapi_level_zero) { + continue; + } + auto device_list = platform.get_devices(); + for (const auto& device : device_list) { + if (device.is_gpu()) { + if (device.has(sycl::aspect::ext_intel_device_info_uuid)) { + deviceUUIDs_.push_back( + device.get_info()); + } else { + std::cerr + << "Warnings: UUID is not supported for this XPU device. The device index of records will be 0." + << std::endl; + deviceUUIDs_.push_back(DeviceUUIDsT{}); + } + } + } + } +} + +DeviceIndex_t XpuptiActivityProfilerSessionV1::getDeviceIdxFromUUID( + const uint8_t deviceUUID[16]) { + auto it = std::find_if( + deviceUUIDs_.begin(), + deviceUUIDs_.end(), + [deviceUUID](const DeviceUUIDsT& deviceUUIDinVec) { + return std::equal( + deviceUUIDinVec.begin(), + deviceUUIDinVec.end(), + deviceUUID, + deviceUUID + 16); + }); + if (it == deviceUUIDs_.end()) { + std::cerr + << "Warnings: Can't find the legal XPU device from the given UUID." + << std::endl; + return static_cast(0); + } + return static_cast(std::distance(deviceUUIDs_.begin(), it)); +} + +} // namespace KINETO_NAMESPACE diff --git a/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSessionV1.h b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSessionV1.h new file mode 100644 index 000000000..4936d03e1 --- /dev/null +++ b/libkineto/src/plugin/xpupti/XpuptiActivityProfilerSessionV1.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "XpuptiProfilerMacros.h" + +#include "IActivityProfiler.h" +#include "libkineto.h" + +#include + +#include +#include +#include +#include +#include + +namespace KINETO_NAMESPACE { + +using DeviceUUIDsT = std::array; + +class XpuptiActivityApi; + +class XpuptiActivityProfilerSessionV1 + : public libkineto::IActivityProfilerSession { + public: + XpuptiActivityProfilerSessionV1() = delete; + XpuptiActivityProfilerSessionV1( + XpuptiActivityApi& xpti, + const std::string& name, + const libkineto::Config& config, + const std::set& activity_types); + XpuptiActivityProfilerSessionV1(const XpuptiActivityProfilerSessionV1&) = + delete; + XpuptiActivityProfilerSessionV1& operator=( + const XpuptiActivityProfilerSessionV1&) = delete; + + ~XpuptiActivityProfilerSessionV1(); + + void start() override; + void stop() override; + void toggleCollectionDynamic(const bool); + std::vector errors() override { + return errors_; + }; + void processTrace(ActivityLogger& logger) override; + void processTrace( + ActivityLogger& logger, + libkineto::getLinkedActivityCallback get_linked_activity, + int64_t captureWindowStartTime, + int64_t captureWindowEndTime) override; + std::unique_ptr getDeviceInfo() override { + return {}; + } + std::vector getResourceInfos() override { + return {}; + } + std::unique_ptr getTraceBuffer() override; + + void pushCorrelationId(uint64_t id) override; + void popCorrelationId() override; + void pushUserCorrelationId(uint64_t id) override; + void popUserCorrelationId() override; + + private: + void checkTimestampOrder(const ITraceActivity* act1); + void removeCorrelatedPtiActivities(const ITraceActivity* act1); + bool outOfRange(const ITraceActivity* act); + int64_t getMappedQueueId(uint64_t sycl_queue_id); + const ITraceActivity* linkedActivity( + int32_t correlationId, + const std::unordered_map& correlationMap); + void handleCorrelationActivity( + const pti_view_record_external_correlation* correlation); + +#if PTI_VERSION_AT_LEAST(0, 11) + using pti_view_record_api_t = pti_view_record_api; +#else + using pti_view_record_api_t = pti_view_record_sycl_runtime; +#endif + + std::string getApiName(const pti_view_record_api_t* activity); + + template + void handleRuntimeKernelMemcpyMemsetActivities( + ActivityType activityType, + const pti_view_memory_record_type* activity, + ActivityLogger& logger); + + void handleOverheadActivity( + const pti_view_record_overhead* activity, + ActivityLogger& logger); + void handlePtiActivity( + const pti_view_record_base* record, + ActivityLogger& logger); + + // enumerate XPU Device UUIDs from runtime for once + void enumDeviceUUIDs(); + + // get logical device index(int8) from the given UUID from runtime + // for profiling activity creation + DeviceIndex_t getDeviceIdxFromUUID(const uint8_t deviceUUID[16]); + + protected: + static uint32_t iterationCount_; + static std::vector deviceUUIDs_; + static std::unordered_set correlateRuntimeOps_; + + int64_t captureWindowStartTime_{0}; + int64_t captureWindowEndTime_{0}; + int64_t profilerStartTs_{0}; + int64_t profilerEndTs_{0}; + std::unordered_map cpuCorrelationMap_; + std::unordered_map userCorrelationMap_; + std::unordered_map correlatedPtiActivities_; + std::vector errors_; + + libkineto::getLinkedActivityCallback cpuActivity_; + + XpuptiActivityApi& xpti_; + libkineto::CpuTraceBuffer traceBuffer_; + std::unordered_map sycl_queue_pool_; + std::unique_ptr config_{nullptr}; + const std::set& activity_types_; + std::string name_; + + struct KernelActivity { + void emplace( + int64_t startTime, + int64_t endTime, + int32_t device, + int32_t resource) { + startTime_ = startTime; + endTime_ = endTime; + device_ = device; + resource_ = resource; + } + + int64_t startTime_{0}; + int64_t endTime_{0}; + int32_t device_{0}; + int32_t resource_{0}; + }; + + std::unordered_map kernelActivities_; + uint64_t lastKernelActivityEndTime_{0}; +}; + +} // namespace KINETO_NAMESPACE diff --git a/libkineto/src/plugin/xpupti/XpuptiProfilerMacros.cpp b/libkineto/src/plugin/xpupti/XpuptiProfilerMacros.cpp new file mode 100644 index 000000000..edbb96060 --- /dev/null +++ b/libkineto/src/plugin/xpupti/XpuptiProfilerMacros.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "XpuptiProfilerMacros.h" + +#include + +#include + +namespace KINETO_NAMESPACE { + +[[noreturn]] void throwXpuRuntimeError( + std::string_view errMsg, + pti_result errCode) { + auto errMsgWithCode = + fmt::format("{} The error code is {}", errMsg, static_cast(errCode)); +#if PTI_VERSION_AT_LEAST(0, 10) + errMsgWithCode = fmt::format( + "{}. The detailed error message is: {}", + errMsgWithCode, + ptiResultTypeToString(errCode)); +#endif + throw std::runtime_error(errMsgWithCode); +} + +[[noreturn]] void +throwXpuRuntimeError(const char* func, int line, pti_result errCode) { + auto errMsg = fmt::format( + "Kineto Profiler on XPU got error from function {} line {}.", func, line); + throwXpuRuntimeError(errMsg, errCode); +} + +} // namespace KINETO_NAMESPACE diff --git a/libkineto/src/plugin/xpupti/XpuptiProfilerMacros.h b/libkineto/src/plugin/xpupti/XpuptiProfilerMacros.h index 22d69f915..6ca9b0e4d 100644 --- a/libkineto/src/plugin/xpupti/XpuptiProfilerMacros.h +++ b/libkineto/src/plugin/xpupti/XpuptiProfilerMacros.h @@ -8,10 +8,9 @@ #pragma once -#include -#include +#include -#include +#include namespace KINETO_NAMESPACE { @@ -21,36 +20,20 @@ using namespace libkineto; (PTI_VERSION_MAJOR > MAJOR || \ (PTI_VERSION_MAJOR == MAJOR && PTI_VERSION_MINOR >= MINOR)) -#if PTI_VERSION_AT_LEAST(0, 10) -#define XPUPTI_CALL(returnCode) \ - { \ - if (returnCode != PTI_SUCCESS) { \ - std::string funcMsg(__func__); \ - std::string line(std::string(" line ") + std::to_string(__LINE__)); \ - std::string codeMsg = std::to_string(returnCode); \ - std::string HeadMsg("Kineto Profiler on XPU got error from function "); \ - std::string Msg(". The error code is "); \ - std::string detailMsg(". The detailed error message is "); \ - detailMsg = detailMsg + std::string(ptiResultTypeToString(returnCode)); \ - throw std::runtime_error( \ - HeadMsg + funcMsg + line + Msg + codeMsg + detailMsg); \ - } \ - } -#else -#define XPUPTI_CALL(returnCode) \ - { \ - if (returnCode != PTI_SUCCESS) { \ - std::string funcMsg(__func__); \ - std::string line(std::string(" line ") + std::to_string(__LINE__)); \ - std::string codeMsg = std::to_string(returnCode); \ - std::string HeadMsg("Kineto Profiler on XPU got error from function "); \ - std::string Msg(". The error code is "); \ - throw std::runtime_error(HeadMsg + funcMsg + line + Msg + codeMsg); \ - } \ +[[noreturn]] void throwXpuRuntimeError( + std::string_view errMsg, + pti_result errCode); + +[[noreturn]] void +throwXpuRuntimeError(const char* func, int line, pti_result errCode); + +#define XPUPTI_CALL(returnCode) \ + { \ + if (returnCode != PTI_SUCCESS) { \ + throwXpuRuntimeError(__func__, __LINE__, returnCode); \ + } \ } -#endif -class XpuptiActivityApi; using DeviceIndex_t = int8_t; } // namespace KINETO_NAMESPACE diff --git a/libkineto/test/ConfigTest.cpp b/libkineto/test/ConfigTest.cpp index 23e2ee0b2..7b809dba4 100644 --- a/libkineto/test/ConfigTest.cpp +++ b/libkineto/test/ConfigTest.cpp @@ -134,6 +134,11 @@ TEST(ParseTest, ActivityTypes) { cfg2.selectedActivityTypes(), std::set({ActivityType::XPU_RUNTIME})); + EXPECT_TRUE(cfg2.parse("ACTIVITY_TYPES = xpu_scope_profiler")); + EXPECT_EQ( + cfg2.selectedActivityTypes(), + std::set({ActivityType::XPU_SCOPE_PROFILER})); + EXPECT_TRUE( cfg2.parse("ACTIVITY_TYPES=privateuse1_Runtime,privateuse1_driver")); EXPECT_EQ( diff --git a/libkineto/test/xpupti/CMakeLists.txt b/libkineto/test/xpupti/CMakeLists.txt index febf1410a..09f872747 100644 --- a/libkineto/test/xpupti/CMakeLists.txt +++ b/libkineto/test/xpupti/CMakeLists.txt @@ -50,3 +50,4 @@ function(make_test test_file) endfunction() make_test(XpuptiProfilerTest.cpp) +make_test(XpuptiScopeProfilerTest.cpp) diff --git a/libkineto/test/xpupti/XpuptiScopeProfilerConfigTest.cpp b/libkineto/test/xpupti/XpuptiScopeProfilerConfigTest.cpp index b16d5fbfc..81ac1a73c 100644 --- a/libkineto/test/xpupti/XpuptiScopeProfilerConfigTest.cpp +++ b/libkineto/test/xpupti/XpuptiScopeProfilerConfigTest.cpp @@ -7,7 +7,6 @@ */ #include "src/plugin/xpupti/XpuptiScopeProfilerConfig.h" -#include "include/Config.h" #include #include diff --git a/libkineto/test/xpupti/XpuptiScopeProfilerTest.cpp b/libkineto/test/xpupti/XpuptiScopeProfilerTest.cpp new file mode 100644 index 000000000..8311792bb --- /dev/null +++ b/libkineto/test/xpupti/XpuptiScopeProfilerTest.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "XpuptiTestUtilities.h" + +#include "src/plugin/xpupti/XpuptiActivityProfiler.h" +#include "src/plugin/xpupti/XpuptiProfilerMacros.h" +#include "src/plugin/xpupti/XpuptiScopeProfilerConfig.h" + +#include + +#include +#include + +#include + +namespace KN = KINETO_NAMESPACE; + +class XpuptiScopeProfilerTest : public ::testing::Test { + protected: + void SetUp() override { + KN::XpuptiScopeProfilerConfig::registerFactory(); + } +}; + +void RunTest(std::string_view perKernel, unsigned maxScopes) { + KN::Config cfg; + + std::vector metrics = { + "GpuTime", + "GpuCoreClocks", + "AvgGpuCoreFrequencyMHz", + "XVE_INST_EXECUTED_ALU0_ALL_UTILIZATION", + "XVE_ACTIVE", + "XVE_STALL"}; + + EXPECT_TRUE(cfg.parse( + fmt::format("XPUPTI_PROFILER_METRICS = {}", fmt::join(metrics, ",")))); + EXPECT_TRUE(cfg.parse( + fmt::format("XPUPTI_PROFILER_ENABLE_PER_KERNEL = {}", perKernel))); + EXPECT_TRUE( + cfg.parse(fmt::format("XPUPTI_PROFILER_MAX_SCOPES = {}", maxScopes))); + + std::set activities{ + KN::ActivityType::GPU_MEMCPY, + KN::ActivityType::GPU_MEMSET, + KN::ActivityType::CONCURRENT_KERNEL, + KN::ActivityType::EXTERNAL_CORRELATION, + KN::ActivityType::XPU_RUNTIME, + KN::ActivityType::XPU_SCOPE_PROFILER}; + + std::vector expectedActivities = { + "urEnqueueMemBufferWrite", + "urEnqueueMemBufferWrite", + "urEnqueueMemBufferWrite", + "Memcpy M2D", + "Memcpy M2D", + "Memcpy M2D", + "urEnqueueKernelLaunch", + "Run(sycl::_V1::queue, ...)", + "urEnqueueMemBufferRead", + "Memcpy D2M", + "metrics: Run(sycl::_V1::queue, ...)", + "metrics", + "metrics"}; + + std::vector expectedTypes = { + "xpu_runtime", + "xpu_runtime", + "xpu_runtime", + "gpu_memcpy", + "gpu_memcpy", + "gpu_memcpy", + "xpu_runtime", + "kernel", + "xpu_runtime", + "gpu_memcpy", + "kernel", + "xpu_scope_profiler", + "xpu_scope_profiler"}; + + std::exception_ptr eptr; + + try { + constexpr unsigned repeatCount = 5; + RunProfilerTest( + metrics, + activities, + cfg, + repeatCount, + std::move(expectedActivities), + std::move(expectedTypes)); + } catch (...) { + eptr = std::current_exception(); + } + +#if PTI_VERSION_AT_LEAST(0, 15) + bool expectThrow = (perKernel == "false"); +#else + constexpr bool expectThrow = true; +#endif + + if (expectThrow) { + EXPECT_THROW( + try { + if (eptr) { + std::rethrow_exception(eptr); + } + } catch (const std::runtime_error& e) { + static bool isVerbose = IsEnvVerbose(); + if (isVerbose) { + std::cout << "std::runtime_error = " << e.what() << std::endl; + } + throw; + }, + std::runtime_error); + } else { + if (eptr) { + std::rethrow_exception(eptr); + } + } +} + +///////////////////////////////////////////////////////////////////// + +TEST_F(XpuptiScopeProfilerTest, PerKernelScope) { + RunTest("true", 314); +} + +TEST_F(XpuptiScopeProfilerTest, UserScope) { + RunTest("false", 159); +} diff --git a/libkineto/test/xpupti/XpuptiTestUtilities.cpp b/libkineto/test/xpupti/XpuptiTestUtilities.cpp index 0d78ab5cc..bc7208806 100644 --- a/libkineto/test/xpupti/XpuptiTestUtilities.cpp +++ b/libkineto/test/xpupti/XpuptiTestUtilities.cpp @@ -10,6 +10,8 @@ #include "src/plugin/xpupti/XpuptiActivityProfiler.h" +#include + #include #include @@ -216,6 +218,33 @@ RunProfilerTest( activitiesCount[*insertResult.first]++; typesCount[pActivity->type()]++; + bool isNameMetrics = pActivity->name() == "metrics"; + bool nameStartsWithMetrics = pActivity->name().find("metrics:") == 0; + auto [metricsCount, metricsMask] = + CountMetricsInString(metrics, pActivity->metadataJson()); + + switch (pActivity->type()) { + case KN::ActivityType::CONCURRENT_KERNEL: + if (nameStartsWithMetrics) + goto label_scope; + else + goto label_default; + + case KN::ActivityType::XPU_SCOPE_PROFILER: + EXPECT_TRUE(isNameMetrics); + label_scope: + EXPECT_EQ(metricsCount, metrics.size()); + EXPECT_EQ(metricsMask, (1u << metrics.size()) - 1); + ++scopeProfilerActCount; + break; + + default: + label_default: + EXPECT_FALSE(isNameMetrics); + EXPECT_EQ(metricsCount, 0); + EXPECT_EQ(metricsMask, 0); + } + if (isVerbose) { #define PRINT(A) std::cout << #A " = " << pActivity->A() << std::endl; PRINT(deviceId)