From 115cfaa623369a13988bb3eece8bd430dc6d7e0c Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 11 Dec 2024 12:04:32 +0100 Subject: [PATCH] Add option to drop the whole TF and continue --- .../include/ITStracking/Configuration.h | 2 + .../tracking/include/ITStracking/TimeFrame.h | 9 ++- .../include/ITStracking/TrackingConfigParam.h | 57 ++++++++++--------- .../ITSMFT/ITS/tracking/src/TimeFrame.cxx | 21 ++++--- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 20 ++++++- .../ITS/tracking/src/TrackingInterface.cxx | 4 +- 6 files changed, 70 insertions(+), 43 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h index 82d737153c827..51f3e7a637aab 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h @@ -100,6 +100,8 @@ struct TrackingParameters { bool PerPrimaryVertexProcessing = false; bool SaveTimeBenchmarks = false; bool DoUPCIteration = false; + bool FataliseUponFailure = true; + bool DropTFUponFailure = false; /// Cluster attachment bool UseTrackFollower = false; bool UseTrackFollowerTop = false; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h index 0237f4ce9579b..46c4a8e19fa47 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h @@ -259,7 +259,7 @@ class TimeFrame void printCellLUTonLayer(int i); void printTrackletLUTs(); void printCellLUTs(); - void printROFInfo(const int rofId); + void printSliceInfo(const int, const int); IndexTableUtils mIndexTableUtils; @@ -297,6 +297,13 @@ class TimeFrame std::vector mMultiplicityCutMask; const o2::base::PropagatorImpl* mPropagatorDevice = nullptr; // Needed only for GPU + void dropTracks() + { + for (auto& v : mTracks) { + 
deepVectorClear(v); + } + } + protected: template void deepVectorClear(std::vector& vec) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index 68bfdb51170b5..20e9a4362b77c 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -22,13 +22,12 @@ namespace its struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper { - int nIterations = 1; // Number of vertexing passes to perform - int vertPerRofThreshold = 0; // Maximum number of vertices per ROF to trigger second a round - bool allowSingleContribClusters = false; - // Number of ROFs to be considered for the vertexing - int deltaRof = 0; + int nIterations = 1; // Number of vertexing passes to perform. + int vertPerRofThreshold = 0; // Maximum number of vertices per ROF to trigger a second iteration. + bool allowSingleContribClusters = false; // attempt to find vertices in case of a single tracklet found. + int deltaRof = 0; // Number of ROFs to be considered for the vertexing. - // geometrical cuts + // geometrical cuts for tracklet selection float zCut = 0.002f; float phiCut = 0.005f; float pairCut = 0.04f; @@ -42,12 +41,12 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper { // Use TGeo for mat. budget - bool useMatCorrTGeo = false; - bool useFastMaterial = false; - int deltaRof = 0; - float sysErrY2[7] = {0}; // systematic error^2 in Y per layer - float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer + bool useMatCorrTGeo = false; // use full geometry to correct for material budget accounting in the fits. Default is to use the material budget LUT. + bool useFastMaterial = false; // use faster material approximation for material budget accounting in the fits. 
+ int deltaRof = 0; // configure the width of the window in ROFs to be considered for the tracking. + float sysErrY2[7] = {0}; // systematic error^2 in Y per layer + float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer float maxChi2ClusterAttachment = -1.f; float maxChi2NDF = -1.f; float nSigmaCut = -1.f; @@ -69,23 +68,25 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper 0 off - float trackFollowerNSigmaZ = 1.f; // sigma in z-cut for track-following search rectangle - float trackFollowerNSigmaPhi = 1.f; // sigma in phi-cut for track-following search rectangle + float diamondPos[3] = {0.f, 0.f, 0.f}; // override the position of the vertex + bool useDiamond = false; // enable overriding the vertex position + unsigned long maxMemory = 0; // override default protections on the maximum memory to be used by the tracking + int useTrackFollower = -1; // bit 0: allow mixing implies bits 1&2; bit 1: topwards; bit2: downwards; => 0 off + float trackFollowerNSigmaZ = 1.f; // sigma in z-cut for track-following search rectangle + float trackFollowerNSigmaPhi = 1.f; // sigma in phi-cut for track-following search rectangle float cellsPerClusterLimit = -1.f; float trackletsPerClusterLimit = -1.f; int findShortTracks = -1; - int nThreads = 1; - int nOrbitsPerIterations = 0; - int nROFsPerIterations = 0; - bool perPrimaryVertexProcessing = false; - bool saveTimeBenchmarks = false; - bool overrideBeamEstimation = false; // used by gpuwf only - int trackingMode = -1; // -1: unset, 0=sync, 1=async, 2=cosmics used by gpuwf only - bool doUPCIteration = false; + int nThreads = 1; // number of threads to perform the operations in parallel. + int nROFsPerIterations = 0; // size of the slice of ROFs to be processed at a time, preferably integer divisors of nROFs per TF, to balance the iterations. + int nOrbitsPerIterations = 0; // not implemented: size of the slice of ROFs to be processed at a time, computed using the number of ROFs per orbit. 
+ bool perPrimaryVertexProcessing = false; // perform the full tracking considering the vertex hypotheses one at the time. + bool saveTimeBenchmarks = false; // dump metrics on file + bool overrideBeamEstimation = false; // use beam position from meanVertex CCDB object + int trackingMode = -1; // -1: unset, 0=sync, 1=async, 2=cosmics used by gpuwf only + bool doUPCIteration = false; // Perform an additional iteration for UPC events on tagged vertices. You want to combine this config with VertexerParamConfig.nIterations=2 + bool fataliseUponFailure = true; // granular management of the fatalisation in async mode + bool dropTFUponFailure = false; O2ParamDef(TrackerParamConfig, "ITSCATrackerParam"); }; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index 40a540015d75f..506656e2777d1 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -609,16 +609,19 @@ void TimeFrame::printNClsPerROF() } } -void TimeFrame::printROFInfo(const int rofId) +void TimeFrame::printSliceInfo(const int startROF, const int sliceSize) { - std::cout << "ROF " << rofId << " dump:" << std::endl; - for (int iLayer{0}; iLayer < mClusters.size(); ++iLayer) { - std::cout << "Layer " << iLayer << " has: " << getClustersOnLayer(rofId, iLayer).size() << " clusters." << std::endl; - } - std::cout << "Number of seeding vertices: " << getPrimaryVertices(rofId).size() << std::endl; - int iVertex{0}; - for (auto& v : getPrimaryVertices(rofId)) { - std::cout << "\t vertex " << iVertex++ << ": x=" << v.getX() << " " << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors." 
<< std::endl; + std::cout << "Dumping slice of " << sliceSize << " rofs:" << std::endl; + for (int iROF{startROF}; iROF < startROF + sliceSize; ++iROF) { + std::cout << "ROF " << iROF << " dump:" << std::endl; + for (unsigned int iLayer{0}; iLayer < mClusters.size(); ++iLayer) { + std::cout << "Layer " << iLayer << " has: " << getClustersOnLayer(iROF, iLayer).size() << " clusters." << std::endl; + } + std::cout << "Number of seeding vertices: " << getPrimaryVertices(iROF).size() << std::endl; + int iVertex{0}; + for (auto& v : getPrimaryVertices(iROF)) { + std::cout << "\t vertex " << iVertex++ << ": x=" << v.getX() << " " << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors." << std::endl; + } } } diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index 7b06f7c7bcc76..bc642015b1fcd 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -57,6 +57,7 @@ void Tracker::clustersToTracks(std::function logger, std::f } } + bool dropTF = false; for (int iteration = 0; iteration < (int)mTrkParams.size(); ++iteration) { if (iteration == 3 && mTrkParams[0].DoUPCIteration) { mTimeFrame->swapMasks(); @@ -75,9 +76,12 @@ void Tracker::clustersToTracks(std::function logger, std::f &Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); nTracklets += mTraits->getTFNumberOfTracklets(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - mTimeFrame->printROFInfo(iROFs); + mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. 
Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); + if (mTrkParams[iteration].DropTFUponFailure) { + dropTF = true; + } break; } float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; @@ -91,9 +95,12 @@ void Tracker::clustersToTracks(std::function logger, std::f &Tracker::computeCells, "Cell finding", [](std::string) {}, iteration); nCells += mTraits->getTFNumberOfCells(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - mTimeFrame->printROFInfo(iROFs); + mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); + if (mTrkParams[iteration].DropTFUponFailure) { + dropTF = true; + } break; } float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? 
float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; @@ -110,7 +117,7 @@ void Tracker::clustersToTracks(std::function logger, std::f &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); } iVertex++; - } while (iVertex < maxNvertices); + } while (iVertex < maxNvertices && !dropTF); logger(fmt::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); logger(fmt::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); logger(fmt::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); @@ -122,6 +129,11 @@ void Tracker::clustersToTracks(std::function logger, std::f total += timeExtending; logger(fmt::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); } + if (dropTF) { + error(fmt::format("...Dropping Timeframe...")); + mTimeFrame->dropTracks(); + break; // breaking out the iterations loop + } } total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); @@ -502,6 +514,8 @@ void Tracker::getGlobalConfiguration() params.nROFsPerIterations = nROFsPerIterations; params.PerPrimaryVertexProcessing = tc.perPrimaryVertexProcessing; params.SaveTimeBenchmarks = tc.saveTimeBenchmarks; + params.FataliseUponFailure = tc.fataliseUponFailure; + params.DropTFUponFailure = tc.dropTFUponFailure; for (int iD{0}; iD < 3; ++iD) { params.Diamond[iD] = tc.diamondPos[iD]; } diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 89ec83d0f6527..b98332b65c33a 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -271,13 +271,13 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) 
mTimeFrame->setROFMask(processUPCMask); // Run CA tracker if constexpr (isGPU) { - if (mMode == o2::its::TrackingMode::Async) { + if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { mTracker->clustersToTracksHybrid(logger, fatalLogger); } else { mTracker->clustersToTracksHybrid(logger, errorLogger); } } else { - if (mMode == o2::its::TrackingMode::Async) { + if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { mTracker->clustersToTracks(logger, fatalLogger); } else { mTracker->clustersToTracks(logger, errorLogger);