From 3e9e10eb1156be7185fe8e34bc1b25d34360443d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 13 Nov 2024 09:33:17 +0100 Subject: [PATCH 01/11] GPU TPC: Reject clusters with too small radius during refit instead of giving them IFC mask errors --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 26 ++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 0b2da89b79ad5..106a222862f49 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -131,6 +131,7 @@ AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Fin AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger") AddOptionRTC(nWaysOuter, int8_t, 0, "", 0, "Store outer param") AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits") +AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 0, "", 0, "Reject clusters that get the IFC mask error during refit") AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128") AddOptionRTC(dEdxTruncHigh, uint8_t, 77, "", 0, "High truncation threshold, fraction of 128") AddOptionRTC(globalTracking, int8_t, 1, "", 0, "Enable Global Tracking (prolong tracks to adjacent sectors to find short segments)") diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 13244dcb4b621..0b1c282f3b2f0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -68,7 +68,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ GPUTPCGMPropagator prop; gputpcgmmergertypes::InterpolationErrors interpolation; prop.SetMaterialTPC(); - 
prop.SetPolynomialField(&merger->Param().polynomialField); + prop.SetPolynomialField(&param.polynomialField); prop.SetMaxSinPhi(maxSinPhi); prop.SetToyMCEventsFlag(param.par.toyMCEventsFlag); if ((clusters[0].slice < 18) == (clusters[N - 1].slice < 18)) { @@ -157,7 +157,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ uint8_t clusterState = clusters[ihit].state; const float clAlpha = param.Alpha(clusters[ihit].slice); float xx, yy, zz; - if (merger->Param().par.earlyTpcTransform) { + if (param.par.earlyTpcTransform) { const float zOffset = (clusters[ihit].slice < 18) == (clusters[0].slice < 18) ? mTZOffset : -mTZOffset; xx = clustersXYZ[ihit].x; yy = clustersXYZ[ihit].y; @@ -177,6 +177,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } + if (param.rec.tpc.rejectIFCLowRadiusCluster) { + const float r2 = xx * xx + yy * yy; + const float rmax = (83.5f + param.rec.tpc.sysClusErrorMinDist); + if (r2 < rmax * rmax) { + MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagRejectErr); + } + } + const auto& cluster = clusters[ihit]; bool changeDirection = (cluster.leg - lastLeg) & 1; @@ -212,7 +220,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - bool dodEdx = merger->Param().par.dodEdx && merger->Param().dodEdxDownscaled && merger->Param().rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; + bool dodEdx = param.par.dodEdx && param.dodEdxDownscaled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.slice, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, 
inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - 1, param); @@ -323,7 +331,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } #endif GPUCA_DEBUG_STREAMER_CHECK(GPUTPCGMPropagator::DebugStreamerVals debugVals;); - if (merger->Param().rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && merger->Param().rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification + if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification retVal = GPUTPCGMPropagator::updateErrorEdgeCluster; } else { const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; @@ -358,11 +366,11 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ ihitStart = ihit; float dy = mP[0] - prop.Model().Y(); float dz = mP[1] - prop.Model().Z(); - if (CAMath::Abs(mP[4]) * merger->Param().qptB5Scaler > 10 && --resetT0 <= 0 && CAMath::Abs(mP[2]) < 0.15f && dy * dy + dz * dz > 1) { + if (CAMath::Abs(mP[4]) * param.qptB5Scaler > 10 && --resetT0 <= 0 && CAMath::Abs(mP[2]) < 0.15f && dy * dy + dz * dz > 1) { CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, prop.GetAlpha()); } - if (merger->Param().par.dodEdx && merger->Param().dodEdxDownscaled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg && !(clusterState & GPUTPCGMMergedTrackHit::flagEdge)) { + if (param.par.dodEdx && param.dodEdxDownscaled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg && !(clusterState & GPUTPCGMMergedTrackHit::flagEdge)) { float qtot = 0, qmax = 0, pad = 0, relTime = 0; const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; for (int32_t iTmp = ihitMergeFirst; 
iTmp != ihit + wayDirection; iTmp += wayDirection) { @@ -404,16 +412,16 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ o2::utils::DebugStreamer::instance()->getStreamer("debug_accept_track", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("debug_accept_track").data() << "iTrk=" << iTrk << "outerParam=" << *outerParam << "track=" << this << "ihitStart=" << ihitStart << "\n"; }) - if (!(N + NTolerated >= GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(mP[4] * merger->Param().qptB5Scaler) && 2 * NTolerated <= CAMath::Max(10, N) && CheckNumericalQuality(covYYUpd))) { + if (!(N + NTolerated >= GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(mP[4] * param.qptB5Scaler) && 2 * NTolerated <= CAMath::Max(10, N) && CheckNumericalQuality(covYYUpd))) { return false; // TODO: NTolerated should never become that large, check what is going wrong! } - if (merger->Param().rec.tpc.minNClustersFinalTrack != -1 && N + NTolerated < merger->Param().rec.tpc.minNClustersFinalTrack) { + if (param.rec.tpc.minNClustersFinalTrack != -1 && N + NTolerated < param.rec.tpc.minNClustersFinalTrack) { return false; } // TODO: we have looping tracks here with 0 accepted clusters in the primary leg. In that case we should refit the track using only the primary leg. 
- if (merger->Param().par.dodEdx && merger->Param().dodEdxDownscaled) { + if (param.par.dodEdx && param.dodEdxDownscaled) { dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); } Alpha = prop.GetAlpha(); From 8e2e28a2f7a47e9bc6e485b298ff815ba2e6ed58 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 11 Nov 2024 14:11:45 +0100 Subject: [PATCH 02/11] Add empty streaming operator, so that std::cout << SMatrixGPU() does not fail --- Common/MathUtils/include/MathUtils/SMatrixGPU.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 60965a4fa2776..2bfdcf54752b2 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -29,6 +29,7 @@ #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" #include "GPUCommonLogger.h" +#include "GPUCommonTypeTraits.h" namespace o2::math_utils::detail { @@ -468,6 +469,9 @@ class SMatrixGPU GPUd() const T& operator()(unsigned int i, unsigned int j) const; GPUd() T& operator()(unsigned int i, unsigned int j); + template + GPUd() friend X& operator<<(Y& y, const SMatrixGPU&); + class SMatrixRowGPU { public: @@ -512,6 +516,13 @@ class SMatrixGPU R mRep; }; +template + requires(sizeof(typename X::traits_type::pos_type) != 0) // do not provide a template to fair::Logger, etc... 
(pos_type is a member type of all std::ostream classes) +GPUd() X& operator<<(Y& y, const SMatrixGPU&) +{ + return y; +} + template GPUdi() SMatrixGPU::SMatrixGPU(SMatrixIdentity) { From 5e7a6b0c15fa2d90d46888ea44221d2db227c643 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 11 Nov 2024 14:12:30 +0100 Subject: [PATCH 03/11] GPU: Fix includes of certain headers (fix order, avoid ROOT in GPU code) --- GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx | 5 ++--- GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx | 1 + GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx | 3 +-- GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h | 2 +- GPU/GPUTracking/Interface/GPUO2InterfaceQA.cxx | 2 +- GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx b/GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx index a632bf361498c..002bb1ed9e9d7 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx +++ b/GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx @@ -12,14 +12,13 @@ /// \file CalibdEdxContainer.cxx /// \author Matthias Kleiner -#include "CalibdEdxContainer.h" - -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) +#if !defined(GPUCA_STANDALONE) #include "TFile.h" #include "TPCBase/CalDet.h" #include "Framework/Logger.h" #include "clusterFinderDefs.h" #endif +#include "CalibdEdxContainer.h" using namespace GPUCA_NAMESPACE::gpu; using namespace o2::tpc; diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx index 548bbafae686d..533763e14c6d7 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx @@ -9,6 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
+#include "Rtypes.h" #include "CalibdEdxTrackTopologyPol.h" #include diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx index 4c6e750355397..3b0e718026536 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx @@ -14,11 +14,10 @@ /// /// \author Matthias Kleiner -#include "CalibdEdxTrackTopologySpline.h" - #if !defined(GPUCA_STANDALONE) #include "TFile.h" #endif +#include "CalibdEdxTrackTopologySpline.h" using namespace GPUCA_NAMESPACE::gpu; using namespace o2::tpc; diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h index 563872fb90d4d..d9d4b9e35592d 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h @@ -19,12 +19,12 @@ #include "FlatObject.h" #include "Spline.h" +#include "GPUCommonRtypes.h" #ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/Defs.h" #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation -#include "Rtypes.h" // for ClassDefNV #include #endif diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceQA.cxx b/GPU/GPUTracking/Interface/GPUO2InterfaceQA.cxx index db6df3f9f1ede..7005fbb3bab25 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceQA.cxx +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceQA.cxx @@ -12,11 +12,11 @@ /// \file GPUO2InterfaceQA.cxx /// \author David Rohr +#include "TGraphAsymmErrors.h" #include "GPUParam.h" #include "GPUQA.h" #include "GPUO2InterfaceConfiguration.h" #include "GPUO2InterfaceQA.h" -#include "TGraphAsymmErrors.h" using namespace o2::gpu; using namespace o2::tpc; diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx index 6baea86f05d36..f7e3bca47a0fc 100644 --- 
a/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx @@ -12,9 +12,9 @@ /// \file GPUTrackingRefitKernel.cxx /// \author David Rohr +#include "GPUROOTDump.h" #include "GPUTrackingRefitKernel.h" #include "GPUTrackingRefit.h" -#include "GPUROOTDump.h" using namespace GPUCA_NAMESPACE::gpu; From 71faa853dba5198907cfca9c06feb37f6b5335e6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 11 Nov 2024 14:36:09 +0100 Subject: [PATCH 04/11] FST: Force correct number of orbits to gpu-reco --- prodtests/full_system_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/prodtests/full_system_test.sh b/prodtests/full_system_test.sh index f8b6d66ce87e4..8d6a0ca3cf1f9 100755 --- a/prodtests/full_system_test.sh +++ b/prodtests/full_system_test.sh @@ -227,6 +227,7 @@ if [[ ${RANS_OPT:-} =~ (--ans-version +)(compat) ]] ; then # for decoding we use either just produced or externally provided common local file export ARGS_EXTRA_PROCESS_o2_ctf_reader_workflow+="--ctf-dict $CTFDICTFILE" fi +export CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow+="GPU_global.overrideNHbfPerTF=$NHBPERTF;" for STAGE in $STAGES; do logfile=reco_${STAGE}.log From e9587cf6137569d7c6994abebd09cb288103a40a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 11 Nov 2024 14:36:55 +0100 Subject: [PATCH 05/11] Calibration aggregator-workflow.sh: Update default lanes/threads for TPC IDC calib --- prodtests/full-system-test/aggregator-workflow.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prodtests/full-system-test/aggregator-workflow.sh b/prodtests/full-system-test/aggregator-workflow.sh index 4c20e901a2978..23336cafffab8 100755 --- a/prodtests/full-system-test/aggregator-workflow.sh +++ b/prodtests/full-system-test/aggregator-workflow.sh @@ -295,8 +295,8 @@ fi # TPC IDCs and SAC crus="0-359" # to be used with $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC or ALL -lanesFactorize=${O2_TPC_IDC_FACTORIZE_NLANES:-10} 
-threadFactorize=${O2_TPC_IDC_FACTORIZE_NTHREADS:-8} +lanesFactorize=${O2_TPC_IDC_FACTORIZE_NLANES:-12} +threadFactorize=${O2_TPC_IDC_FACTORIZE_NTHREADS:-16} nTFs=$((1000 * 128 / ${NHBPERTF})) nTFs_SAC=$((1000 * 128 / ${NHBPERTF})) nBuffer=$((100 * 128 / ${NHBPERTF})) From b162faae42c624387ed0f2e156361ff9d10e229e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 11 Nov 2024 18:06:24 +0100 Subject: [PATCH 06/11] GPU Display: Fix race condition --- GPU/GPUTracking/display/render/GPUDisplayDraw.cxx | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index ab7ebf6811766..746c41938e2e1 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -27,6 +27,7 @@ #include "GPUTPCGMPropagator.h" #include "GPUTPCMCInfo.h" #include "GPUParam.inc" +#include "GPUCommonMath.h" #include @@ -66,8 +67,12 @@ inline void GPUDisplay::insertVertexList(int32_t iSlice, size_t first, size_t la inline void GPUDisplay::drawPointLinestrip(int32_t iSlice, int32_t cid, int32_t id, int32_t id_limit) { mVertexBuffer[iSlice].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPos[cid].z); - if (mGlobalPos[cid].w < id_limit) { - mGlobalPos[cid].w = id; + float curVal; + while ((curVal = mGlobalPos[cid].w) < id_limit) { + if (GPUCommonMath::AtomicCAS(&mGlobalPos[cid].w, curVal, (float)id)) { + break; + } + curVal = mGlobalPos[cid].w; } } From d7e0151da5af30685923def5190fd5f4c4466ffe Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 13 Nov 2024 09:30:47 +0100 Subject: [PATCH 07/11] GPU: Split NDPiecewisePolynomials in header and inc file, get rid of ROOT in the header --- .../DataTypes/CalibdEdxTrackTopologyPol.cxx | 1 + .../NDPiecewisePolynomials.h | 351 +++--------------- .../NDPiecewisePolynomials.inc | 276 ++++++++++++++ .../test/testMultivarPolynomials.cxx | 2 +- 4 files changed, 330 insertions(+), 300 deletions(-) create mode 100644 GPU/TPCFastTransformation/NDPiecewisePolynomials.inc diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx index 533763e14c6d7..47a6e4cff72df 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx @@ -21,6 +21,7 @@ using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation +#include "NDPiecewisePolynomials.inc" void CalibdEdxTrackTopologyPol::dumpToTree(const uint32_t nSamplingPoints[/* Dim */], const char* outName) const { for (uint32_t i = 0; i < FFits; i++) { diff --git a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h index 6de2bc7afbae8..9498645b76220 100644 --- a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h +++ b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h @@ -20,17 +20,12 @@ #include "MultivariatePolynomialHelper.h" #include "GPUCommonMath.h" -#if !defined(GPUCA_GPUCODE) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) #include -#if !defined(GPUCA_STANDALONE) -#include 
"TLinearFitter.h" -#ifndef GPUCA_ALIROOT_LIB -#include "CommonUtils/TreeStreamRedirector.h" -#endif -#include -#endif #endif +class TFile; + namespace GPUCA_NAMESPACE::gpu { @@ -81,23 +76,20 @@ template class NDPiecewisePolynomials : public FlatObject { public: -#ifndef GPUCA_GPUCODE +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// constructor /// \param min minimum coordinates of the grid /// \param max maximum coordinates of the grid (note: the resulting polynomials can NOT be evaluated at the maximum coordinates: only at min <= X < max) /// \param n number of vertices: defines number of fits per dimension: nFits = n - 1. n should be at least 2 to perform one fit NDPiecewisePolynomials(const float min[/* Dim */], const float max[/* Dim */], const uint32_t n[/* Dim */]) { init(min, max, n); } -#endif -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// constructor construct and object by initializing it from an object stored in a Root file /// \param fileName name of the file /// \param name name of the object NDPiecewisePolynomials(const char* fileName, const char* name) { - TFile f(fileName, "READ"); - loadFromFile(f, name); + loadFromFile(fileName, name); }; -#endif +#endif // !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// default constructor NDPiecewisePolynomials() CON_DEFAULT; @@ -115,7 +107,7 @@ class NDPiecewisePolynomials : public FlatObject /// move flat buffer to new location /// \param newBufferPtr new buffer location void moveBufferTo(char* newBufferPtr); -#endif +#endif // !defined(GPUCA_GPUCODE) /// destroy the object (release internal flat buffer) void destroy(); @@ -168,17 +160,16 @@ class NDPiecewisePolynomials : public FlatObject /// \return returns the parameters of the coefficients GPUd() const float* getParams() const { return mParams; } -#if !defined(GPUCA_GPUCODE) - /// Setting directly the parameters of the polynomials - void setParams(const float params[/* getNParameters() */]) { std::copy(params, params 
+ getNParameters(), mParams); } - /// initalize the members /// \param min minimum coordinates of the grid /// \param max maximum coordinates of the grid (note: the resulting polynomials can NOT be evaluated at the maximum coordinates: only at min <= X < max) /// \param n number of vertices: defines number of fits per dimension: nFits = n - 1. n should be at least 2 to perform one fit void init(const float min[/* Dim */], const float max[/* Dim */], const uint32_t n[/* Dim */]); -#ifndef GPUCA_STANDALONE +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) + /// Setting directly the parameters of the polynomials + void setParams(const float params[/* getNParameters() */]) { std::copy(params, params + getNParameters(), mParams); } + /// perform the polynomial fits on the grid /// \param func function which returns for every input x on the defined grid the true value /// \param nAuxiliaryPoints number of points which will be used for the fits (should be at least 2) @@ -194,6 +185,8 @@ class NDPiecewisePolynomials : public FlatObject /// \param name name of the object in the file void loadFromFile(TFile& inpf, const char* name); + void loadFromFile(const char* fileName, const char* name); + /// write parameters to file /// \param outf output file /// \param name name of the output object @@ -211,7 +204,6 @@ class NDPiecewisePolynomials : public FlatObject /// \return returns total number of polynomial fits uint32_t getNPolynomials() const; -#endif /// converts the class to a container which can be written to a root file NDPiecewisePolynomialContainer getContainer() const { return NDPiecewisePolynomialContainer{Dim, Degree, getNParameters(), mParams, InteractionOnly, mMin, mMax, mN}; } @@ -219,10 +211,10 @@ class NDPiecewisePolynomials : public FlatObject /// set the parameters from NDPiecewisePolynomialContainer /// \param container container for the parameters void setFromContainer(const NDPiecewisePolynomialContainer& container); +#endif // 
!defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// \return returns the total number of stored parameters uint32_t getNParameters() const { return getNPolynomials() * MultivariatePolynomialParametersHelper::getNParameters(Degree, Dim, InteractionOnly); } -#endif /// \return returns number of dimensions of the polynomials GPUd() static constexpr uint32_t getDim() { return Dim; } @@ -292,15 +284,15 @@ class NDPiecewisePolynomials : public FlatObject /// \param ix index /// \param dim dimension double getVertexPosition(const uint32_t ix, const int32_t dim) const { return ix / static_cast(mInvSpacing[dim]) + mMin[dim]; } -#endif +#endif // !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) #if !defined(GPUCA_GPUCODE) /// \return returns the size of the parameters std::size_t sizeOfParameters() const { return getNParameters() * sizeof(DataTParams); } +#endif // #if !defined(GPUCA_GPUCODE) // construct the object (flatbuffer) void construct(); -#endif #ifndef GPUCA_ALIROOT_LIB ClassDefNV(NDPiecewisePolynomials, 1); @@ -313,20 +305,6 @@ class NDPiecewisePolynomials : public FlatObject #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) template -void NDPiecewisePolynomials::loadFromFile(TFile& inpf, const char* name) -{ - NDPiecewisePolynomialContainer* gridTmp = nullptr; - inpf.GetObject(name, gridTmp); - if (gridTmp) { - setFromContainer(*gridTmp); - delete gridTmp; - } else { -#ifndef GPUCA_ALIROOT_LIB - LOGP(info, "couldnt load object {} from input file", name); -#endif - } -} -template void NDPiecewisePolynomials::setFromContainer(const NDPiecewisePolynomialContainer& container) { if (Dim != container.mDim) { @@ -350,12 +328,6 @@ void NDPiecewisePolynomials::setFromContainer(cons init(container.mMin.data(), container.mMax.data(), container.mN.data()); setParams(container.mParams.data()); } -template -void NDPiecewisePolynomials::writeToFile(TFile& outf, const char* name) const -{ - const NDPiecewisePolynomialContainer cont = getContainer(); - 
outf.WriteObject(&cont, name); -} template void NDPiecewisePolynomials::setDefault() @@ -368,7 +340,29 @@ void NDPiecewisePolynomials::setDefault() std::copy(params.begin(), params.end(), &mParams[i * nParamsPerPol]); } } -#endif + +template +uint32_t NDPiecewisePolynomials::getNPolynomials() const +{ + uint32_t nP = getNPolynomials(0); + for (uint32_t i = 1; i < Dim; ++i) { + nP *= getNPolynomials(i); + } + return nP; +} + +template +void NDPiecewisePolynomials::checkPos(const uint32_t iMax[/* Dim */], int32_t pos[/* Dim */]) const +{ + for (uint32_t i = 0; i < Dim; ++i) { + if (pos[i] == int32_t(iMax[i])) { + ++pos[i + 1]; + std::fill_n(pos, i + 1, 0); + } + } +} + +#endif // !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) #ifndef GPUCA_GPUCODE template @@ -405,7 +399,19 @@ void NDPiecewisePolynomials::construct() FlatObject::finishConstruction(flatbufferSize); mParams = reinterpret_cast(mFlatBufferPtr); } -#endif + +template +void NDPiecewisePolynomials::init(const float min[], const float max[], const uint32_t n[]) +{ + for (uint32_t i = 0; i < Dim; ++i) { + mMin[i] = min[i]; + mMax[i] = max[i]; + mN[i] = n[i]; + mInvSpacing[i] = (mN[i] - 1) / (mMax[i] - mMin[i]); + } + construct(); +} +#endif // !GPUCA_GPUCODE template void NDPiecewisePolynomials::destroy() @@ -472,259 +478,6 @@ GPUdi() void NDPiecewisePolynomials::clamp(float x } } -#ifndef GPUCA_GPUCODE -template -void NDPiecewisePolynomials::init(const float min[], const float max[], const uint32_t n[]) -{ - for (uint32_t i = 0; i < Dim; ++i) { - mMin[i] = min[i]; - mMax[i] = max[i]; - mN[i] = n[i]; - mInvSpacing[i] = (mN[i] - 1) / (mMax[i] - mMin[i]); - } - construct(); -} -#endif - -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) -template -uint32_t NDPiecewisePolynomials::getNPolynomials() const -{ - uint32_t nP = getNPolynomials(0); - for (uint32_t i = 1; i < Dim; ++i) { - nP *= getNPolynomials(i); - } - return nP; -} - -template -void NDPiecewisePolynomials::checkPos(const uint32_t 
iMax[/* Dim */], int32_t pos[/* Dim */]) const -{ - for (uint32_t i = 0; i < Dim; ++i) { - if (pos[i] == int32_t(iMax[i])) { - ++pos[i + 1]; - std::fill_n(pos, i + 1, 0); - } - } -} - -template -void NDPiecewisePolynomials::performFits(const std::function& func, const uint32_t nAuxiliaryPoints[/* Dim */]) -{ - const int32_t nTotalFits = getNPolynomials(); -#ifndef GPUCA_ALIROOT_LIB - LOGP(info, "Perform fitting of {}D-Polynomials of degree {} for a total of {} fits.", Dim, Degree, nTotalFits); -#endif - - MultivariatePolynomialHelper<0, 0, false> pol(Dim, Degree, InteractionOnly); - TLinearFitter fitter = pol.getTLinearFitter(); - - uint32_t nPoints = 1; - for (uint32_t i = 0; i < Dim; ++i) { - nPoints *= nAuxiliaryPoints[i]; - } - - std::vector xCords; - std::vector response; - xCords.reserve(Dim * nPoints); - response.reserve(nPoints); - - uint32_t nPolynomials[Dim]{0}; - for (uint32_t i = 0; i < Dim; ++i) { - nPolynomials[i] = getNPolynomials(i); - } - - int32_t pos[Dim + 1]{0}; - uint32_t counter = 0; - const int32_t printDebugForNFits = int32_t(nTotalFits / 20) + 1; - - for (;;) { - const bool debug = !(++counter % printDebugForNFits); - if (debug) { -#ifndef GPUCA_ALIROOT_LIB - LOGP(info, "Performing fit {} out of {}", counter, nTotalFits); -#endif - } - - checkPos(nPolynomials, pos); - - if (pos[Dim] == 1) { - break; - } - - xCords.clear(); - response.clear(); - fitInnerGrid(func, nAuxiliaryPoints, pos, fitter, xCords, response); - ++pos[0]; - } -} - -template -void NDPiecewisePolynomials::performFits(const std::vector& x, const std::vector& y) -{ - const int32_t nTotalFits = getNPolynomials(); -#ifndef GPUCA_ALIROOT_LIB - LOGP(info, "Perform fitting of {}D-Polynomials of degree {} for a total of {} fits.", Dim, Degree, nTotalFits); -#endif - - // approximate number of points - uint32_t nPoints = 2 * y.size() / nTotalFits; - - // polynomial index -> indices to datapoints - std::unordered_map> dataPointsIndices; - for (int32_t i = 0; i < nTotalFits; ++i) { - 
dataPointsIndices[i].reserve(nPoints); - } - - // check for each data point which polynomial to use - for (size_t i = 0; i < y.size(); ++i) { - std::array index; - float xVal[Dim]; - std::copy(x.begin() + i * Dim, x.begin() + i * Dim + Dim, xVal); - setIndex(xVal, index.data()); - - std::array indexClamped{index}; - clamp(xVal, indexClamped.data()); - - // check if data points are in the grid - if (index == indexClamped) { - // index of the polyniomial - const uint32_t idx = getDataIndex(index.data()) / MultivariatePolynomialParametersHelper::getNParameters(Degree, Dim, InteractionOnly); - - // store index to data point - dataPointsIndices[idx].emplace_back(i); - } - } - - // for fitting - MultivariatePolynomialHelper<0, 0, false> pol(Dim, Degree, InteractionOnly); - TLinearFitter fitter = pol.getTLinearFitter(); - - uint32_t counter = 0; - const int32_t printDebugForNFits = int32_t(nTotalFits / 20) + 1; - - // temp storage for x and y values for fitting - std::vector xCords; - std::vector response; - - for (int32_t i = 0; i < nTotalFits; ++i) { - const bool debug = !(++counter % printDebugForNFits); - if (debug) { -#ifndef GPUCA_ALIROOT_LIB - LOGP(info, "Performing fit {} out of {}", counter, nTotalFits); -#endif - } - - // store values for fitting - if (dataPointsIndices[i].empty()) { -#ifndef GPUCA_ALIROOT_LIB - LOGP(info, "No data points to fit"); -#endif - continue; - } - - const auto nP = dataPointsIndices[i].size(); - xCords.reserve(Dim * nP); - response.reserve(nP); - xCords.clear(); - response.clear(); - - // add datapoints to fit - for (size_t j = 0; j < nP; ++j) { - const size_t idxOrig = dataPointsIndices[i][j]; - - // insert x values at the end of xCords - const int32_t idxXStart = idxOrig * Dim; - xCords.insert(xCords.end(), x.begin() + idxXStart, x.begin() + idxXStart + Dim); - response.emplace_back(y[idxOrig]); - } - - // perform the fit on the points TODO make errors configurable - std::vector error; - const auto params = 
MultivariatePolynomialHelper<0, 0, false>::fit(fitter, xCords, response, error, true); - - // store parameters - std::copy(params.begin(), params.end(), &mParams[i * MultivariatePolynomialParametersHelper::getNParameters(Degree, Dim, InteractionOnly)]); - } -} - -template -void NDPiecewisePolynomials::fitInnerGrid(const std::function& func, const uint32_t nAuxiliaryPoints[/* Dim */], const int32_t currentIndex[/* Dim */], TLinearFitter& fitter, std::vector& xCords, std::vector& response) -{ - int32_t pos[Dim + 1]{0}; - - // add points which will be used for the fit - for (;;) { - checkPos(nAuxiliaryPoints, pos); - - if (pos[Dim] == 1) { - break; - } - - for (uint32_t iDim = 0; iDim < Dim; ++iDim) { - const double stepWidth = getStepWidth(iDim, nAuxiliaryPoints[iDim]); - const double vertexPos = getVertexPosition(currentIndex[iDim], iDim); - const double realPosTmp = vertexPos + pos[iDim] * stepWidth; - xCords.emplace_back(realPosTmp); - } - - // get response for last added points - const double responseTmp = func(&xCords[xCords.size() - Dim]); - response.emplace_back(responseTmp); - ++pos[0]; - } - - // perform the fit on the points TODO make errors configurable - std::vector error; - const auto params = MultivariatePolynomialHelper<0, 0, false>::fit(fitter, xCords, response, error, true); - - // store parameters - const uint32_t index = getDataIndex(currentIndex); - std::copy(params.begin(), params.end(), &mParams[index]); -} - -#ifndef GPUCA_ALIROOT_LIB -template -void NDPiecewisePolynomials::dumpToTree(const uint32_t nSamplingPoints[/* Dim */], const char* outName, const char* treeName, const bool recreateFile) const -{ - o2::utils::TreeStreamRedirector pcstream(outName, recreateFile ? 
"RECREATE" : "UPDATE"); - - double factor[Dim]{}; - for (uint32_t iDim = 0; iDim < Dim; ++iDim) { - factor[iDim] = (mMax[iDim] - mMin[iDim]) / (nSamplingPoints[iDim] - 1); - } - - std::vector x(Dim); - std::vector ix(Dim); - int32_t pos[Dim + 1]{0}; - - for (;;) { - checkPos(nSamplingPoints, pos); - - if (pos[Dim] == 1) { - break; - } - - for (uint32_t iDim = 0; iDim < Dim; ++iDim) { - ix[iDim] = pos[iDim]; - x[iDim] = mMin[iDim] + pos[iDim] * factor[iDim]; - } - - float value = eval(x.data()); - pcstream << treeName - << "ix=" << ix - << "x=" << x - << "value=" << value - << "\n"; - - ++pos[0]; - } - pcstream.Close(); -} -#endif - -#endif - } // namespace GPUCA_NAMESPACE::gpu #endif diff --git a/GPU/TPCFastTransformation/NDPiecewisePolynomials.inc b/GPU/TPCFastTransformation/NDPiecewisePolynomials.inc new file mode 100644 index 0000000000000..d7bb9d702e96f --- /dev/null +++ b/GPU/TPCFastTransformation/NDPiecewisePolynomials.inc @@ -0,0 +1,276 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file NDPiecewisePolynomials.inc +/// \author Matthias Kleiner + +#ifndef ALICEO2_TPC_NDPIECEWISEPOLYNOMIALS_INC +#define ALICEO2_TPC_NDPIECEWISEPOLYNOMIALS_INC + +#include +#include +#include "CommonUtils/TreeStreamRedirector.h" +#include "NDPiecewisePolynomials.h" + +namespace GPUCA_NAMESPACE::gpu +{ + +#ifndef GPUCA_ALIROOT_LIB +template +void NDPiecewisePolynomials::dumpToTree(const uint32_t nSamplingPoints[/* Dim */], const char* outName, const char* treeName, const bool recreateFile) const +{ + o2::utils::TreeStreamRedirector pcstream(outName, recreateFile ? "RECREATE" : "UPDATE"); + + double factor[Dim]{}; + for (uint32_t iDim = 0; iDim < Dim; ++iDim) { + factor[iDim] = (mMax[iDim] - mMin[iDim]) / (nSamplingPoints[iDim] - 1); + } + + std::vector x(Dim); + std::vector ix(Dim); + int32_t pos[Dim + 1]{0}; + + for (;;) { + checkPos(nSamplingPoints, pos); + + if (pos[Dim] == 1) { + break; + } + + for (uint32_t iDim = 0; iDim < Dim; ++iDim) { + ix[iDim] = pos[iDim]; + x[iDim] = mMin[iDim] + pos[iDim] * factor[iDim]; + } + + float value = eval(x.data()); + pcstream << treeName + << "ix=" << ix + << "x=" << x + << "value=" << value + << "\n"; + + ++pos[0]; + } + pcstream.Close(); +} +#endif // GPUCA_ALIROOT_LIB + +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) + +template +void NDPiecewisePolynomials::loadFromFile(TFile& inpf, const char* name) +{ + NDPiecewisePolynomialContainer* gridTmp = nullptr; + inpf.GetObject(name, gridTmp); + if (gridTmp) { + setFromContainer(*gridTmp); + delete gridTmp; + } else { +#ifndef GPUCA_ALIROOT_LIB + LOGP(info, "couldnt load object {} from input file", name); +#endif + } +} + +template +void NDPiecewisePolynomials::loadFromFile(const char* fileName, const char* name) +{ + TFile f(fileName, "READ"); + loadFromFile(f, name); +} + +template +void NDPiecewisePolynomials::writeToFile(TFile& outf, const char* name) const +{ + const NDPiecewisePolynomialContainer cont = getContainer(); + outf.WriteObject(&cont, name); +} 
+ +template +void NDPiecewisePolynomials::performFits(const std::function& func, const uint32_t nAuxiliaryPoints[/* Dim */]) +{ + const int32_t nTotalFits = getNPolynomials(); +#ifndef GPUCA_ALIROOT_LIB + LOGP(info, "Perform fitting of {}D-Polynomials of degree {} for a total of {} fits.", Dim, Degree, nTotalFits); +#endif + + MultivariatePolynomialHelper<0, 0, false> pol(Dim, Degree, InteractionOnly); + TLinearFitter fitter = pol.getTLinearFitter(); + + uint32_t nPoints = 1; + for (uint32_t i = 0; i < Dim; ++i) { + nPoints *= nAuxiliaryPoints[i]; + } + + std::vector xCords; + std::vector response; + xCords.reserve(Dim * nPoints); + response.reserve(nPoints); + + uint32_t nPolynomials[Dim]{0}; + for (uint32_t i = 0; i < Dim; ++i) { + nPolynomials[i] = getNPolynomials(i); + } + + int32_t pos[Dim + 1]{0}; + uint32_t counter = 0; + const int32_t printDebugForNFits = int32_t(nTotalFits / 20) + 1; + + for (;;) { + const bool debug = !(++counter % printDebugForNFits); + if (debug) { +#ifndef GPUCA_ALIROOT_LIB + LOGP(info, "Performing fit {} out of {}", counter, nTotalFits); +#endif + } + + checkPos(nPolynomials, pos); + + if (pos[Dim] == 1) { + break; + } + + xCords.clear(); + response.clear(); + fitInnerGrid(func, nAuxiliaryPoints, pos, fitter, xCords, response); + ++pos[0]; + } +} + +template +void NDPiecewisePolynomials::performFits(const std::vector& x, const std::vector& y) +{ + const int32_t nTotalFits = getNPolynomials(); +#ifndef GPUCA_ALIROOT_LIB + LOGP(info, "Perform fitting of {}D-Polynomials of degree {} for a total of {} fits.", Dim, Degree, nTotalFits); +#endif + + // approximate number of points + uint32_t nPoints = 2 * y.size() / nTotalFits; + + // polynomial index -> indices to datapoints + std::unordered_map> dataPointsIndices; + for (int32_t i = 0; i < nTotalFits; ++i) { + dataPointsIndices[i].reserve(nPoints); + } + + // check for each data point which polynomial to use + for (size_t i = 0; i < y.size(); ++i) { + std::array index; + float xVal[Dim]; + 
std::copy(x.begin() + i * Dim, x.begin() + i * Dim + Dim, xVal); + setIndex(xVal, index.data()); + + std::array indexClamped{index}; + clamp(xVal, indexClamped.data()); + + // check if data points are in the grid + if (index == indexClamped) { + // index of the polyniomial + const uint32_t idx = getDataIndex(index.data()) / MultivariatePolynomialParametersHelper::getNParameters(Degree, Dim, InteractionOnly); + + // store index to data point + dataPointsIndices[idx].emplace_back(i); + } + } + + // for fitting + MultivariatePolynomialHelper<0, 0, false> pol(Dim, Degree, InteractionOnly); + TLinearFitter fitter = pol.getTLinearFitter(); + + uint32_t counter = 0; + const int32_t printDebugForNFits = int32_t(nTotalFits / 20) + 1; + + // temp storage for x and y values for fitting + std::vector xCords; + std::vector response; + + for (int32_t i = 0; i < nTotalFits; ++i) { + const bool debug = !(++counter % printDebugForNFits); + if (debug) { +#ifndef GPUCA_ALIROOT_LIB + LOGP(info, "Performing fit {} out of {}", counter, nTotalFits); +#endif + } + + // store values for fitting + if (dataPointsIndices[i].empty()) { +#ifndef GPUCA_ALIROOT_LIB + LOGP(info, "No data points to fit"); +#endif + continue; + } + + const auto nP = dataPointsIndices[i].size(); + xCords.reserve(Dim * nP); + response.reserve(nP); + xCords.clear(); + response.clear(); + + // add datapoints to fit + for (size_t j = 0; j < nP; ++j) { + const size_t idxOrig = dataPointsIndices[i][j]; + + // insert x values at the end of xCords + const int32_t idxXStart = idxOrig * Dim; + xCords.insert(xCords.end(), x.begin() + idxXStart, x.begin() + idxXStart + Dim); + response.emplace_back(y[idxOrig]); + } + + // perform the fit on the points TODO make errors configurable + std::vector error; + const auto params = MultivariatePolynomialHelper<0, 0, false>::fit(fitter, xCords, response, error, true); + + // store parameters + std::copy(params.begin(), params.end(), &mParams[i * 
MultivariatePolynomialParametersHelper::getNParameters(Degree, Dim, InteractionOnly)]); + } +} + +template +void NDPiecewisePolynomials::fitInnerGrid(const std::function& func, const uint32_t nAuxiliaryPoints[/* Dim */], const int32_t currentIndex[/* Dim */], TLinearFitter& fitter, std::vector& xCords, std::vector& response) +{ + int32_t pos[Dim + 1]{0}; + + // add points which will be used for the fit + for (;;) { + checkPos(nAuxiliaryPoints, pos); + + if (pos[Dim] == 1) { + break; + } + + for (uint32_t iDim = 0; iDim < Dim; ++iDim) { + const double stepWidth = getStepWidth(iDim, nAuxiliaryPoints[iDim]); + const double vertexPos = getVertexPosition(currentIndex[iDim], iDim); + const double realPosTmp = vertexPos + pos[iDim] * stepWidth; + xCords.emplace_back(realPosTmp); + } + + // get response for last added points + const double responseTmp = func(&xCords[xCords.size() - Dim]); + response.emplace_back(responseTmp); + ++pos[0]; + } + + // perform the fit on the points TODO make errors configurable + std::vector error; + const auto params = MultivariatePolynomialHelper<0, 0, false>::fit(fitter, xCords, response, error, true); + + // store parameters + const uint32_t index = getDataIndex(currentIndex); + std::copy(params.begin(), params.end(), &mParams[index]); +} + +} // namespace GPUCA_NAMESPACE::gpu + +#endif // !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) + +#endif // ALICEO2_TPC_NDPIECEWISEPOLYNOMIALS_INC diff --git a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx index c3373cdad63f0..a9c39e8528354 100644 --- a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx +++ b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx @@ -18,7 +18,7 @@ #include #include "MultivariatePolynomial.h" -#include "NDPiecewisePolynomials.h" +#include "NDPiecewisePolynomials.inc" #include namespace o2::gpu From 2541e978c1d3575c0d52e482f82a4596f7fbbd0c Mon Sep 17 00:00:00 2001 From: David 
Rohr Date: Mon, 11 Nov 2024 14:12:16 +0100 Subject: [PATCH 08/11] GPU: Some protection so we get a compiler warning when headers are included in wrong order --- GPU/Common/GPUCommonRtypes.h | 4 ++-- GPU/Common/GPUROOTSMatrixFwd.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/Common/GPUCommonRtypes.h b/GPU/Common/GPUCommonRtypes.h index 5ae2ddbb83b26..7aaf5a36befe2 100644 --- a/GPU/Common/GPUCommonRtypes.h +++ b/GPU/Common/GPUCommonRtypes.h @@ -20,14 +20,14 @@ #if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined(GPUCA_GPUCODE) // clang-format off #if !defined(ROOT_Rtypes) && !defined(__CLING__) #define GPUCOMMONRTYPES_H_ACTIVE + struct MUST_NOT_USE_Rtypes_h {}; + typedef MUST_NOT_USE_Rtypes_h TClass; #define ClassDef(name,id) #define ClassDefNV(name, id) #define ClassDefOverride(name, id) #define ClassImp(name) #define templateClassImp(name) #ifndef GPUCA_GPUCODE_DEVICE -// typedef uint64_t ULong64_t; -// typedef uint32_t UInt_t; #include #endif #endif diff --git a/GPU/Common/GPUROOTSMatrixFwd.h b/GPU/Common/GPUROOTSMatrixFwd.h index a3b5abc55d3bc..44b2254949df2 100644 --- a/GPU/Common/GPUROOTSMatrixFwd.h +++ b/GPU/Common/GPUROOTSMatrixFwd.h @@ -52,7 +52,7 @@ template class MatRepStdGPU; } // namespace detail -#if !defined(GPUCA_STANDALONE) && !defined(GPUCA_GPUCODE) +#if !defined(GPUCA_STANDALONE) && !defined(GPUCA_GPUCODE) && !defined(GPUCOMMONRTYPES_H_ACTIVE) template using SVector = ROOT::Math::SVector; template From 3b160b5c28219901e453bddea4e725b56eb14e49 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 13 Nov 2024 19:37:19 +0100 Subject: [PATCH 09/11] GPU: Workaround for OpenCL --- Common/MathUtils/include/MathUtils/SMatrixGPU.h | 2 ++ GPU/GPUTracking/dEdx/GPUdEdx.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 2bfdcf54752b2..5ecdcd75a9906 100644 --- 
a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -516,12 +516,14 @@ class SMatrixGPU R mRep; }; +#ifndef __OPENCL__ // TODO: current C++ for OpenCL 2021 is at C++17, so no concepts. But we don't need this trick for OpenCL anyway, so we can just hide it. template requires(sizeof(typename X::traits_type::pos_type) != 0) // do not provide a template to fair::Logger, etc... (pos_type is a member type of all std::ostream classes) GPUd() X& operator<<(Y& y, const SMatrixGPU&) { return y; } +#endif template GPUdi() SMatrixGPU::SMatrixGPU(SMatrixIdentity) diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 9a1784e2be49a..516d1fced0a20 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -212,7 +212,7 @@ GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestri mNSubThresh++; } -#endif // !GPUCA_HAVE_O2HEADERS || __OPENCL1__ +#endif // !GPUCA_HAVE_O2HEADERS || GPUCA_OPENCL1 } // namespace gpu } // namespace GPUCA_NAMESPACE From 4dd8d1dd307374308057516f6b7fb0d14f58b3e3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 13 Nov 2024 20:05:36 +0100 Subject: [PATCH 10/11] GPU: Simplify __OPENCL__ macros using __OPENCL1__ --- GPU/Common/GPUCommonConstants.h | 2 +- GPU/Common/GPUCommonDef.h | 4 ++-- GPU/Common/GPUCommonMath.h | 10 +++++----- GPU/Common/GPUCommonTypeTraits.h | 2 +- GPU/GPUTracking/Base/GPUParam.inc | 4 ++-- .../Base/opencl-common/GPUReconstructionOCL.cl | 5 +++++ GPU/GPUTracking/Base/opencl2/CMakeLists.txt | 2 -- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 2 +- GPU/GPUTracking/DataTypes/GPUO2DataTypes.h | 4 ++-- GPU/GPUTracking/DataTypes/GPUSettings.h | 2 +- .../DataTypes/GPUTPCGMPolynomialField.h | 4 ++-- GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 10 +++++----- .../Definitions/GPUDefConstantsAndSettings.h | 2 +- .../SliceTracker/GPUTPCGlobalTracking.cxx | 4 ++-- .../SliceTracker/GPUTPCGlobalTracking.h | 2 +- 
GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h | 2 +- GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx | 2 +- GPU/GPUTracking/SliceTracker/GPUTPCTracker.h | 2 +- .../SliceTracker/GPUTPCTrackletConstructor.cxx | 16 ++++++++-------- .../SliceTracker/GPUTPCTrackletConstructor.h | 2 +- 20 files changed, 43 insertions(+), 40 deletions(-) diff --git a/GPU/Common/GPUCommonConstants.h b/GPU/Common/GPUCommonConstants.h index 5744c078dc197..883f64b7bdd12 100644 --- a/GPU/Common/GPUCommonConstants.h +++ b/GPU/Common/GPUCommonConstants.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) namespace GPUCA_NAMESPACE::gpu::gpu_common_constants { static CONSTEXPR const float kCLight = 0.000299792458f; diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index a8bf772d7aacc..ac3d7279fbaf4 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -30,7 +30,7 @@ //Some GPU configuration settings, must be included first #include "GPUCommonDefSettings.h" -#if (!defined(__OPENCL__) || defined(__OPENCLCPP__)) && (!(defined(__CINT__) || defined(__ROOTCINT__)) || defined(__CLING__)) && defined(__cplusplus) && __cplusplus >= 201103L +#if !defined(__OPENCL1__) && (!(defined(__CINT__) || defined(__ROOTCINT__)) || defined(__CLING__)) && defined(__cplusplus) && __cplusplus >= 201103L #define GPUCA_NOCOMPAT // C++11 + No old ROOT5 + No old OpenCL #ifndef __OPENCL__ #define GPUCA_NOCOMPAT_ALLOPENCL // + No OpenCL at all @@ -82,7 +82,7 @@ #define GPUCA_NAMESPACE o2 #endif -#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && !defined(__OPENCLCPP__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) || (defined(__OPENCLCPP__) && defined(GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY)) +#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && 
defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL1__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) || (defined(__OPENCLCPP__) && defined(GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY)) #define GPUCA_NO_CONSTANT_MEMORY #elif defined(__CUDACC__) || defined(__HIPCC__) #define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 8b129ff29a987..bc842d00c6568 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -31,7 +31,7 @@ #include #endif -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) namespace GPUCA_NAMESPACE { namespace gpu @@ -220,7 +220,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) { #if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) return __float_as_uint(x); -#elif defined(GPUCA_GPUCODE_DEVICE) && (defined(__OPENCL__) || defined(__OPENCLCPP__)) +#elif defined(GPUCA_GPUCODE_DEVICE) && defined(__OPENCL__) return as_uint(x); #else return reinterpret_cast(x); @@ -289,7 +289,7 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) { -#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) || defined(__OPENCLCPP__)) +#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL1__) return x == 0 ? 
32 : CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available #else for (int32_t i = 31; i >= 0; i--) { @@ -303,7 +303,7 @@ GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) { -#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) /*|| defined(__OPENCLCPP__)*/) // TODO: remove OPENCLCPP workaround when reported SPIR-V bug is fixed +#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) /* !defined(__OPENCL1__)*/) // TODO: exclude only OPENCLC (not CPP) when reported SPIR-V bug is fixed // use builtin if available return CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x)); #else @@ -563,7 +563,7 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt #undef CHOICE -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) } } #endif diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index 2ae524f8d1c76..88fcc9b838a65 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -21,7 +21,7 @@ #ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif -#elif !defined(__OPENCL__) || defined(__OPENCLCPP__) +#elif !defined(__OPENCL1__) // We just reimplement some type traits in std for the GPU namespace std { diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index c7c526471d505..41ed3c8f203cb 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -17,7 +17,7 @@ #include "GPUParam.h" #include "GPUTPCGMMergedTrackHit.h" -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) #include "GPUTPCClusterOccupancyMap.h" #endif @@ -228,7 +228,7 @@ GPUdi() void MEM_LG(GPUParam)::UpdateClusterError2ByState(int16_t clusterState, MEM_CLASS_PRE() GPUdi() float 
MEM_LG(GPUParam)::GetUnscaledMult(float time) const { -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) if (!occupancyMap) { return 0.f; } diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl index 42a640579e9e3..672c4b63eb476 100644 --- a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl @@ -14,6 +14,11 @@ // clang-format off #define __OPENCL__ +#if defined(__cplusplus) && __cplusplus >= 201703L + #define __OPENCLCPP__ +#else + #define __OPENCL1__ +#endif #define GPUCA_GPUTYPE_OPENCL #ifdef __OPENCLCPP__ diff --git a/GPU/GPUTracking/Base/opencl2/CMakeLists.txt b/GPU/GPUTracking/Base/opencl2/CMakeLists.txt index ec2a4446142c8..0a4168b130766 100644 --- a/GPU/GPUTracking/Base/opencl2/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl2/CMakeLists.txt @@ -32,8 +32,6 @@ set(OCL_DEFINECL "-D$ GPUdii() void GPUTPCGlobalTrackingCopyNumbers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() tracker, int32_t n) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h index 075957ff4c8c8..9d732a582b1c4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h @@ -25,7 +25,7 @@ namespace gpu MEM_CLASS_PRE() class GPUTPCTracker; -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) class GPUTPCGlobalTracking : public GPUKernelTemplate { public: diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h index 8892225f119cd..3ab5b0a331f31 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h @@ -44,7 +44,7 @@ class GPUTPCSliceOutput 
} GPUhd() uint32_t NLocalTracks() const { return mNLocalTracks; } GPUhd() uint32_t NTrackClusters() const { return mNTrackClusters; } -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) GPUhd() const GPUTPCTrack* GetFirstTrack() const { return (const GPUTPCTrack*)((const char*)this + sizeof(*this)); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx index 552d61a88fc39..7428a4ccbd0ed 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx @@ -22,7 +22,7 @@ #include "GPUO2DataTypes.h" #include "GPUTPCTrackParam.h" #include "GPUParam.inc" -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) #include "GPUTPCConvertImpl.h" #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h index f19b4f0a6c0a7..da8d3d1fb28d4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h @@ -94,7 +94,7 @@ class GPUTPCTracker : public GPUProcessor StructGPUParameters gpuParameters; // GPU parameters }; -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const { return mData.ClusterData(); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx index 9d6ed630dee8c..ba17b88436845 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx @@ -21,7 +21,7 @@ #include "GPUTPCTracker.h" #include "GPUTPCTracklet.h" #include "GPUTPCTrackletConstructor.h" -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) #include "GPUTPCGlobalTracking.h" #include "CorrectionMapsHelper.h" #ifdef GPUCA_HAVE_O2HEADERS @@ -140,14 +140,14 @@ GPUdic(2, 1) void 
GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, float z = z0 + hh.y * stepZ; if (iRow != r.mStartRow || !tracker.Param().par.continuousTracking) { tParam.ConstrainZ(z, tracker.ISlice(), z0, r.mLastZ); -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, z); #endif } if (iRow == r.mStartRow) { if (tracker.Param().par.continuousTracking) { float refZ = ((z > 0) ? tracker.Param().rec.tpc.defaultZOffsetOverR : -tracker.Param().rec.tpc.defaultZOffsetOverR) * x; -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) float zTmp = refZ; tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, zTmp); z += zTmp - refZ; // Add zCorrection (=zTmp - refZ) to z, such that zOffset is set such, that transformed (z - zOffset) becomes refZ @@ -266,7 +266,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, r.mNMissed++; float x = row.X(); -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) { float tmpY, tmpZ; if (!tParam.GetPropagatedYZ(tracker.Param().bzCLight, x, tmpY, tmpZ)) { @@ -299,7 +299,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); #endif //! 
GPUCA_TEXTURE_FETCH_CONSTRUCTOR -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISlice(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); #endif @@ -391,7 +391,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } } while (false); (void)found; -#if defined(GPUCA_HAVE_O2HEADERS) && (!defined(__OPENCL__) || defined(__OPENCLCPP__)) +#if defined(GPUCA_HAVE_O2HEADERS) && !defined(__OPENCL1__) if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISlice(), iRow, yUncorrected)); if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISlice(), iRow, pad)) { @@ -461,7 +461,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLO iRow = r.mEndRow; iRowEnd = -1; float x = tracker.Row(r.mEndRow).X(); -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) { float tmpY, tmpZ; if (tParam.GetPropagatedYZ(tracker.Param().bzCLight, x, tmpY, tmpZ)) { @@ -584,7 +584,7 @@ GPUd() int32_t GPUTPCTrackletConstructor::FetchTracklet(GPUconstantref() MEM_GLO #endif // GPUCA_GPUCODE -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) template <> GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, MEM_LG(GPUTPCTrackParam) & GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h index 06dd941ca5cf7..effee4fa757b8 100644 --- 
a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h @@ -100,7 +100,7 @@ class GPUTPCTrackletConstructor GPUd() static int32_t FetchTracklet(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem); #endif // GPUCA_GPUCODE -#if !defined(__OPENCL__) || defined(__OPENCLCPP__) +#if !defined(__OPENCL1__) template GPUd() static int32_t GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); #endif From 6c81c31a8ec56a064414f646580a547d9158a6be Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 13 Nov 2024 09:35:07 +0100 Subject: [PATCH 11/11] GPU Display: make connecting A and C side segments of a track optional --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 3 +- GPU/GPUTracking/display/GPUDisplay.cxx | 1 + GPU/GPUTracking/display/GPUDisplay.h | 1 + .../display/frontend/GPUDisplayKeys.cxx | 7 ++-- .../display/helpers/GPUDisplayHelpers.cxx | 7 ++++ .../display/render/GPUDisplayDraw.cxx | 33 +++++++++++-------- 6 files changed, 36 insertions(+), 16 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 106a222862f49..c4e0dadb87659 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -346,7 +346,8 @@ AddOption(drawTracksAndFilter, bool, false, "", 0, "Use AND filter instead of OR AddOption(propagateLoopers, bool, false, "", 0, "Enabale propagation of loopers") AddOption(clustersOnly, bool, false, "", 0, "Visualize clusters only") AddOption(clustersOnNominalRow, bool, false, "", 0, "Show clusters at nominal x of pad row for early-transformed data") -AddOption(separateGlobalTracks, bool, false, "", 0, "Separate global tracks") +AddOption(separateGlobalTracks, bool, 
false, "", 0, "Draw track segments propagated to adjacent sectors separately") +AddOption(splitCETracks, int8_t, -1, "", 0, "Split CE tracks when they cross the central electrode (-1 = for triggered data)") AddOption(markClusters, int32_t, 0, "", 0, "Mark clusters") AddOption(markFakeClusters, int32_t, 0, "", 0, "Mark fake clusters") AddOption(markAdjacentClusters, int32_t, 0, "", 0, "Mark adjacent clusters") diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 74d89fbf6de81..56e59d664491a 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -611,6 +611,7 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) bool showTimer = false; bool doScreenshot = (mRequestScreenshot || mAnimateScreenshot) && animateTime < 0; + updateOptions(); if (animateTime < 0 && (mUpdateEventData || mResetScene || mUpdateVertexLists) && mIOPtrs) { disableUnsupportedOptions(); } diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 38dacae60c51a..ab6fe540d01bf 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -150,6 +150,7 @@ class GPUDisplay : public GPUDisplayInterface void DrawGLScene_drawCommands(); int32_t InitDisplay_internal(); int32_t getNumThreads(); + void updateOptions(); void disableUnsupportedOptions(); int32_t buildTrackFilter(); const GPUTPCTracker& sliceTracker(int32_t iSlice); diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index 1842c276a580c..8dccdc60c0d93 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -35,7 +35,7 @@ const char* HelpText[] = { "[L] / [K] Draw single collisions (next / previous)", "[C] Colorcode clusters of different collisions", "[v] Hide rejected clusters from tracks", - "[j] Show global tracks as 
additional segments of final tracks", + "[j] Show tracks segments propagated to adjacent sector in different color / splt CE tracks", "[u] Cycle through track filter", "[E] / [G] Extrapolate tracks / loopers", "[t] / [T] Take Screenshot / Record Animation to pictures", @@ -164,8 +164,11 @@ void GPUDisplay::HandleKey(uint8_t key) mPrintInfoText &= 3; SetInfo("Info text display - console: %s, onscreen %s", (mPrintInfoText & 2) ? "enabled" : "disabled", (mPrintInfoText & 1) ? "enabled" : "disabled"); } else if (key == 'j') { + if (mCfgH.separateGlobalTracks) { + mCfgH.splitCETracks ^= 1; + } mCfgH.separateGlobalTracks ^= 1; - SetInfo("Seperated display of global tracks %s", mCfgH.separateGlobalTracks ? "enabled" : "disabled"); + SetInfo("Seperated display of tracks propagated to adjacent sectors %s / of CE tracks %s", mCfgH.separateGlobalTracks ? "enabled" : "disabled", mCfgH.splitCETracks ? "enabled" : "disabled"); } else if (key == 'c') { if (mCfgH.markClusters == 0) { mCfgH.markClusters = 1; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx index cd73cc0b9b34f..d782898380281 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx @@ -36,6 +36,13 @@ int32_t GPUDisplay::getNumThreads() } } +void GPUDisplay::updateOptions() +{ + if (mCfgH.splitCETracks == -1 && mParam) { + mCfgH.splitCETracks = mParam->continuousMaxTimeBin != 0; + } +} + void GPUDisplay::disableUnsupportedOptions() { if (!mIOPtrs->mergedTrackHitAttachment) { diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 746c41938e2e1..ffebc373b253f 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -45,7 +45,6 @@ using namespace GPUCA_NAMESPACE::gpu; #define GET_CID(slice, i) (mParam->par.earlyTpcTransform ? 
mIOPtrs->clusterData[slice][i].id : (mIOPtrs->clustersNative->clusterOffset[slice][0] + i)) -#define SEPERATE_GLOBAL_TRACKS_LIMIT (mCfgH.separateGlobalTracks ? tGLOBALTRACK : TRACK_TYPE_ID_LIMIT) const GPUTRDGeometry* GPUDisplay::trdGeometry() { return (GPUTRDGeometry*)mCalib->trdGeometry; } const GPUTPCTracker& GPUDisplay::sliceTracker(int32_t iSlice) { return mChain->GetTPCSliceTrackers()[iSlice]; } @@ -421,6 +420,8 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* } // Print TPC part of track + int32_t separateGlobalTracksLimit = (mCfgH.separateGlobalTracks ? tGLOBALTRACK : TRACK_TYPE_ID_LIMIT); + uint32_t lastSide = -1; for (int32_t k = 0; k < nClusters; k++) { if constexpr (std::is_same_v) { if (mCfgH.hideRejectedClusters && (mIOPtrs->mergedTrackHits[track->FirstClusterRef() + k].state & GPUTPCGMMergedTrackHit::flagReject)) { @@ -435,9 +436,15 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* } int32_t w = mGlobalPos[cid].w; if (drawing) { - drawPointLinestrip(iSlice, cid, tFINALTRACK, SEPERATE_GLOBAL_TRACKS_LIMIT); + if (mCfgH.splitCETracks && lastSide != (mGlobalPos[cid].z < 0)) { + insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); + drawing = false; + lastCluster = -1; + } else { + drawPointLinestrip(iSlice, cid, tFINALTRACK, separateGlobalTracksLimit); + } } - if (w == SEPERATE_GLOBAL_TRACKS_LIMIT) { + if (w == separateGlobalTracksLimit) { if (drawing) { insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); } @@ -445,21 +452,21 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* } else { if (!drawing) { startCountInner = mVertexBuffer[iSlice].size(); - } - if (!drawing) { - drawPointLinestrip(iSlice, cid, tFINALTRACK, SEPERATE_GLOBAL_TRACKS_LIMIT); - } - if (!drawing && lastCluster != -1) { - if constexpr (std::is_same_v) { - cid = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + lastCluster].num; - } else { - 
cid = &track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative) - mIOPtrs->clustersNative->clustersLinear; + if (lastCluster != -1 && (!mCfgH.splitCETracks || lastSide == (mGlobalPos[cid].z < 0))) { + int32_t lastcid; + if constexpr (std::is_same_v) { + lastcid = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + lastCluster].num; + } else { + lastcid = &track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative) - mIOPtrs->clustersNative->clustersLinear; + } + drawPointLinestrip(iSlice, lastcid, tFINALTRACK, separateGlobalTracksLimit); } - drawPointLinestrip(iSlice, cid, 7, SEPERATE_GLOBAL_TRACKS_LIMIT); + drawPointLinestrip(iSlice, cid, tFINALTRACK, separateGlobalTracksLimit); } drawing = true; } lastCluster = k; + lastSide = mGlobalPos[cid].z < 0; } // Print ITS part of track