From 68a86df98900738cf455ca88336bfbb8209b351a Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Mon, 20 Apr 2026 12:01:56 +0200 Subject: [PATCH 1/2] [df] Manage resources created via TFile::Get Fixes https://github.com/root-project/root/issues/21962 --- tree/dataframe/src/RLoopManager.cxx | 41 ++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/tree/dataframe/src/RLoopManager.cxx b/tree/dataframe/src/RLoopManager.cxx index fda9e99c71ec3..ceb9a6a996664 100644 --- a/tree/dataframe/src/RLoopManager.cxx +++ b/tree/dataframe/src/RLoopManager.cxx @@ -211,6 +211,28 @@ auto MakeDatasetColReadersKey(std::string_view colName, const std::type_info &ti // df.Sum("stdVectorBranch"); return std::string(colName) + ':' + ti.name(); } + +/// \brief Check if object of a certain type is in the directory +/// +/// Attempts to read an object of the specified type via TDirectory::Get, wraps +/// it in a std::unique_ptr to avoid leaking the object. +template +bool IsObjectInDir(std::string_view objName, TDirectory &dir) +{ + std::unique_ptr o{dir.Get(objName.data())}; + return o.get(); +} + +/// \brief Check if a generic object is in the directory +/// +/// Checks if a generic object is in the directory, uses TDirectory::GetKey +/// to avoid having to deal with memory management of the object being read +/// without having its type. +template <> +bool IsObjectInDir(std::string_view objName, TDirectory &dir) +{ + return dir.GetKey(objName.data()); +} } // anonymous namespace /** @@ -391,8 +413,8 @@ void RLoopManager::ChangeSpec(ROOT::RDF::Experimental::RDatasetSpec &&spec) fSamples = spec.MoveOutSamples(); fSampleMap.clear(); - const bool isTTree = inFile->Get(datasetName[0].data()); - const bool isRNTuple = inFile->Get(datasetName[0].data()); + const bool isTTree = IsObjectInDir(datasetName[0], *inFile); + const bool isRNTuple = IsObjectInDir(datasetName[0], *inFile); if (isTTree || isRNTuple) { @@ -458,7 +480,8 @@ void RLoopManager::ChangeSpec(ROOT::RDF::Experimental::RDatasetSpec &&spec) v.second.reset(); } } else { - std::string errMsg = inFile->Get(datasetName[0].data()) ? "unsupported data format for" : "cannot find"; + std::string errMsg = + IsObjectInDir(datasetName[0].data(), *inFile) ? "unsupported data format for" : "cannot find"; throw std::invalid_argument("RDataFrame: " + errMsg + " dataset \"" + std::string(datasetName[0]) + "\" in file \"" + inFile->GetName() + "\"."); } @@ -1252,13 +1275,13 @@ ROOT::Detail::RDF::CreateLMFromFile(std::string_view datasetName, std::string_vi auto inFile = OpenFileWithSanityChecks(fileNameGlob); - if (inFile->Get(datasetName.data())) { + if (IsObjectInDir(datasetName, *inFile)) { return CreateLMFromTTree(datasetName, fileNameGlob, defaultColumns, /*checkFile=*/false); - } else if (inFile->Get(datasetName.data())) { + } else if (IsObjectInDir(datasetName, *inFile)) { return CreateLMFromRNTuple(datasetName, fileNameGlob, defaultColumns); } - std::string errMsg = inFile->Get(datasetName.data()) ? "unsupported data format for" : "cannot find"; + std::string errMsg = IsObjectInDir(datasetName, *inFile) ? "unsupported data format for" : "cannot find"; throw std::invalid_argument("RDataFrame: " + errMsg + " dataset \"" + std::string(datasetName) + "\" in file \"" + inFile->GetName() + "\"."); @@ -1274,13 +1297,13 @@ ROOT::Detail::RDF::CreateLMFromFile(std::string_view datasetName, const std::vec auto inFile = OpenFileWithSanityChecks(fileNameGlobs[0]); - if (inFile->Get(datasetName.data())) { + if (IsObjectInDir(datasetName, *inFile)) { return CreateLMFromTTree(datasetName, fileNameGlobs, defaultColumns, /*checkFile=*/false); - } else if (inFile->Get(datasetName.data())) { + } else if (IsObjectInDir(datasetName, *inFile)) { return CreateLMFromRNTuple(datasetName, fileNameGlobs, defaultColumns); } - std::string errMsg = inFile->Get(datasetName.data()) ? "unsupported data format for" : "cannot find"; + std::string errMsg = IsObjectInDir(datasetName, *inFile) ? "unsupported data format for" : "cannot find"; throw std::invalid_argument("RDataFrame: " + errMsg + " dataset \"" + std::string(datasetName) + "\" in file \"" + inFile->GetName() + "\"."); From e651291bc64b31349fe3e4becd09185f117c4419 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Tue, 21 Apr 2026 13:23:14 +0200 Subject: [PATCH 2/2] [df] Manage TFile resources in RDFSnapshotHelpers * Simplify logic to ensure output file is ready for snapshot * Avoid unnecessarily calling TFile::Get and prefer TFile::GetKey to check for presence of object --- tree/dataframe/src/RDFSnapshotHelpers.cxx | 93 +++++-------------- tree/dataframe/test/dataframe_snapshot.cxx | 8 +- .../test/dataframe_snapshot_ntuple.cxx | 4 +- 3 files changed, 29 insertions(+), 76 deletions(-) diff --git a/tree/dataframe/src/RDFSnapshotHelpers.cxx b/tree/dataframe/src/RDFSnapshotHelpers.cxx index ce5c339a0511e..bd94cc5fc11f1 100644 --- a/tree/dataframe/src/RDFSnapshotHelpers.cxx +++ b/tree/dataframe/src/RDFSnapshotHelpers.cxx @@ -209,88 +209,40 @@ void CreateFundamentalTypeBranch(TTree &outputTree, RBranchData &bd, void *value bd.fOutputBranch = outputTree.Branch(bd.fOutputBranchName.c_str(), valueAddress, leafList.c_str(), bufSize); } -/// Ensure that the TTree with the resulting snapshot can be written to the target TFile. This means checking that the -/// TFile can be opened in the mode specified in `opts`, deleting any existing TTrees in case +/// Ensure that an object with the input name can be written to the target file. This means checking that the +/// TFile can be opened in the mode specified in `opts`, deleting any pre-existing objects with the same name in case /// `opts.fOverwriteIfExists = true`, or throwing an error otherwise. -void EnsureValidSnapshotTTreeOutput(const ROOT::RDF::RSnapshotOptions &opts, const std::string &treeName, - const std::string &fileName) +void EnsureValidSnapshotOutput(const ROOT::RDF::RSnapshotOptions &opts, const std::string &objName, + const std::string &fileName) { TString fileMode = opts.fMode; fileMode.ToLower(); if (fileMode != "update") return; - // output file opened in "update" mode: must check whether output TTree is already present in file + // output file opened in "update" mode: must check whether target object name is already present in file std::unique_ptr outFile{TFile::Open(fileName.c_str(), "update")}; if (!outFile || outFile->IsZombie()) throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode"); - TObject *outTree = outFile->Get(treeName.c_str()); - if (outTree == nullptr) + // Object is not present in the file, we are good + if (!outFile->GetKey(objName.c_str())) return; - // object called treeName is already present in the file + // object called objName is already present in the file if (opts.fOverwriteIfExists) { - if (outTree->InheritsFrom("TTree")) { - static_cast(outTree)->Delete("all"); + if (auto existingTree = outFile->Get(objName.c_str()); existingTree) { + // Special case for TTree. TTree::Delete invalidates the 'this' pointer, so we don't wrap it in a + // std::unique_ptr. + existingTree->Delete("all"); } else { - outFile->Delete(treeName.c_str()); + // Ensure deletion of object and all its cycles. + outFile->Delete((objName + ";*").c_str()); } } else { - const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName + - "\". If you want to delete the original tree and write another, please set " - "RSnapshotOptions::fOverwriteIfExists to true."; - throw std::invalid_argument(msg); - } -} - -/// Ensure that the RNTuple with the resulting snapshot can be written to the target TFile. This means checking that the -/// TFile can be opened in the mode specified in `opts`, deleting any existing RNTuples in case -/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise. -void EnsureValidSnapshotRNTupleOutput(const ROOT::RDF::RSnapshotOptions &opts, const std::string &ntupleName, - const std::string &fileName) -{ - TString fileMode = opts.fMode; - fileMode.ToLower(); - if (fileMode != "update") - return; - - // output file opened in "update" mode: must check whether output RNTuple is already present in file - std::unique_ptr outFile{TFile::Open(fileName.c_str(), "update")}; - if (!outFile || outFile->IsZombie()) - throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode"); - - auto *outNTuple = outFile->Get(ntupleName.c_str()); - - if (outNTuple) { - if (opts.fOverwriteIfExists) { - outFile->Delete((ntupleName + ";*").c_str()); - return; - } else { - const std::string msg = "Snapshot: RNTuple \"" + ntupleName + "\" already present in file \"" + fileName + - "\". If you want to delete the original ntuple and write another, please set " - "the 'fOverwriteIfExists' option to true in RSnapshotOptions."; - throw std::invalid_argument(msg); - } - } - - // Also check if there is any object other than an RNTuple with the provided ntupleName. - TObject *outObj = outFile->Get(ntupleName.c_str()); - - if (!outObj) - return; - - // An object called ntupleName is already present in the file. - if (opts.fOverwriteIfExists) { - if (auto tree = dynamic_cast(outObj)) { - tree->Delete("all"); - } else { - outFile->Delete((ntupleName + ";*").c_str()); - } - } else { - const std::string msg = "Snapshot: object \"" + ntupleName + "\" already present in file \"" + fileName + - "\". If you want to delete the original object and write a new RNTuple, please set " - "the 'fOverwriteIfExists' option to true in RSnapshotOptions."; + const std::string msg = "Snapshot: object \"" + objName + "\" already present in file \"" + fileName + + "\". If you want to delete the original object and write another, please set the " + "'fOverwriteIfExists' option to true in RSnapshotOptions."; throw std::invalid_argument(msg); } } @@ -441,7 +393,7 @@ ROOT::Internal::RDF::UntypedSnapshotTTreeHelper::UntypedSnapshotTTreeHelper( fOutputLoopManager(loopManager), fInputLoopManager(inputLM) { - EnsureValidSnapshotTTreeOutput(fOptions, fTreeName, fFileName); + EnsureValidSnapshotOutput(fOptions, fTreeName, fFileName); auto outputBranchNames = ReplaceDotWithUnderscore(bnames); fBranchData.reserve(vbnames.size()); @@ -634,7 +586,7 @@ ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::UntypedSnapshotTTreeHelperMT( fOutputLoopManager(loopManager), fInputLoopManager(inputLM) { - EnsureValidSnapshotTTreeOutput(fOptions, fTreeName, fFileName); + EnsureValidSnapshotOutput(fOptions, fTreeName, fFileName); auto outputBranchNames = ReplaceDotWithUnderscore(bnames); fBranchData.reserve(fNSlots); @@ -797,7 +749,8 @@ void ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::Finalize() // filtering), create an empty TTree in the output file and create the branches to preserve the schema auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName; assert(fOutputFile && "Missing output file in Snapshot finalization."); - if (!fOutputFile->Get(fullTreeName.c_str())) { + // Use GetKey to avoid having to deal with memory management of the object in the file + if (!fOutputFile->GetKey(fullTreeName.c_str())) { // First find in which directory we need to write the output TTree TDirectory *treeDirectory = fOutputFile; @@ -884,7 +837,7 @@ ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::UntypedSnapshotRNTupleHelper( fEntries(nSlots), fInputColumnTypeIDs(colTypeIDs) { - EnsureValidSnapshotRNTupleOutput(fOptions, fNTupleName, fFileName); + EnsureValidSnapshotOutput(fOptions, fNTupleName, fFileName); } // Define special member methods here where the definition of all the data member types is available @@ -1147,7 +1100,7 @@ ROOT::Internal::RDF::SnapshotHelperWithVariations::SnapshotHelperWithVariations( const std::vector &colTypeIDs) : fOptions(options), fInputLoopManager{inputLoopMgr}, fOutputLoopManager{outputLoopMgr} { - EnsureValidSnapshotTTreeOutput(fOptions, std::string(treename), std::string(filename)); + EnsureValidSnapshotOutput(fOptions, std::string(treename), std::string(filename)); TDirectory::TContext fileCtxt; fOutputHandle = std::make_shared( diff --git a/tree/dataframe/test/dataframe_snapshot.cxx b/tree/dataframe/test/dataframe_snapshot.cxx index 1c6711edce251..728bad3fedd9b 100644 --- a/tree/dataframe/test/dataframe_snapshot.cxx +++ b/tree/dataframe/test/dataframe_snapshot.cxx @@ -395,8 +395,8 @@ TEST_F(RDFSnapshot, Snapshot_update_same_treename) TestSnapshotUpdate(tdf, "snap_update_sametreenames.root", "t", "t", false); } catch (const std::invalid_argument &e) { const std::string msg = - "Snapshot: tree \"t\" already present in file \"snap_update_sametreenames.root\". If you want to delete the " - "original tree and write another, please set RSnapshotOptions::fOverwriteIfExists to true."; + "Snapshot: object \"t\" already present in file \"snap_update_sametreenames.root\". If you want to delete the " + "original object and write another, please set the 'fOverwriteIfExists' option to true in RSnapshotOptions."; EXPECT_EQ(e.what(), msg); exceptionCaught = true; } @@ -1276,8 +1276,8 @@ TEST_F(RDFSnapshotMT, Snapshot_update_same_treename) TestSnapshotUpdate(tdf, "snap_update_sametreenames.root", "t", "t", false); } catch (const std::invalid_argument &e) { const std::string msg = - "Snapshot: tree \"t\" already present in file \"snap_update_sametreenames.root\". If you want to delete the " - "original tree and write another, please set RSnapshotOptions::fOverwriteIfExists to true."; + "Snapshot: object \"t\" already present in file \"snap_update_sametreenames.root\". If you want to delete the " + "original object and write another, please set the 'fOverwriteIfExists' option to true in RSnapshotOptions."; EXPECT_EQ(e.what(), msg); exceptionCaught = true; } diff --git a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx index 0d5fdd6acf7ed..141b4250aabb6 100644 --- a/tree/dataframe/test/dataframe_snapshot_ntuple.cxx +++ b/tree/dataframe/test/dataframe_snapshot_ntuple.cxx @@ -501,9 +501,9 @@ TEST(RDFSnapshotRNTuple, UpdateSameName) FAIL() << "snapshotting in \"UPDATE\" mode to the same ntuple name without `fOverwriteIfExists` is not allowed "; } catch (const std::invalid_argument &err) { EXPECT_STREQ(err.what(), - "Snapshot: RNTuple \"ntuple\" already present in file " + "Snapshot: object \"ntuple\" already present in file " "\"RDFSnapshotRNTuple_update_same_name.root\". If you want to delete the original " - "ntuple and write another, please set the 'fOverwriteIfExists' option to true in RSnapshotOptions."); + "object and write another, please set the 'fOverwriteIfExists' option to true in RSnapshotOptions."); } opts.fOverwriteIfExists = true;