Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 23 additions & 70 deletions tree/dataframe/src/RDFSnapshotHelpers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -209,88 +209,40 @@ void CreateFundamentalTypeBranch(TTree &outputTree, RBranchData &bd, void *value
bd.fOutputBranch = outputTree.Branch(bd.fOutputBranchName.c_str(), valueAddress, leafList.c_str(), bufSize);
}

/// Ensure that the TTree with the resulting snapshot can be written to the target TFile. This means checking that the
/// TFile can be opened in the mode specified in `opts`, deleting any existing TTrees in case
/// Ensure that an object with the input name can be written to the target file. This means checking that the
/// TFile can be opened in the mode specified in `opts`, deleting any pre-existing objects with the same name in case
/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
void EnsureValidSnapshotTTreeOutput(const ROOT::RDF::RSnapshotOptions &opts, const std::string &treeName,
const std::string &fileName)
void EnsureValidSnapshotOutput(const ROOT::RDF::RSnapshotOptions &opts, const std::string &objName,
const std::string &fileName)
{
TString fileMode = opts.fMode;
fileMode.ToLower();
if (fileMode != "update")
return;

// output file opened in "update" mode: must check whether output TTree is already present in file
// output file opened in "update" mode: must check whether target object name is already present in file
std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
if (!outFile || outFile->IsZombie())
throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");

TObject *outTree = outFile->Get(treeName.c_str());
if (outTree == nullptr)
// Object is not present in the file, we are good
if (!outFile->GetKey(objName.c_str()))
return;

// object called treeName is already present in the file
// object called objName is already present in the file
if (opts.fOverwriteIfExists) {
if (outTree->InheritsFrom("TTree")) {
static_cast<TTree *>(outTree)->Delete("all");
if (auto existingTree = outFile->Get<TTree>(objName.c_str()); existingTree) {
// Special case for TTree. TTree::Delete invalidates the 'this' pointer, so we don't wrap it in a
// std::unique_ptr.
existingTree->Delete("all");
} else {
outFile->Delete(treeName.c_str());
// Ensure deletion of object and all its cycles.
outFile->Delete((objName + ";*").c_str());
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That feels wrong. What if the object is semantically the same? In that case this deletes all previous 'backup' copies of the object, is that really the intent? (usually the over-write only over-write the lastest key and keeps the backup)?

Related though, the TTree::Delete("all"); has no choice but to delete also the backup copies since it removes from the files all the TBasket that they share.

}
} else {
const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
"\". If you want to delete the original tree and write another, please set "
"RSnapshotOptions::fOverwriteIfExists to true.";
throw std::invalid_argument(msg);
}
}

/// Ensure that the RNTuple with the resulting snapshot can be written to the target TFile. This means checking that the
/// TFile can be opened in the mode specified in `opts`, deleting any existing RNTuples in case
/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
void EnsureValidSnapshotRNTupleOutput(const ROOT::RDF::RSnapshotOptions &opts, const std::string &ntupleName,
const std::string &fileName)
{
TString fileMode = opts.fMode;
fileMode.ToLower();
if (fileMode != "update")
return;

// output file opened in "update" mode: must check whether output RNTuple is already present in file
std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
if (!outFile || outFile->IsZombie())
throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");

auto *outNTuple = outFile->Get<ROOT::RNTuple>(ntupleName.c_str());

if (outNTuple) {
if (opts.fOverwriteIfExists) {
outFile->Delete((ntupleName + ";*").c_str());
return;
} else {
const std::string msg = "Snapshot: RNTuple \"" + ntupleName + "\" already present in file \"" + fileName +
"\". If you want to delete the original ntuple and write another, please set "
"the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
throw std::invalid_argument(msg);
}
}

// Also check if there is any object other than an RNTuple with the provided ntupleName.
TObject *outObj = outFile->Get(ntupleName.c_str());

if (!outObj)
return;

// An object called ntupleName is already present in the file.
if (opts.fOverwriteIfExists) {
if (auto tree = dynamic_cast<TTree *>(outObj)) {
tree->Delete("all");
} else {
outFile->Delete((ntupleName + ";*").c_str());
}
} else {
const std::string msg = "Snapshot: object \"" + ntupleName + "\" already present in file \"" + fileName +
"\". If you want to delete the original object and write a new RNTuple, please set "
"the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
const std::string msg = "Snapshot: object \"" + objName + "\" already present in file \"" + fileName +
"\". If you want to delete the original object and write another, please set the "
"'fOverwriteIfExists' option to true in RSnapshotOptions.";
throw std::invalid_argument(msg);
}
}
Expand Down Expand Up @@ -441,7 +393,7 @@ ROOT::Internal::RDF::UntypedSnapshotTTreeHelper::UntypedSnapshotTTreeHelper(
fOutputLoopManager(loopManager),
fInputLoopManager(inputLM)
{
EnsureValidSnapshotTTreeOutput(fOptions, fTreeName, fFileName);
EnsureValidSnapshotOutput(fOptions, fTreeName, fFileName);

auto outputBranchNames = ReplaceDotWithUnderscore(bnames);
fBranchData.reserve(vbnames.size());
Expand Down Expand Up @@ -634,7 +586,7 @@ ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::UntypedSnapshotTTreeHelperMT(
fOutputLoopManager(loopManager),
fInputLoopManager(inputLM)
{
EnsureValidSnapshotTTreeOutput(fOptions, fTreeName, fFileName);
EnsureValidSnapshotOutput(fOptions, fTreeName, fFileName);

auto outputBranchNames = ReplaceDotWithUnderscore(bnames);
fBranchData.reserve(fNSlots);
Expand Down Expand Up @@ -797,7 +749,8 @@ void ROOT::Internal::RDF::UntypedSnapshotTTreeHelperMT::Finalize()
// filtering), create an empty TTree in the output file and create the branches to preserve the schema
auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
assert(fOutputFile && "Missing output file in Snapshot finalization.");
if (!fOutputFile->Get(fullTreeName.c_str())) {
// Use GetKey to avoid having to deal with memory management of the object in the file
if (!fOutputFile->GetKey(fullTreeName.c_str())) {

// First find in which directory we need to write the output TTree
TDirectory *treeDirectory = fOutputFile;
Expand Down Expand Up @@ -884,7 +837,7 @@ ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::UntypedSnapshotRNTupleHelper(
fEntries(nSlots),
fInputColumnTypeIDs(colTypeIDs)
{
EnsureValidSnapshotRNTupleOutput(fOptions, fNTupleName, fFileName);
EnsureValidSnapshotOutput(fOptions, fNTupleName, fFileName);
}

// Define special member methods here where the definition of all the data member types is available
Expand Down Expand Up @@ -1147,7 +1100,7 @@ ROOT::Internal::RDF::SnapshotHelperWithVariations::SnapshotHelperWithVariations(
const std::vector<const std::type_info *> &colTypeIDs)
: fOptions(options), fInputLoopManager{inputLoopMgr}, fOutputLoopManager{outputLoopMgr}
{
EnsureValidSnapshotTTreeOutput(fOptions, std::string(treename), std::string(filename));
EnsureValidSnapshotOutput(fOptions, std::string(treename), std::string(filename));

TDirectory::TContext fileCtxt;
fOutputHandle = std::make_shared<SnapshotOutputWriter>(
Expand Down
41 changes: 32 additions & 9 deletions tree/dataframe/src/RLoopManager.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,28 @@ auto MakeDatasetColReadersKey(std::string_view colName, const std::type_info &ti
// df.Sum<RVecI>("stdVectorBranch");
return std::string(colName) + ':' + ti.name();
}

/// \brief Check if object of a certain type is in the directory
///
/// Attempts to read an object of the specified type via TDirectory::Get, wraps
/// it in a std::unique_ptr to avoid leaking the object.
template <typename T>
bool IsObjectInDir(std::string_view objName, TDirectory &dir)
{
std::unique_ptr<T> o{dir.Get<T>(objName.data())};
return o.get();
}

/// \brief Check if a generic object is in the directory
///
/// Checks if a generic object is in the directory, uses TDirectory::GetKey
/// to avoid having to deal with memory management of the object being read
/// without having its type.
template <>
bool IsObjectInDir<void>(std::string_view objName, TDirectory &dir)
{
return dir.GetKey(objName.data());
}
} // anonymous namespace

/**
Expand Down Expand Up @@ -391,8 +413,8 @@ void RLoopManager::ChangeSpec(ROOT::RDF::Experimental::RDatasetSpec &&spec)
fSamples = spec.MoveOutSamples();
fSampleMap.clear();

const bool isTTree = inFile->Get<TTree>(datasetName[0].data());
const bool isRNTuple = inFile->Get<ROOT::RNTuple>(datasetName[0].data());
const bool isTTree = IsObjectInDir<TTree>(datasetName[0], *inFile);
const bool isRNTuple = IsObjectInDir<ROOT::RNTuple>(datasetName[0], *inFile);

if (isTTree || isRNTuple) {

Expand Down Expand Up @@ -458,7 +480,8 @@ void RLoopManager::ChangeSpec(ROOT::RDF::Experimental::RDatasetSpec &&spec)
v.second.reset();
}
} else {
std::string errMsg = inFile->Get(datasetName[0].data()) ? "unsupported data format for" : "cannot find";
std::string errMsg =
IsObjectInDir<void>(datasetName[0].data(), *inFile) ? "unsupported data format for" : "cannot find";
throw std::invalid_argument("RDataFrame: " + errMsg + " dataset \"" + std::string(datasetName[0]) + "\" in file \"" +
inFile->GetName() + "\".");
}
Expand Down Expand Up @@ -1252,13 +1275,13 @@ ROOT::Detail::RDF::CreateLMFromFile(std::string_view datasetName, std::string_vi

auto inFile = OpenFileWithSanityChecks(fileNameGlob);

if (inFile->Get<TTree>(datasetName.data())) {
if (IsObjectInDir<TTree>(datasetName, *inFile)) {
return CreateLMFromTTree(datasetName, fileNameGlob, defaultColumns, /*checkFile=*/false);
} else if (inFile->Get<ROOT::RNTuple>(datasetName.data())) {
} else if (IsObjectInDir<ROOT::RNTuple>(datasetName, *inFile)) {
return CreateLMFromRNTuple(datasetName, fileNameGlob, defaultColumns);
}

std::string errMsg = inFile->Get(datasetName.data()) ? "unsupported data format for" : "cannot find";
std::string errMsg = IsObjectInDir<void>(datasetName, *inFile) ? "unsupported data format for" : "cannot find";

throw std::invalid_argument("RDataFrame: " + errMsg + " dataset \"" + std::string(datasetName) + "\" in file \"" +
inFile->GetName() + "\".");
Expand All @@ -1274,13 +1297,13 @@ ROOT::Detail::RDF::CreateLMFromFile(std::string_view datasetName, const std::vec

auto inFile = OpenFileWithSanityChecks(fileNameGlobs[0]);

if (inFile->Get<TTree>(datasetName.data())) {
if (IsObjectInDir<TTree>(datasetName, *inFile)) {
return CreateLMFromTTree(datasetName, fileNameGlobs, defaultColumns, /*checkFile=*/false);
} else if (inFile->Get<ROOT::RNTuple>(datasetName.data())) {
} else if (IsObjectInDir<ROOT::RNTuple>(datasetName, *inFile)) {
return CreateLMFromRNTuple(datasetName, fileNameGlobs, defaultColumns);
}

std::string errMsg = inFile->Get(datasetName.data()) ? "unsupported data format for" : "cannot find";
std::string errMsg = IsObjectInDir<void>(datasetName, *inFile) ? "unsupported data format for" : "cannot find";

throw std::invalid_argument("RDataFrame: " + errMsg + " dataset \"" + std::string(datasetName) + "\" in file \"" +
inFile->GetName() + "\".");
Expand Down
8 changes: 4 additions & 4 deletions tree/dataframe/test/dataframe_snapshot.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,8 @@ TEST_F(RDFSnapshot, Snapshot_update_same_treename)
TestSnapshotUpdate(tdf, "snap_update_sametreenames.root", "t", "t", false);
} catch (const std::invalid_argument &e) {
const std::string msg =
"Snapshot: tree \"t\" already present in file \"snap_update_sametreenames.root\". If you want to delete the "
"original tree and write another, please set RSnapshotOptions::fOverwriteIfExists to true.";
"Snapshot: object \"t\" already present in file \"snap_update_sametreenames.root\". If you want to delete the "
"original object and write another, please set the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
EXPECT_EQ(e.what(), msg);
exceptionCaught = true;
}
Expand Down Expand Up @@ -1276,8 +1276,8 @@ TEST_F(RDFSnapshotMT, Snapshot_update_same_treename)
TestSnapshotUpdate(tdf, "snap_update_sametreenames.root", "t", "t", false);
} catch (const std::invalid_argument &e) {
const std::string msg =
"Snapshot: tree \"t\" already present in file \"snap_update_sametreenames.root\". If you want to delete the "
"original tree and write another, please set RSnapshotOptions::fOverwriteIfExists to true.";
"Snapshot: object \"t\" already present in file \"snap_update_sametreenames.root\". If you want to delete the "
"original object and write another, please set the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
EXPECT_EQ(e.what(), msg);
exceptionCaught = true;
}
Expand Down
4 changes: 2 additions & 2 deletions tree/dataframe/test/dataframe_snapshot_ntuple.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -501,9 +501,9 @@ TEST(RDFSnapshotRNTuple, UpdateSameName)
FAIL() << "snapshotting in \"UPDATE\" mode to the same ntuple name without `fOverwriteIfExists` is not allowed ";
} catch (const std::invalid_argument &err) {
EXPECT_STREQ(err.what(),
"Snapshot: RNTuple \"ntuple\" already present in file "
"Snapshot: object \"ntuple\" already present in file "
"\"RDFSnapshotRNTuple_update_same_name.root\". If you want to delete the original "
"ntuple and write another, please set the 'fOverwriteIfExists' option to true in RSnapshotOptions.");
"object and write another, please set the 'fOverwriteIfExists' option to true in RSnapshotOptions.");
}

opts.fOverwriteIfExists = true;
Expand Down
Loading