From 350b27632e263a316fecf3032b3bd6f0c2afb7e9 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Thu, 23 Apr 2026 14:57:32 +0200 Subject: [PATCH 1/3] [df] Lift requirement to provide explicit return type in Vary expression The expression gets a trailing return type injected of the equivalent necessary RVec type, only in case the user is not already providing it. As long as the expression returns a sequence of characters that is a valid way to instantiate an RVec constructor, the expression will compile. The condition that triggers the injection of the trailing return type is if the user expression is of type '{...}', which could be a potential candidate constructor for an RVec. --- .../dataframe/inc/ROOT/RDF/InterfaceUtils.hxx | 3 +- tree/dataframe/inc/ROOT/RDF/RInterface.hxx | 4 +- tree/dataframe/src/RDFInterfaceUtils.cxx | 53 ++++++-- tree/dataframe/test/dataframe_vary.cxx | 113 ++++++++++++++++++ 4 files changed, 161 insertions(+), 12 deletions(-) diff --git a/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx b/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx index d8f1d524078af..7b241022750d7 100644 --- a/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx +++ b/tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx @@ -433,7 +433,8 @@ std::shared_ptr BookDefinePerSampleJit(std::string_view name, std std::shared_ptr BookVariationJit(const std::vector &colNames, std::string_view variationName, const std::vector &variationTags, std::string_view expression, RLoopManager &lm, - RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn); + RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn, + const std::string &varyColType); std::string JitBuildAction(const ColumnNames_t &bl, const std::type_info &art, const std::type_info &at, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, diff --git a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx index 
0a000eb178d04..3c9688530f51a 100644 --- a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx @@ -3788,9 +3788,11 @@ private: throw std::logic_error("A column name was passed to the same Vary invocation multiple times."); } + // Cannot vary different input column types, assume the first + auto varyColType = GetColumnType(colNames[0]); auto jittedVariation = RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, - GetDataSource(), fColRegister, isSingleColumn); + GetDataSource(), fColRegister, isSingleColumn, varyColType); RDFInternal::RColumnRegister newColRegister(fColRegister); newColRegister.AddVariation(std::move(jittedVariation)); diff --git a/tree/dataframe/src/RDFInterfaceUtils.cxx b/tree/dataframe/src/RDFInterfaceUtils.cxx index c99b39605002b..9a0071ae5f00e 100644 --- a/tree/dataframe/src/RDFInterfaceUtils.cxx +++ b/tree/dataframe/src/RDFInterfaceUtils.cxx @@ -219,8 +219,8 @@ std::unordered_map &GetJittedExprs() { return jittedExpressions; } -std::string -BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes) +std::string BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes, + bool isSingleColumn = false, const std::string &varyColType = "") { assert(vars.size() == varTypes.size()); @@ -278,22 +278,53 @@ BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const Co if (!vars.empty()) ss.seekp(-2, ss.cur); - if (hasReturnStmt) - ss << "){"; + // When building the function expression for a Vary call, we try to help the + // user by removing the need to explicitly write the vector return type. + // For now, Vary works by returning a (nested) RVec, depending on how many + // variables need to vary in lockstep. 
+ auto finalizeExprForVary = [&]() { + std::string trailRetType{}; + // Trim formatting characters at the extremes of the user expression + auto first_not_space = expr.find_first_not_of(" \n\t"); + auto last_not_space = expr.find_last_not_of(" \n\t"); + if (first_not_space != std::string::npos && last_not_space != std::string::npos && expr[first_not_space] == '{' && + expr[last_not_space] == '}') { + // User expression is of type '{...}', a potential constructor for an + // RVec. At the same time, they have not decided the RVec return type + // Add trailing return type for the convenience of the user + // The innermost value type is by default the type of the first given column + trailRetType = " -> "; + if (isSingleColumn) + trailRetType += "ROOT::RVec<" + varyColType + ">"; + else + trailRetType += "ROOT::RVec<ROOT::RVec<" + varyColType + ">>"; + trailRetType += ' '; + } + std::string trailRetToken{trailRetType.empty() ? ") {" : ')' + trailRetType + '{'}; + if (!hasReturnStmt) + trailRetToken += " return "; + return trailRetToken; + }; + + if (!varyColType.empty()) + ss << finalizeExprForVary(); else - ss << "){return "; - ss << expr << "\n;}"; + ss << (hasReturnStmt ? ") {" : ") { return "); + + // Must inject \n to avoid cases where the user puts a comment after the expression + ss << expr << "\n;}\n"; return ss.str(); } /// Declare a function to the interpreter in namespace R_rdf, return the name of the jitted function. /// If the function is already in GetJittedExprs, return the name for the function that has already been jitted. 
-std::string DeclareFunction(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes) +std::string DeclareFunction(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes, + bool isSingleColumn = false, const std::string &varyColType = "") { R__LOCKGUARD(gROOTMutex); - const auto funcCode = BuildFunctionString(expr, vars, varTypes); + const auto funcCode = BuildFunctionString(expr, vars, varTypes, isSingleColumn, varyColType); auto &exprMap = GetJittedExprs(); const auto exprIt = exprMap.find(funcCode); if (exprIt != exprMap.end()) { @@ -728,14 +759,16 @@ std::shared_ptr BookDefinePerSampleJit(std::string_view name, std std::shared_ptr BookVariationJit(const std::vector &colNames, std::string_view variationName, const std::vector &variationTags, std::string_view expression, RLoopManager &lm, - RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn) + RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn, + const std::string &varyColType) { const auto &dsColumns = ds ? 
ds->GetColumnNames() : ColumnNames_t{}; const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns); const auto exprVarTypes = GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, nullptr, ds, "Vary", /*vector2RVec=*/true); - const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes); + const auto funcName = + DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes, isSingleColumn, varyColType); const auto type = RetTypeOfFunc(funcName); if (type.rfind("ROOT::VecOps::RVec", 0) != 0) { diff --git a/tree/dataframe/test/dataframe_vary.cxx b/tree/dataframe/test/dataframe_vary.cxx index eda767df9a3e7..6c5d7f4f72d93 100644 --- a/tree/dataframe/test/dataframe_vary.cxx +++ b/tree/dataframe/test/dataframe_vary.cxx @@ -1782,4 +1782,117 @@ TEST(RDFVary, CheckVariationNames) } } +TEST_P(RDFVary, JittedVaryOneVariableImplicitRetType) +{ + auto df = ROOT::RDataFrame(10).Define("x", [] { return 1; }); + auto sum = df.Vary("x", "{-1*x, 2*x}", 2).Sum("x"); + EXPECT_EQ(*sum, 10); + + auto sums = VariationsFor(sum); + + EXPECT_EQ(sums["nominal"], 10); + EXPECT_EQ(sums["x:0"], -10); + EXPECT_EQ(sums["x:1"], 20); +} +TEST_P(RDFVary, JittedVarySimultaneousVariationsImplicitRetType) +{ + auto df = ROOT::RDataFrame(10).Define("x", [] { return 1; }).Define("y", [] { return 42; }); + auto h = df.Vary(std::vector{"x", "y"}, "{{-1, 2, 3}, {41, 43, 44}}", {"down", "up", "other"}, "xy") + .Histo1D("x", "y"); + auto histos = VariationsFor(h); + + const auto expectedKeys = std::vector{"nominal", "xy:down", "xy:other", "xy:up"}; + auto keys = histos.GetKeys(); + std::sort(keys.begin(), keys.end()); // key ordering is not guaranteed + EXPECT_EQ(keys, expectedKeys); + EXPECT_DOUBLE_EQ(histos["nominal"].GetMaximum(), 42. * 10.); + EXPECT_DOUBLE_EQ(histos["nominal"].GetMean(), 1.); + EXPECT_DOUBLE_EQ(histos["xy:down"].GetMaximum(), 41. 
* 10.); + EXPECT_DOUBLE_EQ(histos["xy:down"].GetMean(), -1.); + EXPECT_DOUBLE_EQ(histos["xy:up"].GetMaximum(), 43. * 10.); + EXPECT_DOUBLE_EQ(histos["xy:up"].GetMean(), 2.); + EXPECT_DOUBLE_EQ(histos["xy:other"].GetMaximum(), 44 * 10.); + EXPECT_DOUBLE_EQ(histos["xy:other"].GetMean(), 3.); +} + +TEST_P(RDFVary, JittedVarySimultaneousVariationsImplicitRetTypeMultiStringExpression) +{ + auto df = ROOT::RDataFrame(10).Define("x", [] { return 1; }).Define("y", [] { return 42; }); + auto h = df.Vary(std::vector{"x", "y"}, R"CODE( + { + {-1, 2, 3}, // x variations + {41, 43, 44} // y variations + } + )CODE", + {"down", "up", "other"}, "xy") + .Histo1D("x", "y"); + auto histos = VariationsFor(h); + + const auto expectedKeys = std::vector{"nominal", "xy:down", "xy:other", "xy:up"}; + auto keys = histos.GetKeys(); + std::sort(keys.begin(), keys.end()); // key ordering is not guaranteed + EXPECT_EQ(keys, expectedKeys); + EXPECT_DOUBLE_EQ(histos["nominal"].GetMaximum(), 42. * 10.); + EXPECT_DOUBLE_EQ(histos["nominal"].GetMean(), 1.); + EXPECT_DOUBLE_EQ(histos["xy:down"].GetMaximum(), 41. * 10.); + EXPECT_DOUBLE_EQ(histos["xy:down"].GetMean(), -1.); + EXPECT_DOUBLE_EQ(histos["xy:up"].GetMaximum(), 43. 
* 10.); + EXPECT_DOUBLE_EQ(histos["xy:up"].GetMean(), 2.); + EXPECT_DOUBLE_EQ(histos["xy:other"].GetMaximum(), 44 * 10.); + EXPECT_DOUBLE_EQ(histos["xy:other"].GetMean(), 3.); +} + +TEST_P(RDFVary, JittedVarySimultaneousVariationsVecColsImplicitRetType) +{ + auto df = ROOT::RDataFrame(10) + .Define("x", [] { return ROOT::RVecF{1.f, 1.f, 1.f}; }) + .Define("y", [] { return ROOT::RVecF{42.f, 42.f, 42.f}; }) + .Define("entry", [](ULong64_t entry) -> int { return entry; }, {"rdfentry_"}); + auto h = df.Vary(std::vector{"x", "y"}, "{{x*entry, x-1, x+2}, {y*entry, y-1, y+2}}", + {"down", "up", "other"}, "xy") + .Define("xy", [](const ROOT::RVecF &x, const ROOT::RVecF &y) { return x + y; }, {"x", "y"}) + .Histo1D("xy"); + auto histos = VariationsFor(h); + + const auto expectedKeys = std::vector{"nominal", "xy:down", "xy:other", "xy:up"}; + auto keys = histos.GetKeys(); + std::sort(keys.begin(), keys.end()); // key ordering is not guaranteed + EXPECT_EQ(keys, expectedKeys); + EXPECT_DOUBLE_EQ(histos["nominal"].GetMaximum(), 30.); + EXPECT_DOUBLE_EQ(histos["nominal"].GetMean(), 43); + EXPECT_DOUBLE_EQ(histos["xy:down"].GetMaximum(), 3.); // + EXPECT_DOUBLE_EQ(histos["xy:down"].GetMean(), 193.5); + EXPECT_DOUBLE_EQ(histos["xy:up"].GetMaximum(), 30.); + EXPECT_DOUBLE_EQ(histos["xy:up"].GetMean(), 41.); + EXPECT_DOUBLE_EQ(histos["xy:other"].GetMaximum(), 30.); + EXPECT_DOUBLE_EQ(histos["xy:other"].GetMean(), 47.); +} + +TEST_P(RDFVary, JittedVarySimultaneousVariationsDependingFromOtherColsImplicitRetType) +{ + auto df = ROOT::RDataFrame(10) + .Define("x", [] { return 1; }) + .Define("y", [] { return 42; }) + .Define("z", [] { return 100; }) + .Define("entry", [](ULong64_t entry) -> int { return entry; }, {"rdfentry_"}); + auto h = + df.Vary(std::vector{"x", "y", "z"}, + "{{-1*entry, 2, 3}, {41, 43*entry, 44}, {500-entry, 600, 700 + entry}}", {"down", "up", "other"}, "xyz") + .Define("xyz", [](int x, int y, int z) { return x + y + z; }, {"x", "y", "z"}) + .Histo1D("xyz"); + 
auto histos = VariationsFor(h); + + const auto expectedKeys = std::vector{"nominal", "xyz:down", "xyz:other", "xyz:up"}; + auto keys = histos.GetKeys(); + std::sort(keys.begin(), keys.end()); // key ordering is not guaranteed + EXPECT_EQ(keys, expectedKeys); + EXPECT_DOUBLE_EQ(histos["nominal"].GetMaximum(), 10.); + EXPECT_DOUBLE_EQ(histos["nominal"].GetMean(), 143.); + EXPECT_DOUBLE_EQ(histos["xyz:down"].GetMaximum(), 1.); + EXPECT_DOUBLE_EQ(histos["xyz:down"].GetMean(), 532.); + EXPECT_DOUBLE_EQ(histos["xyz:up"].GetMaximum(), 1.); + EXPECT_DOUBLE_EQ(histos["xyz:up"].GetMean(), 795.5); + EXPECT_DOUBLE_EQ(histos["xyz:other"].GetMaximum(), 1.); + EXPECT_DOUBLE_EQ(histos["xyz:other"].GetMean(), 751.5); +} From 5d1afc520ecbdb88d00c34e7bce4a3bfe1bdb318 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Fri, 24 Apr 2026 16:43:16 +0200 Subject: [PATCH 2/3] [df] Add test and improve error message --- tree/dataframe/src/RDFInterfaceUtils.cxx | 4 ++-- tree/dataframe/test/dataframe_vary.cxx | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tree/dataframe/src/RDFInterfaceUtils.cxx b/tree/dataframe/src/RDFInterfaceUtils.cxx index 9a0071ae5f00e..30a694b089a7f 100644 --- a/tree/dataframe/src/RDFInterfaceUtils.cxx +++ b/tree/dataframe/src/RDFInterfaceUtils.cxx @@ -773,8 +773,8 @@ BookVariationJit(const std::vector &colNames, std::string_view vari if (type.rfind("ROOT::VecOps::RVec", 0) != 0) { throw std::runtime_error( - "Jitted Vary expressions must return an RVec object. The following expression returns a " + type + - " instead:\n" + parsedExpr.fExpr); + "Jitted Vary expressions must return an RVec object. 
The following expression return type is '" + type + + "' instead:\n" + parsedExpr.fExpr); } auto jittedVariation = std::make_shared(colNames, variationName, variationTags, type, colRegister, diff --git a/tree/dataframe/test/dataframe_vary.cxx b/tree/dataframe/test/dataframe_vary.cxx index 6c5d7f4f72d93..1e8f711f8773c 100644 --- a/tree/dataframe/test/dataframe_vary.cxx +++ b/tree/dataframe/test/dataframe_vary.cxx @@ -208,7 +208,7 @@ TEST(RDFVary, RequireReturnTypeIsRVec) EXPECT_THROW( try { df.Vary("x", "0", /*nVariations=*/2); } catch (const std::runtime_error &err) { const auto msg = "Jitted Vary expressions must return an RVec object. " - "The following expression returns a int instead:\n0"; + "The following expression return type is 'int' instead:\n0"; EXPECT_STREQ(err.what(), msg); throw; }, @@ -1896,3 +1896,25 @@ TEST_P(RDFVary, JittedVarySimultaneousVariationsDependingFromOtherColsImplicitRe EXPECT_DOUBLE_EQ(histos["xyz:other"].GetMaximum(), 1.); EXPECT_DOUBLE_EQ(histos["xyz:other"].GetMean(), 751.5); } + +TEST_P(RDFVary, JittedVaryEmptyString) +{ + auto df = ROOT::RDataFrame(1).Define("x", [] { return 1; }).Define("y", [] { return 42.; }); + EXPECT_THROW( + try { df.Vary("x", "", /*nVariations=*/2); } catch (const std::runtime_error &err) { + const auto msg = "Jitted Vary expressions must return an RVec object. " + "The following expression return type is 'void' instead:\n"; + EXPECT_STREQ(err.what(), msg); + throw; + }, + std::runtime_error); + + EXPECT_THROW( + try { df.Vary({"x", "y"}, "", 1, "broken"); } catch (const std::runtime_error &err) { + const auto msg = "Jitted Vary expressions must return an RVec object. 
" + "The following expression return type is 'void' instead:\n"; + EXPECT_STREQ(err.what(), msg); + throw; + }, + std::runtime_error); +} \ No newline at end of file From 0ff0b996d96271f529e3d40cdd0f170f23211e70 Mon Sep 17 00:00:00 2001 From: Vincenzo Eduardo Padulano Date: Fri, 24 Apr 2026 17:39:34 +0200 Subject: [PATCH 3/3] [df] Add docs for the short-hand syntax for Vary expressions --- tree/dataframe/inc/ROOT/RDF/RInterface.hxx | 74 ++++++++++++++++++++++ tree/dataframe/src/RDataFrame.cxx | 8 +++ 2 files changed, 82 insertions(+) diff --git a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx index 3c9688530f51a..c2b543730e02f 100644 --- a/tree/dataframe/inc/ROOT/RDF/RInterface.hxx +++ b/tree/dataframe/inc/ROOT/RDF/RInterface.hxx @@ -1072,6 +1072,18 @@ public: /// hx["pt:up"].Draw("SAME"); /// ~~~ /// + /// ## Short-hand expression syntax + /// + /// For convenience, when a C++ expression is passed to Vary, the return type can be omitted if the string begins + /// with '{' and ends with '}' (leading and trailing spaces, tabs and newlines are ignored). This means that + /// the following is equivalent to the example above: + /// + /// ~~~{.cpp} + /// auto nominal_hx = + /// df.Vary("pt", "{pt*0.9, pt*1.1}", {"down", "up"}) + /// // Same as above + /// ~~~ + /// /// \note See also This Vary() overload for more information. RInterface Vary(std::string_view colName, std::string_view expression, const std::vector &variationTags, std::string_view variationName = "") @@ -1105,6 +1117,18 @@ public: /// hx["pt:1"].Draw("SAME"); /// ~~~ /// + /// ## Short-hand expression syntax + /// + /// For convenience, when a C++ expression is passed to Vary, the return type can be omitted if the string begins + /// with '{' and ends with '}' (leading and trailing spaces, tabs and newlines are ignored). 
This means that + /// the following is equivalent to the example above: + /// + /// ~~~{.cpp} + /// auto nominal_hx = + /// df.Vary("pt", "{pt*0.9, pt*1.1}", 2) + /// // Same as above + /// ~~~ + /// /// \note See also This Vary() overload for more information. RInterface Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName = "") @@ -1142,6 +1166,31 @@ public: /// hx["xy:1"].Draw("SAME"); /// ~~~ /// + /// ## Short-hand expression syntax + /// + /// For convenience, when a C++ expression is passed to Vary, the return type can be omitted if the string begins + /// with '{' and ends with '}' (leading and trailing spaces, tabs and newlines are ignored). This means that + /// the following is equivalent to the example above: + /// + /// ~~~{.cpp} + /// auto nominal_hx = + /// df.Vary({"x", "y"}, "{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy") + /// // Same as above + /// ~~~ + /// + /// or, equivalently, as a multi-line raw string: + /// + /// ~~~{.cpp} + /// auto nominal_hx = + /// df.Vary({"x", "y"}, R"( + /// { + /// {x*0.9, x*1.1}, // x variations + /// {y*0.9, y*1.1} // y variations + /// } + /// )", 2, "xy") + /// // Same as above + /// ~~~ + /// /// \note See also This Vary() overload for more information. RInterface Vary(const std::vector &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName) @@ -1194,6 +1243,31 @@ public: /// hx["xy:up"].Draw("SAME"); /// ~~~ /// + /// ## Short-hand expression syntax + /// + /// For convenience, when a C++ expression is passed to Vary, the return type can be omitted if the string begins + /// with '{' and ends with '}' (leading and trailing spaces, tabs and newlines are ignored). 
This means that + /// the following is equivalent to the example above: + /// + /// ~~~{.cpp} + /// auto nominal_hx = + /// df.Vary({"x", "y"}, "{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy") + /// // Same as above + /// ~~~ + /// + /// or, equivalently, as a multi-line raw string: + /// + /// ~~~{.cpp} + /// auto nominal_hx = + /// df.Vary({"x", "y"}, R"( + /// { + /// {x*0.9, x*1.1}, // x variations + /// {y*0.9, y*1.1} // y variations + /// } + /// )", {"down", "up"}, "xy") + /// // Same as above + /// ~~~ + /// /// \note See also This Vary() overload for more information. RInterface Vary(const std::vector &colNames, std::string_view expression, const std::vector &variationTags, std::string_view variationName) diff --git a/tree/dataframe/src/RDataFrame.cxx b/tree/dataframe/src/RDataFrame.cxx index f8441ae934ce9..3aec89dae95e6 100644 --- a/tree/dataframe/src/RDataFrame.cxx +++ b/tree/dataframe/src/RDataFrame.cxx @@ -1204,6 +1204,14 @@ hx["pt:down"].Draw("SAME"); hx["pt:up"].Draw("SAME"); ~~~ +A shorter syntax is also allowed: if the expression is a braced list, the RVec return type can be omitted: + +~~~{.cpp} +auto nominal_hx = + df.Vary("pt", "{pt*0.9f, pt*1.1f}", {"down", "up"}) +// The rest is the same as above +~~~ + A list of variation "tags" is passed as the last argument to Vary(). The tags give names to the varied values that are returned as elements of an RVec of the appropriate C++ type. The number of variation tags must correspond to the number of elements of this RVec (2 in the example above: the first element will correspond to the tag "down", the second