Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,8 @@ std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std
std::shared_ptr<RJittedVariation>
BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn);
RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn,
const std::string &varyColType);

std::string JitBuildAction(const ColumnNames_t &bl, const std::type_info &art, const std::type_info &at, TTree *tree,
const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds,
Expand Down
78 changes: 77 additions & 1 deletion tree/dataframe/inc/ROOT/RDF/RInterface.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,18 @@ public:
/// hx["pt:up"].Draw("SAME");
/// ~~~
///
/// ## Short-hand expression syntax
///
/// For convenience, when a C++ expression is passed to Vary, the RVec return type can be omitted if the string,
/// once leading and trailing spaces, tabs and newlines are ignored, begins with '{' and ends with '}'. In that
/// case the element type of the returned RVec is taken from the type of the varied column. This means that the
/// following is equivalent to the example above:
///
/// ~~~{.cpp}
/// auto nominal_hx =
/// df.Vary("pt", "{pt*0.9, pt*1.1}", {"down", "up"})
/// // Same as above
/// ~~~
///
/// \note See also This Vary() overload for more information.
RInterface<Proxied> Vary(std::string_view colName, std::string_view expression,
const std::vector<std::string> &variationTags, std::string_view variationName = "")
Expand Down Expand Up @@ -1105,6 +1117,18 @@ public:
/// hx["pt:1"].Draw("SAME");
/// ~~~
///
/// ## Short-hand expression syntax
///
/// For convenience, when a C++ expression is passed to Vary, the RVec return type can be omitted if the string,
/// once leading and trailing spaces, tabs and newlines are ignored, begins with '{' and ends with '}'. In that
/// case the element type of the returned RVec is taken from the type of the varied column. This means that the
/// following is equivalent to the example above:
///
/// ~~~{.cpp}
/// auto nominal_hx =
/// df.Vary("pt", "{pt*0.9, pt*1.1}", 2)
/// // Same as above
/// ~~~
///
/// \note See also This Vary() overload for more information.
RInterface<Proxied> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
std::string_view variationName = "")
Expand Down Expand Up @@ -1142,6 +1166,31 @@ public:
/// hx["xy:1"].Draw("SAME");
/// ~~~
///
/// ## Short-hand expression syntax
///
/// For convenience, when a C++ expression is passed to Vary, the RVec return type can be omitted if the string,
/// once leading and trailing spaces, tabs and newlines are ignored, begins with '{' and ends with '}'. In that
/// case the innermost element type of the returned nested RVec is taken from the type of the first varied column.
/// This means that the following is equivalent to the example above:
///
/// ~~~{.cpp}
/// auto nominal_hx =
/// df.Vary({"x", "y"}, "{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
/// // Same as above
/// ~~~
///
/// or also:
///
/// ~~~{.cpp}
/// auto nominal_hx =
/// df.Vary({"x", "y"}, R"(
/// {
/// {x*0.9, x*1.1}, // x variations
/// {y*0.9, y*1.1} // y variations
/// }
/// )", 2, "xy")
/// // Same as above
/// ~~~
///
/// \note See also This Vary() overload for more information.
RInterface<Proxied> Vary(const std::vector<std::string> &colNames, std::string_view expression,
std::size_t nVariations, std::string_view variationName)
Expand Down Expand Up @@ -1194,6 +1243,31 @@ public:
/// hx["xy:up"].Draw("SAME");
/// ~~~
///
/// ## Short-hand expression syntax
///
/// For convenience, when a C++ expression is passed to Vary, the RVec return type can be omitted if the string,
/// once leading and trailing spaces, tabs and newlines are ignored, begins with '{' and ends with '}'. In that
/// case the innermost element type of the returned nested RVec is taken from the type of the first varied column.
/// This means that the following is equivalent to the example above:
///
/// ~~~{.cpp}
/// auto nominal_hx =
/// df.Vary({"x", "y"}, "{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
/// // Same as above
/// ~~~
///
/// or also:
///
/// ~~~{.cpp}
/// auto nominal_hx =
/// df.Vary({"x", "y"}, R"(
/// {
/// {x*0.9, x*1.1}, // x variations
/// {y*0.9, y*1.1} // y variations
/// }
/// )", {"down", "up"}, "xy")
/// // Same as above
/// ~~~
///
/// \note See also This Vary() overload for more information.
RInterface<Proxied> Vary(const std::vector<std::string> &colNames, std::string_view expression,
const std::vector<std::string> &variationTags, std::string_view variationName)
Expand Down Expand Up @@ -3788,9 +3862,11 @@ private:
throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
}

// Cannot vary different input column types, assume the first
auto varyColType = GetColumnType(colNames[0]);
auto jittedVariation =
RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager,
GetDataSource(), fColRegister, isSingleColumn);
GetDataSource(), fColRegister, isSingleColumn, varyColType);

RDFInternal::RColumnRegister newColRegister(fColRegister);
newColRegister.AddVariation(std::move(jittedVariation));
Expand Down
57 changes: 45 additions & 12 deletions tree/dataframe/src/RDFInterfaceUtils.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ std::unordered_map<std::string, std::string> &GetJittedExprs() {
return jittedExpressions;
}

std::string
BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
std::string BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes,
bool isSingleColumn = false, const std::string &varyColType = "")
{
assert(vars.size() == varTypes.size());

Expand Down Expand Up @@ -278,22 +278,53 @@ BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const Co
if (!vars.empty())
ss.seekp(-2, ss.cur);

if (hasReturnStmt)
ss << "){";
// When building the function expression for a Vary call, we try to help the
// user by removing the need to explicitly write the vector return type.
// For now, Vary works by returning a (nested) RVec, depending on how many
// variables need to vary in lockstep.
auto finalizeExprForVary = [&]() {
std::string trailRetType{};
// Trim formatting characters at the extremes of the user expression
auto first_not_space = expr.find_first_not_of(" \n\t");
auto last_not_space = expr.find_last_not_of(" \n\t");
if (first_not_space != std::string::npos && last_not_space != std::string::npos && expr[first_not_space] == '{' &&
expr[last_not_space] == '}') {
// User expression is of type '{...}', a potential constructor for an
// RVec. At the same time, they have not decided the RVec return type
// Add trailing return type for the convenience of the user
// The innermost value type is by default the type of the first given column
trailRetType = " -> ";
if (isSingleColumn)
trailRetType += "ROOT::RVec<" + varyColType + ">";
else
trailRetType += "ROOT::RVec<ROOT::RVec<" + varyColType + ">>";
trailRetType += ' ';
}
std::string trailRetToken{trailRetType.empty() ? ") {" : ')' + trailRetType + '{'};
if (!hasReturnStmt)
trailRetToken += " return ";
return trailRetToken;
};

if (!varyColType.empty())
ss << finalizeExprForVary();
else
ss << "){return ";
ss << expr << "\n;}";
ss << (hasReturnStmt ? ") {" : ") { return ");

// Must inject \n to avoid cases where the user puts a comment after the expression
ss << expr << "\n;}\n";

return ss.str();
}

/// Declare a function to the interpreter in namespace R_rdf, return the name of the jitted function.
/// If the function is already in GetJittedExprs, return the name for the function that has already been jitted.
std::string DeclareFunction(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
std::string DeclareFunction(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes,
bool isSingleColumn = false, const std::string &varyColType = "")
{
R__LOCKGUARD(gROOTMutex);

const auto funcCode = BuildFunctionString(expr, vars, varTypes);
const auto funcCode = BuildFunctionString(expr, vars, varTypes, isSingleColumn, varyColType);
auto &exprMap = GetJittedExprs();
const auto exprIt = exprMap.find(funcCode);
if (exprIt != exprMap.end()) {
Expand Down Expand Up @@ -728,20 +759,22 @@ std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std
std::shared_ptr<RJittedVariation>
BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn)
RDataSource *ds, const RColumnRegister &colRegister, bool isSingleColumn,
const std::string &varyColType)
{
const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};

const auto parsedExpr = ParseRDFExpression(expression, colRegister, dsColumns);
const auto exprVarTypes =
GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, nullptr, ds, "Vary", /*vector2RVec=*/true);
const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
const auto funcName =
DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes, isSingleColumn, varyColType);
const auto type = RetTypeOfFunc(funcName);

if (type.rfind("ROOT::VecOps::RVec", 0) != 0) {
throw std::runtime_error(
"Jitted Vary expressions must return an RVec object. The following expression returns a " + type +
" instead:\n" + parsedExpr.fExpr);
"Jitted Vary expressions must return an RVec object. The following expression return type is '" + type +
"' instead:\n" + parsedExpr.fExpr);
}

auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags, type, colRegister,
Expand Down
8 changes: 8 additions & 0 deletions tree/dataframe/src/RDataFrame.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,14 @@ hx["pt:down"].Draw("SAME");
hx["pt:up"].Draw("SAME");
~~~

A shorter expression syntax is allowed for convenience:

~~~{.cpp}
auto nominal_hx =
df.Vary("pt", "{pt*0.9f, pt*1.1f}", {"down", "up"})
// The rest is the same as above
~~~

A list of variation "tags" is passed as the last argument to Vary(). The tags give names to the varied values that are returned
as elements of an RVec of the appropriate C++ type. The number of variation tags must correspond to the number of elements of
this RVec (2 in the example above: the first element will correspond to the tag "down", the second
Expand Down
137 changes: 136 additions & 1 deletion tree/dataframe/test/dataframe_vary.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ TEST(RDFVary, RequireReturnTypeIsRVec)
EXPECT_THROW(
try { df.Vary("x", "0", /*nVariations=*/2); } catch (const std::runtime_error &err) {
const auto msg = "Jitted Vary expressions must return an RVec object. "
"The following expression returns a int instead:\n0";
"The following expression return type is 'int' instead:\n0";
EXPECT_STREQ(err.what(), msg);
throw;
},
Expand Down Expand Up @@ -1782,4 +1782,139 @@ TEST(RDFVary, CheckVariationNames)
}
}

TEST_P(RDFVary, JittedVaryOneVariableImplicitRetType)
{
auto df = ROOT::RDataFrame(10).Define("x", [] { return 1; });
auto sum = df.Vary("x", "{-1*x, 2*x}", 2).Sum<int>("x");
EXPECT_EQ(*sum, 10);

auto sums = VariationsFor(sum);

EXPECT_EQ(sums["nominal"], 10);
EXPECT_EQ(sums["x:0"], -10);
EXPECT_EQ(sums["x:1"], 20);
}

TEST_P(RDFVary, JittedVarySimultaneousVariationsImplicitRetType)
{
auto df = ROOT::RDataFrame(10).Define("x", [] { return 1; }).Define("y", [] { return 42; });
auto h = df.Vary(std::vector<std::string>{"x", "y"}, "{{-1, 2, 3}, {41, 43, 44}}", {"down", "up", "other"}, "xy")
.Histo1D<int, int>("x", "y");
auto histos = VariationsFor(h);

const auto expectedKeys = std::vector<std::string>{"nominal", "xy:down", "xy:other", "xy:up"};
auto keys = histos.GetKeys();
std::sort(keys.begin(), keys.end()); // key ordering is not guaranteed
EXPECT_EQ(keys, expectedKeys);
EXPECT_DOUBLE_EQ(histos["nominal"].GetMaximum(), 42. * 10.);
EXPECT_DOUBLE_EQ(histos["nominal"].GetMean(), 1.);
EXPECT_DOUBLE_EQ(histos["xy:down"].GetMaximum(), 41. * 10.);
EXPECT_DOUBLE_EQ(histos["xy:down"].GetMean(), -1.);
EXPECT_DOUBLE_EQ(histos["xy:up"].GetMaximum(), 43. * 10.);
EXPECT_DOUBLE_EQ(histos["xy:up"].GetMean(), 2.);
EXPECT_DOUBLE_EQ(histos["xy:other"].GetMaximum(), 44 * 10.);
EXPECT_DOUBLE_EQ(histos["xy:other"].GetMean(), 3.);
}

TEST_P(RDFVary, JittedVarySimultaneousVariationsImplicitRetTypeMultiStringExpression)
{
auto df = ROOT::RDataFrame(10).Define("x", [] { return 1; }).Define("y", [] { return 42; });
auto h = df.Vary(std::vector<std::string>{"x", "y"}, R"CODE(
{
{-1, 2, 3}, // x variations
{41, 43, 44} // y variations
}
)CODE",
{"down", "up", "other"}, "xy")
.Histo1D<int, int>("x", "y");
auto histos = VariationsFor(h);

const auto expectedKeys = std::vector<std::string>{"nominal", "xy:down", "xy:other", "xy:up"};
auto keys = histos.GetKeys();
std::sort(keys.begin(), keys.end()); // key ordering is not guaranteed
EXPECT_EQ(keys, expectedKeys);
EXPECT_DOUBLE_EQ(histos["nominal"].GetMaximum(), 42. * 10.);
EXPECT_DOUBLE_EQ(histos["nominal"].GetMean(), 1.);
EXPECT_DOUBLE_EQ(histos["xy:down"].GetMaximum(), 41. * 10.);
EXPECT_DOUBLE_EQ(histos["xy:down"].GetMean(), -1.);
EXPECT_DOUBLE_EQ(histos["xy:up"].GetMaximum(), 43. * 10.);
EXPECT_DOUBLE_EQ(histos["xy:up"].GetMean(), 2.);
EXPECT_DOUBLE_EQ(histos["xy:other"].GetMaximum(), 44 * 10.);
EXPECT_DOUBLE_EQ(histos["xy:other"].GetMean(), 3.);
}

TEST_P(RDFVary, JittedVarySimultaneousVariationsVecColsImplicitRetType)
{
auto df = ROOT::RDataFrame(10)
.Define("x", [] { return ROOT::RVecF{1.f, 1.f, 1.f}; })
.Define("y", [] { return ROOT::RVecF{42.f, 42.f, 42.f}; })
.Define("entry", [](ULong64_t entry) -> int { return entry; }, {"rdfentry_"});
auto h = df.Vary(std::vector<std::string>{"x", "y"}, "{{x*entry, x-1, x+2}, {y*entry, y-1, y+2}}",
{"down", "up", "other"}, "xy")
.Define("xy", [](const ROOT::RVecF &x, const ROOT::RVecF &y) { return x + y; }, {"x", "y"})
.Histo1D<ROOT::RVecF>("xy");
auto histos = VariationsFor(h);

const auto expectedKeys = std::vector<std::string>{"nominal", "xy:down", "xy:other", "xy:up"};
auto keys = histos.GetKeys();
std::sort(keys.begin(), keys.end()); // key ordering is not guaranteed
EXPECT_EQ(keys, expectedKeys);
EXPECT_DOUBLE_EQ(histos["nominal"].GetMaximum(), 30.);
EXPECT_DOUBLE_EQ(histos["nominal"].GetMean(), 43);
EXPECT_DOUBLE_EQ(histos["xy:down"].GetMaximum(), 3.); //
EXPECT_DOUBLE_EQ(histos["xy:down"].GetMean(), 193.5);
EXPECT_DOUBLE_EQ(histos["xy:up"].GetMaximum(), 30.);
EXPECT_DOUBLE_EQ(histos["xy:up"].GetMean(), 41.);
EXPECT_DOUBLE_EQ(histos["xy:other"].GetMaximum(), 30.);
EXPECT_DOUBLE_EQ(histos["xy:other"].GetMean(), 47.);
}

TEST_P(RDFVary, JittedVarySimultaneousVariationsDependingFromOtherColsImplicitRetType)
{
auto df = ROOT::RDataFrame(10)
.Define("x", [] { return 1; })
.Define("y", [] { return 42; })
.Define("z", [] { return 100; })
.Define("entry", [](ULong64_t entry) -> int { return entry; }, {"rdfentry_"});
auto h =
df.Vary(std::vector<std::string>{"x", "y", "z"},
"{{-1*entry, 2, 3}, {41, 43*entry, 44}, {500-entry, 600, 700 + entry}}", {"down", "up", "other"}, "xyz")
.Define("xyz", [](int x, int y, int z) { return x + y + z; }, {"x", "y", "z"})
.Histo1D<int>("xyz");
auto histos = VariationsFor(h);

const auto expectedKeys = std::vector<std::string>{"nominal", "xyz:down", "xyz:other", "xyz:up"};
auto keys = histos.GetKeys();
std::sort(keys.begin(), keys.end()); // key ordering is not guaranteed
EXPECT_EQ(keys, expectedKeys);
EXPECT_DOUBLE_EQ(histos["nominal"].GetMaximum(), 10.);
EXPECT_DOUBLE_EQ(histos["nominal"].GetMean(), 143.);
EXPECT_DOUBLE_EQ(histos["xyz:down"].GetMaximum(), 1.);
EXPECT_DOUBLE_EQ(histos["xyz:down"].GetMean(), 532.);
EXPECT_DOUBLE_EQ(histos["xyz:up"].GetMaximum(), 1.);
EXPECT_DOUBLE_EQ(histos["xyz:up"].GetMean(), 795.5);
EXPECT_DOUBLE_EQ(histos["xyz:other"].GetMaximum(), 1.);
EXPECT_DOUBLE_EQ(histos["xyz:other"].GetMean(), 751.5);
}

TEST_P(RDFVary, JittedVaryEmptyString)
{
auto df = ROOT::RDataFrame(1).Define("x", [] { return 1; }).Define("y", [] { return 42.; });
EXPECT_THROW(
try { df.Vary("x", "", /*nVariations=*/2); } catch (const std::runtime_error &err) {
const auto msg = "Jitted Vary expressions must return an RVec object. "
"The following expression return type is 'void' instead:\n";
EXPECT_STREQ(err.what(), msg);
throw;
},
std::runtime_error);

EXPECT_THROW(
try { df.Vary({"x", "y"}, "", 1, "broken"); } catch (const std::runtime_error &err) {
const auto msg = "Jitted Vary expressions must return an RVec object. "
"The following expression return type is 'void' instead:\n";
EXPECT_STREQ(err.what(), msg);
throw;
},
std::runtime_error);
}
Loading