From 13bbfe7db7736c4e727aca97ee4315ad85d14d6e Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Mon, 20 Apr 2026 14:32:57 +0200 Subject: [PATCH] [RF] Generalize boundary finding for RooHistPdf and RooHistFunc The interface to return bin boundaries and sampling hints for plotting in RooHistPdf and RooHistFunc doesn't consider that the histogram variable might be a transformation of the variable for which the bin boundaries or plot hints are requested. This commit improves the situation by implementing an algorithm that assumes that plot variable and histogram variable are linearly related, and numerically figures out the inverse transformation for transforming histogram bins to plot bins if needed. If the transformation is not linear, the RooHistFunc or RooHistPdf won't be correctly plotted either, but linear transformations are very common, even explicitly via the `RooLinearVar`, and supporting those is already a good improvement in user experience. Closes #13030. --- roofit/roofitcore/inc/RooHistPdf.h | 5 + roofit/roofitcore/src/RooHistFunc.cxx | 95 +++----------- roofit/roofitcore/src/RooHistPdf.cxx | 151 ++++++++++++++++------ roofit/roofitcore/test/testRooHistPdf.cxx | 69 ++++++++++ 4 files changed, 197 insertions(+), 123 deletions(-) diff --git a/roofit/roofitcore/inc/RooHistPdf.h b/roofit/roofitcore/inc/RooHistPdf.h index bd50afddf3d58..d62b5078c3fe4 100644 --- a/roofit/roofitcore/inc/RooHistPdf.h +++ b/roofit/roofitcore/inc/RooHistPdf.h @@ -23,6 +23,7 @@ #include "RooDataHist.h" #include +#include class RooRealVar; class RooAbsReal; @@ -145,6 +146,10 @@ class RooHistPdf : public RooAbsPdf { double xlo, double xhi); + static std::vector histogramBoundariesInPlotObs(RooDataHist const &dataHist, RooArgSet const &pdfObsList, + RooArgSet const &histObsList, RooAbsRealLValue &obs, + double xlo, double xhi); + inline void initializeOwnedDataHist(std::unique_ptr &&dataHist) { _ownedDataHist = std::move(dataHist); diff --git a/roofit/roofitcore/src/RooHistFunc.cxx b/roofit/roofitcore/src/RooHistFunc.cxx index 54f9bf9e472cd..10041d3f16446 100644 --- a/roofit/roofitcore/src/RooHistFunc.cxx +++ b/roofit/roofitcore/src/RooHistFunc.cxx @@ -326,11 +326,11 @@ std::list* RooHistFunc::plotSamplingHint(RooAbsRealLValue& obs, double x return RooHistPdf::plotSamplingHint(*_dataHist, _depList, _histObsList, _intOrder, obs, xlo, xhi); } - //////////////////////////////////////////////////////////////////////////////// -/// Return sampling hint for making curves of (projections) of this function -/// as the recursive division strategy of RooCurve cannot deal efficiently -/// with the vertical lines that occur in a non-interpolated histogram +/// Return the histogram bin boundaries mapped into the plot-observable +/// coordinate. Handles the case where the function observable is a +/// transformation (shift, scale, ...) of the plot observable so that the +/// returned boundaries are placed at the correct positions in the plot. std::list* RooHistFunc::binBoundaries(RooAbsRealLValue& obs, double xlo, double xhi) const { @@ -339,86 +339,21 @@ std::list* RooHistFunc::binBoundaries(RooAbsRealLValue& obs, double xlo, return nullptr ; } - // Find histogram observable corresponding to pdf observable - RooAbsArg* hobs(nullptr) ; - for (auto i = 0u; i < _histObsList.size(); ++i) { - const auto harg = _histObsList[i]; - const auto parg = _depList[i]; - if (std::string(parg->GetName())==obs.GetName()) { - hobs=harg ; - } + const std::vector boundaries = + RooHistPdf::histogramBoundariesInPlotObs(*_dataHist, _depList, _histObsList, obs, xlo, xhi); + if (boundaries.empty()) { + return nullptr; } - // std::cout << "RooHistFunc::bb(" << GetName() << ") histObs = " << _histObsList << std::endl ; - // std::cout << "RooHistFunc::bb(" << GetName() << ") pdfObs = " << _depList << std::endl ; - - RooAbsRealLValue* transform = nullptr; - if (!hobs) { - - // Considering alternate: input observable is histogram observable and pdf observable is transformation in terms of it - RooAbsArg* pobs = nullptr; - for (auto i = 0u; i < _histObsList.size(); ++i) { - const auto harg = _histObsList[i]; - const auto parg = _depList[i]; - if (std::string(harg->GetName())==obs.GetName()) { - pobs=parg ; - hobs=harg ; - } - } - - // Not found, or check that matching pdf observable is an l-value dependent on histogram observable fails - if (!hobs || !(pobs->dependsOn(obs) && dynamic_cast(pobs))) { - std::cout << "RooHistFunc::binBoundaries(" << GetName() << ") obs = " << obs.GetName() << " hobs is not found, returning null" << std::endl ; - return nullptr ; - } - - // Now we are in business - we are in a situation where the pdf observable LV(x), mapping to a histogram observable x - // We can return bin boundaries by mapping the histogram boundaties through the inverse of the LV(x) transformation - transform = dynamic_cast(pobs) ; - } - - - // std::cout << "hobs = " << hobs->GetName() << std::endl ; - // std::cout << "transform = " << (transform?transform->GetName():"") << std::endl ; - - // Check that observable is in dataset, if not no hint is generated - RooAbsArg* xtmp = _dataHist->get()->find(hobs->GetName()) ; - if (!xtmp) { - std::cout << "RooHistFunc::binBoundaries(" << GetName() << ") hobs = " << hobs->GetName() << " is not found in dataset?" << std::endl ; - _dataHist->get()->Print("v") ; - return nullptr ; - } - RooAbsLValue* lvarg = dynamic_cast(_dataHist->get()->find(hobs->GetName())) ; - if (!lvarg) { - std::cout << "RooHistFunc::binBoundaries(" << GetName() << ") hobs = " << hobs->GetName() << " but is not an LV, returning null" << std::endl ; - return nullptr ; - } - - // Retrieve position of all bin boundaries - const RooAbsBinning* binning = lvarg->getBinningPtr(nullptr); - double* boundaries = binning->array() ; - - auto hint = new std::list ; - - double delta = (xhi-xlo)*1e-8 ; - - // Construct array with pairs of points positioned epsilon to the left and - // right of the bin boundaries - for (Int_t i=0 ; inumBoundaries() ; i++) { - if (boundaries[i]>xlo-delta && boundaries[i]setVal(boundary) ; - //cout << "transform bound " << boundary << " using " << transform->GetName() << " result " << obs.getVal() << std::endl ; - hint->push_back(obs.getVal()) ; - } else { - hint->push_back(boundary) ; - } - } + auto hint = new std::list; + const double delta = (xhi - xlo) * 1e-8; + for (double b : boundaries) { + if (b > xlo - delta && b < xhi + delta) { + hint->push_back(b); + } } - return hint ; + return hint; } diff --git a/roofit/roofitcore/src/RooHistPdf.cxx b/roofit/roofitcore/src/RooHistPdf.cxx index dd3555827e854..176911270ed0c 100644 --- a/roofit/roofitcore/src/RooHistPdf.cxx +++ b/roofit/roofitcore/src/RooHistPdf.cxx @@ -416,6 +416,97 @@ bool RooHistPdf::forceAnalyticalInt(const RooAbsArg& dep) const return forceAnalyticalInt(_pdfObsList, dep); } +//////////////////////////////////////////////////////////////////////////////// +/// Return the histogram bin boundaries mapped to the coordinates of the plot +/// observable `obs`. This handles the case where the p.d.f. observable is a +/// transformation of the plot observable (for example a shift like +/// `x_shifted = x - shift`), so that the returned boundaries are the positions +/// of the histogram bin edges as they appear when the p.d.f. is plotted as a +/// function of `obs`. +/// +/// Returns an empty vector if no corresponding histogram observable can be +/// located, or if a non-invertible transformation is detected. + +std::vector RooHistPdf::histogramBoundariesInPlotObs(RooDataHist const &dataHist, RooArgSet const &pdfObsList, + RooArgSet const &histObsList, RooAbsRealLValue &obs, + double xlo, double xhi) +{ + std::vector result; + + // Locate the histogram observable that corresponds to the plot observable. + // These are the scenarios we handle: + // 1. The plot observable `obs` has the same name as one of the p.d.f. + // observables. That p.d.f. observable is the histogram observable + // itself (no transformation). + // 2. The p.d.f. observable is a non-trivial function of `obs` (e.g. a + // shift or scale). We have to invert that function to map the + // histogram bin boundaries back into the plot-observable coordinate. + RooAbsArg *histObs = nullptr; + RooAbsArg *pdfObs = nullptr; + bool useTransform = false; + for (unsigned int i = 0; i < pdfObsList.size(); ++i) { + if (std::string(obs.GetName()) == pdfObsList[i]->GetName()) { + histObs = histObsList[i]; + break; + } + } + if (!histObs) { + for (unsigned int i = 0; i < pdfObsList.size(); ++i) { + auto *po = dynamic_cast(pdfObsList[i]); + if (po && po->dependsOn(obs)) { + histObs = histObsList[i]; + pdfObs = pdfObsList[i]; + useTransform = true; + break; + } + } + } + if (!histObs) { + return result; + } + + auto *lval = dynamic_cast(dataHist.get()->find(histObs->GetName())); + if (!lval) { + return result; + } + + const RooAbsBinning *binning = lval->getBinningPtr(nullptr); + const int nBoundaries = binning->numBoundaries(); + const double *rawBoundaries = binning->array(); + + if (!useTransform) { + result.assign(rawBoundaries, rawBoundaries + nBoundaries); + return result; + } + + // Invert the transformation `pdfObs = T(obs)` by linearly approximating + // it on the range of `obs` and solving for `obs` at each histogram bin + // boundary. This handles both l-values such as RooLinearVar and generic + // RooAbsReals such as RooFormulaVars that describe a shift or scale + // (the approximation is exact for linear transformations). + auto *pdfObsReal = static_cast(pdfObs); + const double probeLo = std::max(xlo, obs.getMin()); + const double probeHi = std::min(xhi, obs.getMax()); + if (probeHi > probeLo) { + const double obsSave = obs.getVal(); + obs.setVal(probeLo); + const double plo = pdfObsReal->getVal(); + obs.setVal(probeHi); + const double phi = pdfObsReal->getVal(); + obs.setVal(obsSave); + const double slope = (phi - plo) / (probeHi - probeLo); + if (slope != 0.0) { + result.reserve(nBoundaries); + for (int i = 0; i < nBoundaries; ++i) { + result.push_back(probeLo + (rawBoundaries[i] - plo) / slope); + } + } + } + + // The transformation may reverse the order (negative slope). + std::sort(result.begin(), result.end()); + return result; +} //////////////////////////////////////////////////////////////////////////////// /// Return sampling hint for making curves of (projections) of this function @@ -441,40 +532,21 @@ std::list* RooHistPdf::plotSamplingHint(RooDataHist const& dataHist, return nullptr; } - // Check that observable is in dataset, if not no hint is generated - RooAbsArg* dhObs = nullptr; - for (unsigned int i=0; i < pdfObsList.size(); ++i) { - RooAbsArg* histObs = histObsList[i]; - RooAbsArg* pdfObs = pdfObsList[i]; - if (std::string(obs.GetName())==pdfObs->GetName()) { - dhObs = dataHist.get()->find(histObs->GetName()) ; - break; - } - } - - if (!dhObs) { - return nullptr; - } - RooAbsLValue* lval = dynamic_cast(dhObs) ; - if (!lval) { - return nullptr; + const std::vector boundaries = histogramBoundariesInPlotObs(dataHist, pdfObsList, histObsList, obs, xlo, xhi); + if (boundaries.empty()) { + return nullptr; } - // Retrieve position of all bin boundaries - - const RooAbsBinning* binning = lval->getBinningPtr(nullptr); - std::span boundaries{binning->array(), static_cast(binning->numBoundaries())}; - // Use the helper function from RooCurve to make sure to get sampling hints // that work with the RooFitPlotting. return RooCurve::plotSamplingHintForBinBoundaries(boundaries, xlo, xhi); } - //////////////////////////////////////////////////////////////////////////////// -/// Return sampling hint for making curves of (projections) of this function -/// as the recursive division strategy of RooCurve cannot deal efficiently -/// with the vertical lines that occur in a non-interpolated histogram +/// Return the histogram bin boundaries in the plot-observable coordinate. +/// The returned list is used by RooCurve's adaptive sampler to avoid stepping +/// over the discontinuities of a non-interpolated histogram, and by +/// RooBinIntegrator when integrating over this p.d.f. std::list* RooHistPdf::binBoundaries(RooAbsRealLValue& obs, double xlo, double xhi) const { @@ -483,27 +555,20 @@ std::list* RooHistPdf::binBoundaries(RooAbsRealLValue& obs, double xlo, return nullptr; } - // Check that observable is in dataset, if not no hint is generated - RooAbsLValue* lvarg = dynamic_cast(_dataHist->get()->find(obs.GetName())) ; - if (!lvarg) { - return nullptr ; + const std::vector boundaries = + histogramBoundariesInPlotObs(*_dataHist, _pdfObsList, _histObsList, obs, xlo, xhi); + if (boundaries.empty()) { + return nullptr; } - // Retrieve position of all bin boundaries - const RooAbsBinning* binning = lvarg->getBinningPtr(nullptr); - double* boundaries = binning->array() ; - - auto hint = new std::list ; - - // Construct array with pairs of points positioned epsilon to the left and - // right of the bin boundaries - for (Int_t i=0 ; inumBoundaries() ; i++) { - if (boundaries[i]>=xlo && boundaries[i]<=xhi) { - hint->push_back(boundaries[i]) ; - } + auto hint = new std::list; + for (double b : boundaries) { + if (b >= xlo && b <= xhi) { + hint->push_back(b); + } } - return hint ; + return hint; } diff --git a/roofit/roofitcore/test/testRooHistPdf.cxx b/roofit/roofitcore/test/testRooHistPdf.cxx index b8ec66e6a2250..f5768c6374677 100644 --- a/roofit/roofitcore/test/testRooHistPdf.cxx +++ b/roofit/roofitcore/test/testRooHistPdf.cxx @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -10,8 +11,13 @@ #include #include +#include + #include +#include +#include + // Verify that RooFit correctly uses analytic integration when having a // RooLinearVar as the observable of a RooHistPdf. TEST(RooHistPdf, AnalyticIntWithRooLinearVar) @@ -79,3 +85,66 @@ TEST(RooHistPdf, EnsurePositiveValuesInFFTConvPdf) EXPECT_EQ(fit_result->status(), 0) << "The fit should succeed with status 0"; } + +// GitHub issue #13030: plotting a RooHistPdf whose observable is a +// transformation of the plot variable (e.g. a shift `x_shifted = x - shift`) +// produced visually broken curves because `binBoundaries` and +// `plotSamplingHint` returned the raw histogram boundaries in the histogram +// observable coordinate instead of the plot observable coordinate. +TEST(RooHistPdf, ShiftedBinBoundaries) +{ + RooRealVar x{"x", "x", 1000, 1500}; + x.setBins(5); // bin edges in x: 1000, 1100, 1200, 1300, 1400, 1500 + + RooRealVar shift{"shift", "shift", 25.0, -100, 100}; + + TH1D h{"h", "", x.numBins(), x.getMin(), x.getMax()}; + for (int i = 1; i <= h.GetNbinsX(); ++i) { + h.SetBinContent(i, 1.0); + } + RooDataHist dh{"dh", "", x, &h}; + + // Case 1: pdfObs = x - shift as a RooFormulaVar (no l-value inverse). + { + RooFormulaVar xShifted{"x_shifted", "x - shift", {x, shift}}; + RooHistPdf pdf{"pdf_f", "", xShifted, x, dh, 0}; + std::unique_ptr> boundaries{pdf.binBoundaries(x, 1000.0, 1600.0)}; + ASSERT_TRUE(boundaries); + // Hist bin boundary `b` satisfies `x - shift = b`, so plot_x = b + shift. + const std::vector expected{1025.0, 1125.0, 1225.0, 1325.0, 1425.0, 1525.0}; + ASSERT_EQ(boundaries->size(), expected.size()); + auto it = boundaries->begin(); + for (double e : expected) { + EXPECT_DOUBLE_EQ(*it++, e); + } + } + + // Case 2: pdfObs = RooLinearVar (slope=1, offset=shift -> xShifted = x + shift). + { + RooRealVar slope{"slope", "slope", 1.0}; + RooLinearVar xShifted{"x_shifted_lv", "", x, slope, shift}; + RooHistPdf pdf{"pdf_lv", "", RooArgList{xShifted}, RooArgList{x}, dh, 0}; + std::unique_ptr> boundaries{pdf.binBoundaries(x, 1000.0, 1500.0)}; + ASSERT_TRUE(boundaries); + // xShifted = x + 25 -> plot_x = b - 25; only values inside [1000, 1500] kept. + const std::vector expected{1075.0, 1175.0, 1275.0, 1375.0, 1475.0}; + ASSERT_EQ(boundaries->size(), expected.size()); + auto it = boundaries->begin(); + for (double e : expected) { + EXPECT_DOUBLE_EQ(*it++, e); + } + } + + // Case 3: identity (no transformation) keeps the raw boundaries. + { + RooHistPdf pdf{"pdf_id", "", x, dh, 0}; + std::unique_ptr> boundaries{pdf.binBoundaries(x, 1000.0, 1500.0)}; + ASSERT_TRUE(boundaries); + const std::vector expected{1000.0, 1100.0, 1200.0, 1300.0, 1400.0, 1500.0}; + ASSERT_EQ(boundaries->size(), expected.size()); + auto it = boundaries->begin(); + for (double e : expected) { + EXPECT_DOUBLE_EQ(*it++, e); + } + } +}