diff --git a/roofit/roofitcore/inc/RooHistPdf.h b/roofit/roofitcore/inc/RooHistPdf.h index bd50afddf3d58..d62b5078c3fe4 100644 --- a/roofit/roofitcore/inc/RooHistPdf.h +++ b/roofit/roofitcore/inc/RooHistPdf.h @@ -23,6 +23,7 @@ #include "RooDataHist.h" #include +#include class RooRealVar; class RooAbsReal; @@ -145,6 +146,10 @@ class RooHistPdf : public RooAbsPdf { double xlo, double xhi); + static std::vector histogramBoundariesInPlotObs(RooDataHist const &dataHist, RooArgSet const &pdfObsList, + RooArgSet const &histObsList, RooAbsRealLValue &obs, + double xlo, double xhi); + inline void initializeOwnedDataHist(std::unique_ptr &&dataHist) { _ownedDataHist = std::move(dataHist); diff --git a/roofit/roofitcore/src/RooHistFunc.cxx b/roofit/roofitcore/src/RooHistFunc.cxx index 54f9bf9e472cd..10041d3f16446 100644 --- a/roofit/roofitcore/src/RooHistFunc.cxx +++ b/roofit/roofitcore/src/RooHistFunc.cxx @@ -326,11 +326,11 @@ std::list* RooHistFunc::plotSamplingHint(RooAbsRealLValue& obs, double x return RooHistPdf::plotSamplingHint(*_dataHist, _depList, _histObsList, _intOrder, obs, xlo, xhi); } - //////////////////////////////////////////////////////////////////////////////// -/// Return sampling hint for making curves of (projections) of this function -/// as the recursive division strategy of RooCurve cannot deal efficiently -/// with the vertical lines that occur in a non-interpolated histogram +/// Return the histogram bin boundaries mapped into the plot-observable +/// coordinate. Handles the case where the function observable is a +/// transformation (shift, scale, ...) of the plot observable so that the +/// returned boundaries are placed at the correct positions in the plot. std::list* RooHistFunc::binBoundaries(RooAbsRealLValue& obs, double xlo, double xhi) const { @@ -339,86 +339,21 @@ std::list* RooHistFunc::binBoundaries(RooAbsRealLValue& obs, double xlo, return nullptr ; } - // Find histogram observable corresponding to pdf observable - RooAbsArg* hobs(nullptr) ; - for (auto i = 0u; i < _histObsList.size(); ++i) { - const auto harg = _histObsList[i]; - const auto parg = _depList[i]; - if (std::string(parg->GetName())==obs.GetName()) { - hobs=harg ; - } + const std::vector boundaries = + RooHistPdf::histogramBoundariesInPlotObs(*_dataHist, _depList, _histObsList, obs, xlo, xhi); + if (boundaries.empty()) { + return nullptr; } - // std::cout << "RooHistFunc::bb(" << GetName() << ") histObs = " << _histObsList << std::endl ; - // std::cout << "RooHistFunc::bb(" << GetName() << ") pdfObs = " << _depList << std::endl ; - - RooAbsRealLValue* transform = nullptr; - if (!hobs) { - - // Considering alternate: input observable is histogram observable and pdf observable is transformation in terms of it - RooAbsArg* pobs = nullptr; - for (auto i = 0u; i < _histObsList.size(); ++i) { - const auto harg = _histObsList[i]; - const auto parg = _depList[i]; - if (std::string(harg->GetName())==obs.GetName()) { - pobs=parg ; - hobs=harg ; - } - } - - // Not found, or check that matching pdf observable is an l-value dependent on histogram observable fails - if (!hobs || !(pobs->dependsOn(obs) && dynamic_cast(pobs))) { - std::cout << "RooHistFunc::binBoundaries(" << GetName() << ") obs = " << obs.GetName() << " hobs is not found, returning null" << std::endl ; - return nullptr ; - } - - // Now we are in business - we are in a situation where the pdf observable LV(x), mapping to a histogram observable x - // We can return bin boundaries by mapping the histogram boundaties through the inverse of the LV(x) transformation - transform = dynamic_cast(pobs) ; - } - - - // std::cout << "hobs = " << hobs->GetName() << std::endl ; - // std::cout << "transform = " << (transform?transform->GetName():"") << std::endl ; - - // Check that observable is in dataset, if not no hint is generated - RooAbsArg* xtmp = _dataHist->get()->find(hobs->GetName()) ; - if (!xtmp) { - std::cout << "RooHistFunc::binBoundaries(" << GetName() << ") hobs = " << hobs->GetName() << " is not found in dataset?" << std::endl ; - _dataHist->get()->Print("v") ; - return nullptr ; - } - RooAbsLValue* lvarg = dynamic_cast(_dataHist->get()->find(hobs->GetName())) ; - if (!lvarg) { - std::cout << "RooHistFunc::binBoundaries(" << GetName() << ") hobs = " << hobs->GetName() << " but is not an LV, returning null" << std::endl ; - return nullptr ; - } - - // Retrieve position of all bin boundaries - const RooAbsBinning* binning = lvarg->getBinningPtr(nullptr); - double* boundaries = binning->array() ; - - auto hint = new std::list ; - - double delta = (xhi-xlo)*1e-8 ; - - // Construct array with pairs of points positioned epsilon to the left and - // right of the bin boundaries - for (Int_t i=0 ; inumBoundaries() ; i++) { - if (boundaries[i]>xlo-delta && boundaries[i]setVal(boundary) ; - //cout << "transform bound " << boundary << " using " << transform->GetName() << " result " << obs.getVal() << std::endl ; - hint->push_back(obs.getVal()) ; - } else { - hint->push_back(boundary) ; - } - } + auto hint = new std::list; + const double delta = (xhi - xlo) * 1e-8; + for (double b : boundaries) { + if (b > xlo - delta && b < xhi + delta) { + hint->push_back(b); + } } - return hint ; + return hint; } diff --git a/roofit/roofitcore/src/RooHistPdf.cxx b/roofit/roofitcore/src/RooHistPdf.cxx index dd3555827e854..176911270ed0c 100644 --- a/roofit/roofitcore/src/RooHistPdf.cxx +++ b/roofit/roofitcore/src/RooHistPdf.cxx @@ -416,6 +416,97 @@ bool RooHistPdf::forceAnalyticalInt(const RooAbsArg& dep) const return forceAnalyticalInt(_pdfObsList, dep); } +//////////////////////////////////////////////////////////////////////////////// +/// Return the histogram bin boundaries mapped to the coordinates of the plot +/// observable `obs`. This handles the case where the p.d.f. observable is a +/// transformation of the plot observable (for example a shift like +/// `x_shifted = x - shift`), so that the returned boundaries are the positions +/// of the histogram bin edges as they appear when the p.d.f. is plotted as a +/// function of `obs`. +/// +/// Returns an empty vector if no corresponding histogram observable can be +/// located, or if a non-invertible transformation is detected. + +std::vector RooHistPdf::histogramBoundariesInPlotObs(RooDataHist const &dataHist, RooArgSet const &pdfObsList, + RooArgSet const &histObsList, RooAbsRealLValue &obs, + double xlo, double xhi) +{ + std::vector result; + + // Locate the histogram observable that corresponds to the plot observable. + // These are the scenarios we handle: + // 1. The plot observable `obs` has the same name as one of the p.d.f. + // observables. That p.d.f. observable is the histogram observable + // itself (no transformation). + // 2. The p.d.f. observable is a non-trivial function of `obs` (e.g. a + // shift or scale). We have to invert that function to map the + // histogram bin boundaries back into the plot-observable coordinate. + RooAbsArg *histObs = nullptr; + RooAbsArg *pdfObs = nullptr; + bool useTransform = false; + for (unsigned int i = 0; i < pdfObsList.size(); ++i) { + if (std::string(obs.GetName()) == pdfObsList[i]->GetName()) { + histObs = histObsList[i]; + break; + } + } + if (!histObs) { + for (unsigned int i = 0; i < pdfObsList.size(); ++i) { + auto *po = dynamic_cast(pdfObsList[i]); + if (po && po->dependsOn(obs)) { + histObs = histObsList[i]; + pdfObs = pdfObsList[i]; + useTransform = true; + break; + } + } + } + if (!histObs) { + return result; + } + + auto *lval = dynamic_cast(dataHist.get()->find(histObs->GetName())); + if (!lval) { + return result; + } + + const RooAbsBinning *binning = lval->getBinningPtr(nullptr); + const int nBoundaries = binning->numBoundaries(); + const double *rawBoundaries = binning->array(); + + if (!useTransform) { + result.assign(rawBoundaries, rawBoundaries + nBoundaries); + return result; + } + + // Invert the transformation `pdfObs = T(obs)` by linearly approximating + // it on the range of `obs` and solving for `obs` at each histogram bin + // boundary. This handles both l-values such as RooLinearVar and generic + // RooAbsReals such as RooFormulaVars that describe a shift or scale + // (the approximation is exact for linear transformations). + auto *pdfObsReal = static_cast(pdfObs); + const double probeLo = std::max(xlo, obs.getMin()); + const double probeHi = std::min(xhi, obs.getMax()); + if (probeHi > probeLo) { + const double obsSave = obs.getVal(); + obs.setVal(probeLo); + const double plo = pdfObsReal->getVal(); + obs.setVal(probeHi); + const double phi = pdfObsReal->getVal(); + obs.setVal(obsSave); + const double slope = (phi - plo) / (probeHi - probeLo); + if (slope != 0.0) { + result.reserve(nBoundaries); + for (int i = 0; i < nBoundaries; ++i) { + result.push_back(probeLo + (rawBoundaries[i] - plo) / slope); + } + } + } + + // The transformation may reverse the order (negative slope). + std::sort(result.begin(), result.end()); + return result; +} //////////////////////////////////////////////////////////////////////////////// /// Return sampling hint for making curves of (projections) of this function @@ -441,40 +532,21 @@ std::list* RooHistPdf::plotSamplingHint(RooDataHist const& dataHist, return nullptr; } - // Check that observable is in dataset, if not no hint is generated - RooAbsArg* dhObs = nullptr; - for (unsigned int i=0; i < pdfObsList.size(); ++i) { - RooAbsArg* histObs = histObsList[i]; - RooAbsArg* pdfObs = pdfObsList[i]; - if (std::string(obs.GetName())==pdfObs->GetName()) { - dhObs = dataHist.get()->find(histObs->GetName()) ; - break; - } - } - - if (!dhObs) { - return nullptr; - } - RooAbsLValue* lval = dynamic_cast(dhObs) ; - if (!lval) { - return nullptr; + const std::vector boundaries = histogramBoundariesInPlotObs(dataHist, pdfObsList, histObsList, obs, xlo, xhi); + if (boundaries.empty()) { + return nullptr; } - // Retrieve position of all bin boundaries - - const RooAbsBinning* binning = lval->getBinningPtr(nullptr); - std::span boundaries{binning->array(), static_cast(binning->numBoundaries())}; - // Use the helper function from RooCurve to make sure to get sampling hints // that work with the RooFitPlotting. return RooCurve::plotSamplingHintForBinBoundaries(boundaries, xlo, xhi); } - //////////////////////////////////////////////////////////////////////////////// -/// Return sampling hint for making curves of (projections) of this function -/// as the recursive division strategy of RooCurve cannot deal efficiently -/// with the vertical lines that occur in a non-interpolated histogram +/// Return the histogram bin boundaries in the plot-observable coordinate. +/// The returned list is used by RooCurve's adaptive sampler to avoid stepping +/// over the discontinuities of a non-interpolated histogram, and by +/// RooBinIntegrator when integrating over this p.d.f. std::list* RooHistPdf::binBoundaries(RooAbsRealLValue& obs, double xlo, double xhi) const { @@ -483,27 +555,20 @@ std::list* RooHistPdf::binBoundaries(RooAbsRealLValue& obs, double xlo, return nullptr; } - // Check that observable is in dataset, if not no hint is generated - RooAbsLValue* lvarg = dynamic_cast(_dataHist->get()->find(obs.GetName())) ; - if (!lvarg) { - return nullptr ; + const std::vector boundaries = + histogramBoundariesInPlotObs(*_dataHist, _pdfObsList, _histObsList, obs, xlo, xhi); + if (boundaries.empty()) { + return nullptr; } - // Retrieve position of all bin boundaries - const RooAbsBinning* binning = lvarg->getBinningPtr(nullptr); - double* boundaries = binning->array() ; - - auto hint = new std::list ; - - // Construct array with pairs of points positioned epsilon to the left and - // right of the bin boundaries - for (Int_t i=0 ; inumBoundaries() ; i++) { - if (boundaries[i]>=xlo && boundaries[i]<=xhi) { - hint->push_back(boundaries[i]) ; - } + auto hint = new std::list; + for (double b : boundaries) { + if (b >= xlo && b <= xhi) { + hint->push_back(b); + } } - return hint ; + return hint; } diff --git a/roofit/roofitcore/test/testRooHistPdf.cxx b/roofit/roofitcore/test/testRooHistPdf.cxx index b8ec66e6a2250..f5768c6374677 100644 --- a/roofit/roofitcore/test/testRooHistPdf.cxx +++ b/roofit/roofitcore/test/testRooHistPdf.cxx @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -10,8 +11,13 @@ #include #include +#include + #include +#include +#include + // Verify that RooFit correctly uses analytic integration when having a // RooLinearVar as the observable of a RooHistPdf. TEST(RooHistPdf, AnalyticIntWithRooLinearVar) @@ -79,3 +85,66 @@ TEST(RooHistPdf, EnsurePositiveValuesInFFTConvPdf) EXPECT_EQ(fit_result->status(), 0) << "The fit should succeed with status 0"; } + +// GitHub issue #13030: plotting a RooHistPdf whose observable is a +// transformation of the plot variable (e.g. a shift `x_shifted = x - shift`) +// produced visually broken curves because `binBoundaries` and +// `plotSamplingHint` returned the raw histogram boundaries in the histogram +// observable coordinate instead of the plot observable coordinate. +TEST(RooHistPdf, ShiftedBinBoundaries) +{ + RooRealVar x{"x", "x", 1000, 1500}; + x.setBins(5); // bin edges in x: 1000, 1100, 1200, 1300, 1400, 1500 + + RooRealVar shift{"shift", "shift", 25.0, -100, 100}; + + TH1D h{"h", "", x.numBins(), x.getMin(), x.getMax()}; + for (int i = 1; i <= h.GetNbinsX(); ++i) { + h.SetBinContent(i, 1.0); + } + RooDataHist dh{"dh", "", x, &h}; + + // Case 1: pdfObs = x - shift as a RooFormulaVar (no l-value inverse). + { + RooFormulaVar xShifted{"x_shifted", "x - shift", {x, shift}}; + RooHistPdf pdf{"pdf_f", "", xShifted, x, dh, 0}; + std::unique_ptr> boundaries{pdf.binBoundaries(x, 1000.0, 1600.0)}; + ASSERT_TRUE(boundaries); + // Hist bin boundary `b` satisfies `x - shift = b`, so plot_x = b + shift. + const std::vector expected{1025.0, 1125.0, 1225.0, 1325.0, 1425.0, 1525.0}; + ASSERT_EQ(boundaries->size(), expected.size()); + auto it = boundaries->begin(); + for (double e : expected) { + EXPECT_DOUBLE_EQ(*it++, e); + } + } + + // Case 2: pdfObs = RooLinearVar (slope=1, offset=shift -> xShifted = x + shift). + { + RooRealVar slope{"slope", "slope", 1.0}; + RooLinearVar xShifted{"x_shifted_lv", "", x, slope, shift}; + RooHistPdf pdf{"pdf_lv", "", RooArgList{xShifted}, RooArgList{x}, dh, 0}; + std::unique_ptr> boundaries{pdf.binBoundaries(x, 1000.0, 1500.0)}; + ASSERT_TRUE(boundaries); + // xShifted = x + 25 -> plot_x = b - 25; only values inside [1000, 1500] kept. + const std::vector expected{1075.0, 1175.0, 1275.0, 1375.0, 1475.0}; + ASSERT_EQ(boundaries->size(), expected.size()); + auto it = boundaries->begin(); + for (double e : expected) { + EXPECT_DOUBLE_EQ(*it++, e); + } + } + + // Case 3: identity (no transformation) keeps the raw boundaries. + { + RooHistPdf pdf{"pdf_id", "", x, dh, 0}; + std::unique_ptr> boundaries{pdf.binBoundaries(x, 1000.0, 1500.0)}; + ASSERT_TRUE(boundaries); + const std::vector expected{1000.0, 1100.0, 1200.0, 1300.0, 1400.0, 1500.0}; + ASSERT_EQ(boundaries->size(), expected.size()); + auto it = boundaries->begin(); + for (double e : expected) { + EXPECT_DOUBLE_EQ(*it++, e); + } + } +}