From f9f05ecf11353f06cbafdf87778c7c4e7ea96570 Mon Sep 17 00:00:00 2001 From: moneta Date: Wed, 11 Feb 2026 16:10:39 +0100 Subject: [PATCH] [hist] Fix computation of number of entries in ResetStats When computing the number of entries in TH1::ResetStats always include underflow/overflow. Also, in case of weighted histogram, fEntries is set to the effective entries inclusing underflow/overflow. Note that GetEffectiveEntries() will return the underflow or overflow depending on the flag used for computing the statistics. Fix a bug in the computation of GetAllSumOfWeights() and improve the function to return (as an optional parameter) the total sum of weight square (including underflow/overflows). The bug was happening for dimensions < 2. (see ROOT-21253) Remove in UHI indexing test the check that number of effective entries is equal to number of entries. With the current changes the number of entries will contain underflows/overflows while number of effective entries not, because it follows the convention used for the statistics (mean,std, etc..) including what is in the current histogram range In stressHistogram add test to check sumofallweights including underflow/overflow and compare with GetEntries for unweighted histograms (not for TProfile's which are different) --- .../pythonizations/test/uhi_indexing.py | 2 - hist/hist/inc/TH1.h | 2 +- hist/hist/src/TH1.cxx | 65 ++++++++++++++----- hist/hist/src/TProfileHelper.h | 2 + test/stressHistogram.cxx | 37 ++++++++++- 5 files changed, 87 insertions(+), 21 deletions(-) diff --git a/bindings/pyroot/pythonizations/test/uhi_indexing.py b/bindings/pyroot/pythonizations/test/uhi_indexing.py index a823143294fd1..2618e925df935 100644 --- a/bindings/pyroot/pythonizations/test/uhi_indexing.py +++ b/bindings/pyroot/pythonizations/test/uhi_indexing.py @@ -296,7 +296,6 @@ def test_statistics_slice(self, hist_setup): sliced_hist_full = hist_setup[...] assert hist_setup.GetEffectiveEntries() == pytest.approx(sliced_hist_full.GetEffectiveEntries()) - assert sliced_hist_full.GetEntries() == pytest.approx(sliced_hist_full.GetEffectiveEntries()) assert hist_setup.Integral() == pytest.approx(sliced_hist_full.Integral()) # Check if slicing over a range updates the statistics @@ -307,7 +306,6 @@ def test_statistics_slice(self, hist_setup): assert hist_setup.Integral() == sliced_hist.Integral() assert hist_setup.GetEffectiveEntries() == pytest.approx(sliced_hist.GetEffectiveEntries()) - assert sliced_hist.GetEntries() == pytest.approx(sliced_hist.GetEffectiveEntries()) assert hist_setup.GetStdDev() == pytest.approx(sliced_hist.GetStdDev(), rel=10e-5) assert hist_setup.GetMean() == pytest.approx(sliced_hist.GetMean(), rel=10e-5) diff --git a/hist/hist/inc/TH1.h b/hist/hist/inc/TH1.h index 8cce4f6fc0261..cbd931b53fc3b 100644 --- a/hist/hist/inc/TH1.h +++ b/hist/hist/inc/TH1.h @@ -553,7 +553,7 @@ class TH1 : public TNamed, public TAttLine, public TAttFill, public TAttMarker { virtual void GetStats(Double_t *stats) const; virtual Double_t GetStdDev(Int_t axis=1) const; virtual Double_t GetStdDevError(Int_t axis=1) const; - Double_t GetSumOfAllWeights(const bool includeOverflow) const; + Double_t GetSumOfAllWeights(const bool includeOverflow, Double_t * sumWeightSquare = nullptr) const; /// Return the sum of weights across all bins excluding under/overflows. /// \see TH1::GetSumOfAllWeights() virtual Double_t GetSumOfWeights() const { return GetSumOfAllWeights(false); } diff --git a/hist/hist/src/TH1.cxx b/hist/hist/src/TH1.cxx index 168cfd0a4afa6..638f35fe3f815 100644 --- a/hist/hist/src/TH1.cxx +++ b/hist/hist/src/TH1.cxx @@ -967,11 +967,13 @@ Bool_t TH1::Add(const TH1 *h1, Double_t c1) return (iret >= 0); } - // Create Sumw2 if h1 has Sumw2 set + // Create Sumw2 if h1 has Sumw2 set if (fSumw2.fN == 0 && h1->GetSumw2N() != 0) Sumw2(); + // In addition, create Sumw2 if is not a simple addition, otherwise errors will not be correctly computed + if (fSumw2.fN == 0 && c1 != 1.0) Sumw2(); - // - Add statistics - Double_t entries = TMath::Abs( GetEntries() + c1 * h1->GetEntries() ); + // - Add statistics (for c1=1) + Double_t entries = GetEntries() + h1->GetEntries(); // statistics can be preserved only in case of positive coefficients // otherwise with negative c1 (histogram subtraction) one risks to get negative variances @@ -1048,7 +1050,15 @@ Bool_t TH1::Add(const TH1 *h1, Double_t c1) else s1[i] += c1*s2[i]; } PutStats(s1); - SetEntries(entries); + if (c1 == 1.0) + SetEntries(entries); + else { + // compute entries as effective entries in case of + // weights different than 1 + double sumw2 = 0; + double sumw = GetSumOfAllWeights(true, &sumw2); + if (sumw2 > 0) SetEntries( sumw*sumw/sumw2); + } } return kTRUE; } @@ -1133,7 +1143,8 @@ Bool_t TH1::Add(const TH1 *h1, const TH1 *h2, Double_t c1, Double_t c2) // Create Sumw2 if h1 or h2 have Sumw2 set if (fSumw2.fN == 0 && (h1->GetSumw2N() != 0 || h2->GetSumw2N() != 0)) Sumw2(); - + // Create also Sumw2 if not a simple addition (c1 = 1, c2 = 1) + if (fSumw2.fN == 0 && (c1 != 1.0 || c2 != 1.0)) Sumw2(); // - Add statistics Double_t nEntries = TMath::Abs( c1*h1->GetEntries() + c2*h2->GetEntries() ); @@ -1240,9 +1251,18 @@ Bool_t TH1::Add(const TH1 *h1, const TH1 *h2, Double_t c1, Double_t c2) ResetStats(); } else { - // update statistics (do here to avoid changes by SetBinContent) FIXME remove??? + // update statistics PutStats(s3); - SetEntries(nEntries); + // previous entries are correct only if c1=1 and c2=1 + if (c1 == 1.0 && c2 == 1.0) + SetEntries(nEntries); + else { + // compute entries as effective entries in case of + // weights different than 1 + double sumw2 = 0; + double sumw = GetSumOfAllWeights(true, &sumw2); + if (sumw2 > 0) SetEntries( sumw*sumw/sumw2); + } } return kTRUE; @@ -7959,33 +7979,48 @@ void TH1::ResetStats() fEntries = 1; // to force re-calculation of the statistics in TH1::GetStats GetStats(stats); PutStats(stats); - fEntries = TMath::Abs(fTsumw); - // use effective entries for weighted histograms: (sum_w) ^2 / sum_w2 - if (fSumw2.fN > 0 && fTsumw > 0 && stats[1] > 0 ) fEntries = stats[0]*stats[0]/ stats[1]; + // histogram entries should include always underflows and overflows + if (GetStatOverflowsBehaviour() && !fXaxis.TestBit(TAxis::kAxisRange)) + fEntries = TMath::Abs(fTsumw); + else { + Double_t sumw2 = 0; + Double_t * p_sumw2 = (fSumw2.fN > 0) ? &sumw2 : nullptr; + fEntries = GetSumOfAllWeights(true, p_sumw2); + // use effective entries for weighted histograms: (sum_w) ^2 / sum_w2 + if (p_sumw2 && sumw2 > 0) fEntries = fEntries*fEntries/ sumw2; + } } //////////////////////////////////////////////////////////////////////////////// -/// Return the sum of all weights +/// Return the sum of all weights and optionally also the sum of weight squares /// \param includeOverflow true to include under/overflows bins, false to exclude those. /// \note Different from TH1::GetSumOfWeights, that always excludes those -Double_t TH1::GetSumOfAllWeights(const bool includeOverflow) const +Double_t TH1::GetSumOfAllWeights(const bool includeOverflow, Double_t * sumWeightSquare) const { if (fBuffer) const_cast(this)->BufferEmpty(); const Int_t start = (includeOverflow ? 0 : 1); const Int_t lastX = fXaxis.GetNbins() + (includeOverflow ? 1 : 0); - const Int_t lastY = fYaxis.GetNbins() + (includeOverflow ? 1 : 0); - const Int_t lastZ = fZaxis.GetNbins() + (includeOverflow ? 1 : 0); + const Int_t lastY = (fDimension > 1) ? (fYaxis.GetNbins() + (includeOverflow ? 1 : 0)) : start; + const Int_t lastZ = (fDimension > 2) ? (fZaxis.GetNbins() + (includeOverflow ? 1 : 0)) : start; Double_t sum =0; + Double_t sum2 = 0; for(auto binz = start; binz <= lastZ; binz++) { for(auto biny = start; biny <= lastY; biny++) { for(auto binx = start; binx <= lastX; binx++) { const auto bin = GetBin(binx, biny, binz); - sum += RetrieveBinContent(bin); + sum += RetrieveBinContent(bin); + if (sumWeightSquare && fSumw2.fN > 0) sum2 += GetBinErrorSqUnchecked(bin); } } } + if (sumWeightSquare) { + if (fSumw2.fN > 0) + *sumWeightSquare = sum2; + else + *sumWeightSquare = sum; + } return sum; } diff --git a/hist/hist/src/TProfileHelper.h b/hist/hist/src/TProfileHelper.h index e2b93a6ff7e12..e2996b96f65a9 100644 --- a/hist/hist/src/TProfileHelper.h +++ b/hist/hist/src/TProfileHelper.h @@ -108,6 +108,8 @@ Bool_t TProfileHelper::Add(T* p, const TH1 *h1, const TH1 *h2, Double_t c1, Dou // create sumw2 per bin if not set if (p->fBinSumw2.fN == 0 && (p1->fBinSumw2.fN != 0 || p2->fBinSumw2.fN != 0)) p->Sumw2(); + // create sumw2 also for the case where coefficients are not equal to 1 + if (p->fSumw2.fN == 0 && (c1 != 1.0 || c2 != 1.0)) p->Sumw2(); // Make the loop over the bins to calculate the Addition Double_t *cu1 = p1->GetW(); Double_t *cu2 = p2->GetW(); diff --git a/test/stressHistogram.cxx b/test/stressHistogram.cxx index 93c5d7e366d4d..56f7d3da76f5d 100644 --- a/test/stressHistogram.cxx +++ b/test/stressHistogram.cxx @@ -127,7 +127,7 @@ enum compareOptions { int defaultEqualOptions = 0; //cmpOptPrint; //int defaultEqualOptions = cmpOptDebug; -Bool_t cleanHistos = kTRUE; // delete histogram after testing (swicth off in case of debugging) +Bool_t cleanHistos = kTRUE; // delete histogram after testing (switch off in case of debugging) const double defaultErrorLimit = 1.E-10; @@ -142,7 +142,7 @@ const char* refFileName = "./stressHistogram.5.18.00.root"; TRandom2 r; // set to zero if want to run different numbers every time -const int initialSeed = 0; +int initialSeed = 0; @@ -181,7 +181,6 @@ bool testAdd1() TH1D* h2 = new TH1D("t1D1_h2", "h2-Title", numberOfBins, minRange, maxRange); TH1D* h3 = new TH1D("t1D1_h3", "h3=c1*h1+c2*h2", numberOfBins, minRange, maxRange); - h1->Sumw2();h2->Sumw2();h3->Sumw2(); FillHistograms(h1, h3, 1.0, c1); FillHistograms(h2, h3, 1.0, c2); @@ -11106,6 +11105,11 @@ int stressHistogram(int testNumber = 0) // Test 7 // Add Tests + // for adding we do not need to set by default Sumw2 + // we test that automatically is set + bool globalSumw2 = TH1::GetDefaultSumw2(); + if (globalSumw2) TH1::SetDefaultSumw2(false); + const unsigned int numberOfAdds = 22; pointer2Test addTestPointer[numberOfAdds] = { testAdd1, testAddProfile1, testAdd2, testAddProfile2, @@ -11126,6 +11130,8 @@ int stressHistogram(int testNumber = 0) "Add tests for 1D, 2D and 3D Histograms and Profiles..............", addTestPointer }; + if (globalSumw2) TH1::SetDefaultSumw2(true); + // Test 8 // Multiply Tests const unsigned int numberOfMultiply = 20; @@ -11911,6 +11917,21 @@ int compareStatistics( TH1* h1, TH1* h2, bool debug, double ERRORLIMIT) << " | " << fabs( stats1[0] - stats2[0] ) << " " << differents << std::endl; + // check the sum of weights (excluding underflows/overflows) + differents += (bool) equals( h1->GetSumOfWeights(), h2->GetSumOfWeights(), 100*ERRORLIMIT); + if ( debug ) + std::cout << "Sum Of Weigths(2): " << h1->GetSumOfWeights() << " " << h2->GetSumOfWeights() + << " | " << fabs( h2->GetSumOfWeights() - h1->GetSumOfWeights() ) + << " " << differents + << std::endl; + // check the sum of all weights (including underflows/overflows) + differents += (bool) equals( h1->GetSumOfAllWeights(true), h2->GetSumOfAllWeights(true), 100*ERRORLIMIT); + if ( debug ) + std::cout << "Sum Of All Weigths (with u/o): " << h1->GetSumOfAllWeights(true) << " " << h2->GetSumOfAllWeights(true) + << " | " << fabs( h2->GetSumOfAllWeights(true) - h1->GetSumOfAllWeights(true) ) + << " " << differents + << std::endl; + if (TMath::AreEqualRel(stats1[0], h1->GetEffectiveEntries() , 1.E-12) ) { // unweighted histograms - check also number of entries @@ -11920,6 +11941,16 @@ int compareStatistics( TH1* h1, TH1* h2, bool debug, double ERRORLIMIT) << " | " << fabs( h1->GetEntries() - h2->GetEntries() ) << " " << differents << std::endl; + + // check that the number of entries is equal to sum of all weights + // this is not valid for TProfiles + if (!h1->InheritsFrom(TProfile::Class()) && !h1->InheritsFrom(TProfile2D::Class()) && !h1->InheritsFrom(TProfile3D::Class())) { + differents += (bool) equals( h1->GetEntries(), h2->GetSumOfAllWeights(true), 100*ERRORLIMIT); + if (debug) + std::cout << "Entries vs Sum of All Weights: " << h1->GetEntries() << " " << h2->GetSumOfAllWeights(true) + << " | " << fabs( h1->GetEntries() - h2->GetSumOfAllWeights(true) ) + << " " << differents << std::endl; + } } // Number of Effective Entries