From 36ed5f12c9eb1859a66cbe4de2fe6107fd6ff1ee Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Mon, 16 Feb 2026 15:34:50 -0500 Subject: [PATCH 01/23] Support total phase space unfolding --- scripts/plotting/inclusive_xsec_summary.py | 107 +++++++++++++++++---- scripts/rabbit/setupRabbit.py | 56 ++++++++--- wremnants/combine_helpers.py | 87 +++++++++++++---- wremnants/datasets/datagroups.py | 15 ++- 4 files changed, 210 insertions(+), 55 deletions(-) diff --git a/scripts/plotting/inclusive_xsec_summary.py b/scripts/plotting/inclusive_xsec_summary.py index 023c4e20b..8d1d14e57 100644 --- a/scripts/plotting/inclusive_xsec_summary.py +++ b/scripts/plotting/inclusive_xsec_summary.py @@ -10,6 +10,7 @@ import numpy as np import pandas as pd from matplotlib.patches import Ellipse, Polygon +from scipy.stats import chi2 import rabbit.io_tools from utilities import parsing @@ -32,6 +33,9 @@ default=None, help="Path to config file for style formatting", ) +parser.add_argument( + "--full", action="store_true", default=False, help="full phase space results" +) args = parser.parse_args() logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) @@ -73,25 +77,55 @@ } # mapping, channel, bin +identifier = "_masked" +if args.full: + identifier = f"_full{identifier}" xsec_keys = [ - (r"$\mathrm{W}^{-}$", "Project ch1_masked qGen", "ch1_masked", {"qGen": 0}), - (r"$\mathrm{W}^{+}$", "Project ch1_masked qGen", "ch1_masked", {"qGen": 1}), - (r"$\mathrm{W}$", "Project ch1_masked", "ch1_masked", None), - (r"$\mathrm{Z}$", "Project ch0_masked", "ch0_masked", None), + ( + r"$\mathrm{W}^{-}$", + f"Project ch1{identifier} qGen", + f"ch1{identifier}", + {"qGen": 0}, + ), + ( + r"$\mathrm{W}^{+}$", + f"Project ch1{identifier} qGen", + f"ch1{identifier}", + {"qGen": 1}, + ), + (r"$\mathrm{W}$", f"Project ch1{identifier}", f"ch1{identifier}", None), + (r"$\mathrm{Z}$", f"Project ch0{identifier}", f"ch0{identifier}", None), ( r"$\mathrm{W}^{+}/\mathrm{W}^{-}$", - "Ratio ch1_masked ch1_masked qGen:1,ptGen:sum,absEtaGen:sum qGen:0,ptGen:sum,absEtaGen:sum", - "ch1_masked", + ( + f"Ratio ch1{identifier} ch1{identifier} qGen:1,ptGen:sum,absEtaGen:sum qGen:0,ptGen:sum,absEtaGen:sum", + f"Ratio ch1{identifier} ch1{identifier} qGen:1 qGen:0", + ), + f"ch1{identifier}", None, ), ( r"$\mathrm{W/Z}$", - "Ratio ch1_masked ch0_masked qGen:sum,ptGen:sum,absEtaGen:sum ptVGen:sum,absYVGen:sum", - "ch1_masked_ch0_masked", + ( + f"Ratio ch1{identifier} ch0{identifier} qGen:sum,ptGen:sum,absEtaGen:sum ptVGen:sum,absYVGen:sum", + f"Ratio ch1{identifier} ch0{identifier} qGen:sum count:sum", + ), + f"ch1{identifier}_ch0{identifier}", None, ), ] +smp_20_004 = { + r"$\mathrm{W}^{-}$": [8670, 215.63858652847824], + r"$\mathrm{W}^{+}$": [11800, 288.09720581775866], + r"$\mathrm{W}$": [20480, 499.8999899979995], + r"$\mathrm{Z}$": [1952, 48.63126566315131], + r"$\mathrm{W}^{+}/\mathrm{W}^{-}$": [1.3615, 0.009570788891204319], + r"$\mathrm{W/Z}$": [10.491, 0.0864002314811714], +} + +nMeas = 1 + lumi = meta["meta_info_input"]["channel_info"]["ch0"]["lumi"] custom_order = [ @@ -114,7 +148,12 @@ ] dfs = [] -for name, mapping, channel, selection in xsec_keys: +for name, mappings, channel, selection in xsec_keys: + if len(mappings) == 2: + mapping = mappings[0] + else: + mapping = mappings + hp = result[mapping]["channels"][channel]["hist_prefit_inclusive"].get() h1 = result[mapping]["channels"][channel]["hist_postfit_inclusive"].get() hi = result[mapping]["channels"][channel][ @@ -184,9 +223,16 @@ df["prefit_error"] = prefit_error for pdf_name, pdf_res in pdf_results.items(): - channel_mappings = pdf_res[mapping.replace("_masked", "")]["channels"][ - channel.replace("_masked", "") - ] + if len(mappings) == 2: + if mappings[0].replace(identifier, "") not in pdf_res.keys(): + mapping = mappings[1] + else: + mapping = mappings[0] + else: + mapping = mappings + + mapping = mapping.replace(identifier, "") + channel_mappings = pdf_res[mapping]["channels"][channel.replace(identifier, "")] hr = channel_mappings["hist_prefit_inclusive"].get() hr_impacts = channel_mappings[ "hist_prefit_inclusive_global_impacts_grouped" @@ -214,6 +260,8 @@ df = pd.concat(dfs) +logger.debug(df) + names = [k[0] for k in xsec_keys] outdir = output_tools.make_plot_dir(args.outpath, eoscp=args.eoscp) @@ -295,7 +343,7 @@ pdf_error = df_g[f"{pdf_name}_error"].values[0] / norm ax.errorbar( [pdf_value], - [i + 1 - (j + 1) / (nPDFs + 1)], + [i + 1 - (j + 1) / (nPDFs + nMeas)], xerr=pdf_error, color=pdf_colors[pdf_name], marker="o", @@ -313,6 +361,17 @@ # marker="o", # ) + # cross sections from SMP-20-004 + j = j + 1 + ax.errorbar( + [smp_20_004[name][0]] / norm, + [i + 1 - (j + 1) / (nPDFs + nMeas)], + xerr=smp_20_004[name][1] / norm, + color="black", + marker="x", + label="JHEP04(2025)162" if i == 0 else None, + ) + # round to two significant digits in total uncertainty sig_digi = 2 - int(math.floor(math.log10(abs(total)))) - 1 @@ -375,7 +434,7 @@ extra_labels = ["Measurement"] -plot_tools.addLegend( +leg = plot_tools.addLegend( ax, ncols=args.legCols, text_size="small", @@ -391,6 +450,13 @@ padding_loc="auto", ) +# add a link to the legend entry +for text in leg.get_texts(): + if "JHEP04(2025)162" in text.get_text(): + text.set_url("https://doi.org/10.1007/JHEP04(2025)162") + # Optional: Make it look like a link + # text.set_color("blue") + ax.set_xlim([lo, hi]) ax.set_ylim([0, len(norms) + 2]) @@ -406,6 +472,8 @@ plot_tools.add_cms_decor(ax, args.cmsDecor, data=True, lumi=lumi, loc=args.logoPos) outname = "summary" +if args.full: + outname = f"total_{outname}" if args.postfix: outname += f"_{args.postfix}" plot_tools.save_pdf_and_png(outdir, outname) @@ -436,8 +504,11 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): # Compute angle in degrees theta = np.degrees(np.arctan2(*eigvecs[:, 0][::-1])) + prob = chi2.cdf(nstd**2, df=1) # Get the probability of the 1D n-sigma + scale = np.sqrt(chi2.ppf(prob, df=2)) # Find the 2D equivalent scale + # Width and height are "2*nstd" standard deviations - width, height = 2 * nstd * np.sqrt(eigvals) + width, height = 2 * scale * np.sqrt(eigvals) # Create ellipse ellipse = Ellipse(xy=pos, width=width, height=height, angle=theta, **kwargs) @@ -467,7 +538,7 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): if getattr(hi, "axes", False) and "yield" in hi.axes.name: hi = hi[{"yield": hist.sum}] - if k == ckey0 or k == ckey0.replace("_masked", ""): + if k == ckey0 or k == ckey0.replace(identifier, ""): sel = channel0[-1] if sel is not None: @@ -477,7 +548,7 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): x = hi.value ix = ibin - if k == ckey1 or k == ckey1.replace("_masked", ""): + if k == ckey1 or k == ckey1.replace(identifier, ""): sel = channel1[-1] if sel is not None: y = hi[sel].value @@ -554,6 +625,8 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): plot_tools.add_cms_decor(ax, args.cmsDecor, data=True, loc=args.logoPos) outname = f"summary_2D_{name}" + if args.full: + outname = f"total_{outname}" if args.postfix: outname += f"_{args.postfix}" plot_tools.save_pdf_and_png(outdir, outname) diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 1c8ac4f8b..989498027 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -129,8 +129,10 @@ def make_subparsers(parser): ) parser.add_argument( "--constrainNOIs", - action="store_true", - help="Constrain NOI variation", + type=str, + default=[], + nargs="+", + help="Constrain NOI variation for given groups", ) parser = parsing.set_parser_default(parser, "massVariation", 10) @@ -915,6 +917,7 @@ def setup( fitresult_data=None, unfolding_scalemap=None, base_group=None, + unfolding_with_flow=False, ): isUnfolding = args.analysisMode == "unfolding" isTheoryAgnostic = args.analysisMode in [ @@ -1155,7 +1158,7 @@ def setup( member_filter=lambda x: not x.name.endswith("OOA"), fitvar=fitvar, histToReadAxes=args.unfoldingLevel, - disable_flow_fit_axes=not (datagroups.xnorm and args.unfoldingWithFlow), + disable_flow_fit_axes=not (datagroups.xnorm and unfolding_with_flow), ) # out of acceptance contribution @@ -1392,12 +1395,14 @@ def setup( args.pseudoDataProcsRegexp, ) + if datagroups.xnorm and isUnfolding and unfolding_with_flow: + masked_flow_axes = ["ptGen", "ptVGen"] + if "_full" in datagroups.channel: + masked_flow_axes.extend(["absEtaGen", "absYVGen"]) + else: + masked_flow_axes = [] + if args.correlateSignalMCstat and datagroups.xnorm: - masked_flow_axes = ( - ["ptGen", "ptVGen"] - if (datagroups.xnorm and isUnfolding and args.unfoldingWithFlow) - else [] - ) combine_helpers.add_nominal_with_correlated_BinByBinStat( datagroups, wmass, @@ -1415,11 +1420,7 @@ def setup( ), fitresult_data=fitresult_data, masked=datagroups.xnorm and fitresult_data is None, - masked_flow_axes=( - ["ptGen", "ptVGen"] - if (datagroups.xnorm and isUnfolding and args.unfoldingWithFlow) - else [] - ), + masked_flow_axes=masked_flow_axes, ) if stat_only and isUnfolding and not isPoiAsNoi: @@ -1551,6 +1552,12 @@ def setup( theoryAgnostic_helper.add_theoryAgnostic_uncertainty() elif isUnfolding: + signal_groups = datagroups.expandProcesses("signal_samples") + if len(signal_groups) != 1: + raise NotImplementedError( + f"noi variations currently only works for 1 signal group but got {len(signal_groups)}" + ) + combine_helpers.add_noi_unfolding_variations( datagroups, label, @@ -1560,7 +1567,7 @@ def setup( scale_norm=args.scaleNormXsecHistYields, gen_level=args.unfoldingLevel, fitresult=unfolding_scalemap, - constrained=args.constrainNOIs, + constrained=signal_groups[0] in args.constrainNOIs, ) if args.muRmuFPolVar and not isTheoryAgnosticPolVar: @@ -1576,7 +1583,10 @@ def setup( if args.correlateSignalMCstat and datagroups.xnorm and args.fitresult is None: # use variations from reco histogram and apply them to xnorm - source = ("nominal", f"{inputBaseName}_yieldsUnfolding_theory_weight") + source = ( + "nominal", + f"{inputBaseName.replace('_full','')}_yieldsUnfolding_theory_weight", + ) # need to find the reco variables that correspond to the reco fit, reco fit must be done with variables in same order as gen bins gen2reco = { "qGen": "charge", @@ -3008,6 +3018,22 @@ def outputFolderName(outfolder, datagroups, doStatOnly, postfix): stat_only=args.doStatOnly or args.doStatOnlyMasked, channel=f"{channel}_masked", unfolding_scalemap=unfolding_scalemap, + unfolding_with_flow=args.unfoldingWithFlow, + ) + + # add masked channel in full phase space + datagroups_xnorm = setup( + writer, + args, + ifile, + f"{args.unfoldingLevel}_full", + iLumiScale, + genvar, + genvar=genvar, + stat_only=args.doStatOnly or args.doStatOnlyMasked, + channel=f"{channel}_full_masked", + unfolding_scalemap=unfolding_scalemap, + unfolding_with_flow=True, ) if args.unfoldSimultaneousWandZ and datagroups.mode == "w_mass": diff --git a/wremnants/combine_helpers.py b/wremnants/combine_helpers.py index bf97cb38a..710e494d2 100644 --- a/wremnants/combine_helpers.py +++ b/wremnants/combine_helpers.py @@ -186,13 +186,16 @@ def add_explicit_BinByBinStat( } # integrate out gen axes for bin by bin uncertainties integration_var["acceptance"] = hist.sum + if "_full" in datagroups.channel: + sel = {"acceptance": hist.sum} + else: + sel = {"acceptance": True} + hnom = datagroups.groups[proc].hists[datagroups.nominalName] hnom = hnom.project(*datagroups.gen_axes_names) hvar = datagroups.groups[proc].hists["syst"] - hvar_acc = action_sel(hvar, {"acceptance": True}).project( - *recovar, *datagroups.gen_axes_names - ) + hvar_acc = action_sel(hvar, sel).project(*recovar, *datagroups.gen_axes_names) hvar_reco = action_sel(hvar, integration_var) rel_unc = np.sqrt(hvar_reco.variances(flow=True)) / hvar_reco.values(flow=True) @@ -201,10 +204,17 @@ def add_explicit_BinByBinStat( hvar_acc, rel_unc[..., *[np.newaxis] * len(datagroups.gen_axes_names)] ) + if hasattr(datagroups, "axes_disable_flow") and len(datagroups.axes_disable_flow): + hvar = hh.disableFlow(hvar, datagroups.axes_disable_flow) + # disable for for reco axes + hvar = hh.disableFlow( + hvar, [n for n in hvar.axes.name if n not in datagroups.gen_axes_names] + ) + datagroups.writer.add_beta_variations( hvar, proc, - source_channel=datagroups.channel.replace("_masked", ""), + source_channel=datagroups.channel.replace("_masked", "").replace("_full", ""), dest_channel=datagroups.channel, ) @@ -230,8 +240,13 @@ def add_nominal_with_correlated_BinByBinStat( sumFakesPartial=True, ) + if base_name.endswith("_full"): + sel = {"acceptance": hist.sum} + else: + sel = {"acceptance": True} + # load generator level nominal - gen_name = f"{base_name}_yieldsUnfolding_theory_weight" + gen_name = f"{base_name.replace('_full','')}_yieldsUnfolding_theory_weight" datagroups.loadHistsForDatagroups( baseName="nominal", syst=gen_name, @@ -243,12 +258,11 @@ def add_nominal_with_correlated_BinByBinStat( for proc in datagroups.predictedProcesses(): logger.info(f"Add process {proc} in channel {datagroups.channel}") - # nominal histograms of prediction norm_proc_hist_reco = datagroups.groups[proc].hists[gen_name] norm_proc_hist = datagroups.groups[proc].hists[datagroups.nominalName] - norm_proc_hist_reco = action_sel(norm_proc_hist_reco, {"acceptance": True}) + norm_proc_hist_reco = action_sel(norm_proc_hist_reco, sel) if norm_proc_hist_reco.axes.name != datagroups.fit_axes: norm_proc_hist_reco = norm_proc_hist_reco.project(*datagroups.fit_axes) @@ -432,9 +446,6 @@ def add_noi_unfolding_variations( poi_axes_syst = [f"_{n}" for n in poi_axes] if datagroups.xnorm else poi_axes[:] noi_args = dict( - histname=( - gen_level if datagroups.xnorm else f"nominal_{gen_level}_yieldsUnfolding" - ), name=f"nominal_{gen_level}_yieldsUnfolding", baseName=f"{label}_", group=f"normXsec{label}", @@ -468,21 +479,51 @@ def add_noi_unfolding_variations( if datagroups.xnorm: - def make_poi_xnorm_variations(h, poi_axes, poi_axes_syst, norm, h_scale=None): - h = hh.disableFlow( - h, - [ - "absYVGen", - "absEtaGen", - ], - ) + def make_poi_xnorm_variations( + hVar, hNom, poi_axes, poi_axes_syst, norm, h_scale=None + ): + if "_full" not in datagroups.channel: + # include flow in rapidity for total phase space measurement + hNom = hh.disableFlow( + hNom, + [ + "absYVGen", + "absEtaGen", + ], + ) + hVar = hh.disableFlow( + hVar, + [ + "absYVGen", + "absEtaGen", + ], + ) + hVar = hh.expand_hist_by_duplicate_axes( - h, poi_axes[::-1], poi_axes_syst[::-1] + hVar, poi_axes[::-1], poi_axes_syst[::-1] ) + if "_full" in datagroups.channel: + # Do not assign unconstrained parameters in these rapidity flow bins. i.e. disable flow of expanded axes + hVar = hh.disableFlow( + hVar, + [ + "_absYVGen", + "_absEtaGen", + ], + ) + # set the default scaling values for those to 1 + h_scale = hh.setFlow( + h_scale, + ["absYVGen", "absEtaGen"], + under=False, + over=True, + default_values=1.0, + ) + if h_scale is not None: hVar = hh.multiplyHists(hVar, h_scale) - return hh.addHists(h, hVar, scale2=norm) + return hh.addHists(hNom, hVar, scale2=norm) if scalemap is None: scalemap = get_scalemap( @@ -491,11 +532,14 @@ def make_poi_xnorm_variations(h, poi_axes, poi_axes_syst, norm, h_scale=None): gen_level, rename_axes={o: n for o, n in zip(poi_axes, poi_axes_syst)}, ) - + nominalName = datagroups.nominalName.replace("_full", "") datagroups.addSystematic( **noi_args, + histname=nominalName, + nominalName=nominalName, systAxesFlow=[f"_{n}" for n in poi_axes if n in poi_axes_flow], action=make_poi_xnorm_variations, + actionRequiresNomi=True, actionArgs=dict( poi_axes=poi_axes, poi_axes_syst=poi_axes_syst, @@ -526,6 +570,7 @@ def make_poi_variations(h, poi_axes, norm, h_scale=None): datagroups.addSystematic( **noi_args, + histname=f"nominal_{gen_level}_yieldsUnfolding", systAxesFlow=[n for n in poi_axes if n in poi_axes_flow], preOpMap={ m.name: make_poi_variations diff --git a/wremnants/datasets/datagroups.py b/wremnants/datasets/datagroups.py index 719b0abfc..4ebf5fd3d 100644 --- a/wremnants/datasets/datagroups.py +++ b/wremnants/datasets/datagroups.py @@ -46,8 +46,19 @@ def __init__(self, infile, mode=None, xnorm=False, **kwargs): self.results = pickle.load(f) elif infile.endswith(".hdf5"): logger.info("Load input file") - h5file = h5py.File(infile, "r") - self.results = input_tools.load_results_h5py(h5file) + infiles = infile.split(",") + self.results = {} + for ifile in sorted(infiles): + logger.info(f"Now at {ifile}") + h5file = h5py.File(ifile, "r") + result = input_tools.load_results_h5py(h5file) + for k, v in result.items(): + if k in self.results.keys() and k in ["meta_info"]: + logger.warning(f"Skip {k}") + continue + + self.results[k] = v + else: raise ValueError(f"{infile} has unsupported file type") From d8a657c7cf2925e6f626195ffc2caae665f15d03 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Wed, 18 Feb 2026 21:22:08 -0500 Subject: [PATCH 02/23] Always use preFSR for gen mass cut --- wremnants/unfolding_tools.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/wremnants/unfolding_tools.py b/wremnants/unfolding_tools.py index aae3cd98d..2e7609fce 100644 --- a/wremnants/unfolding_tools.py +++ b/wremnants/unfolding_tools.py @@ -450,9 +450,10 @@ def add_gen_histograms( ) else: for level in self.unfolding_levels: - # add full phase space histograms for inclusive cross section - df_full = df.Filter(f"{level}V_mass > 60") - df_full = df_full.Filter(f"{level}V_mass < 120") + # add full phase space histograms for inclusive cross section, + # the mass cuts has to be preFSR to compare to SMP-20-004 + df_full = df.Filter(f"massVgen > 60") + df_full = df_full.Filter(f"massVgen < 120") add_xnorm_histograms( results, df_full, @@ -469,7 +470,7 @@ def add_gen_histograms( base_name=f"{level}_full", ) - df = select_fiducial_space( + df_fid = select_fiducial_space( df, level, mode=self.analysis_label, @@ -481,20 +482,20 @@ def add_gen_histograms( if self.unfolding_corr_helper: logger.debug("Apply reweighting based on unfolded result") - df = df.Define( + df_fid = df_fid.Define( "unfoldingWeight_tensor", self.unfolding_corr_helper, [*self.unfolding_corr_helper.hist.axes.name[:-1], "unity"], ) - df = df.Define( + df_fid = df_fid.Define( "central_weight", f"{level}_acceptance ? unfoldingWeight_tensor(0) : unity", ) if self.poi_as_noi: - df_xnorm = df.Filter(f"{level}_acceptance") + df_xnorm = df_fid.Filter(f"{level}_acceptance") else: - df_xnorm = df + df_xnorm = df_fid add_xnorm_histograms( results, From 4638eab31b5aa1e231a167e0f829a8d07b6e38e4 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Wed, 18 Feb 2026 21:23:49 -0500 Subject: [PATCH 03/23] Fix previously broken filterProcs argument; use 'is' instead of '==' in python --- scripts/rabbit/setupRabbit.py | 2 +- utilities/io_tools/input_tools.py | 2 +- wremnants/datasets/datagroups.py | 4 ++-- wremnants/datasets/dataset_tools.py | 5 +++-- wremnants/histmaker_tools.py | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 989498027..5d618718b 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -1229,7 +1229,7 @@ def setup( and not (datagroups.xnorm or args.skipSignalSystOnFakes) and datagroups.fakeName != "QCD" and (excludeGroup != None and datagroups.fakeName not in excludeGroup) - and (filterGroup == None or datagroups.fakeName in filterGroup) + and (filterGroup is None or datagroups.fakeName in filterGroup) ) dibosonMatch = ["WW", "WZ", "ZZ"] diff --git a/utilities/io_tools/input_tools.py b/utilities/io_tools/input_tools.py index b6d63894d..051918283 100644 --- a/utilities/io_tools/input_tools.py +++ b/utilities/io_tools/input_tools.py @@ -719,7 +719,7 @@ def safeGetRootObject( fileObject, objectName, quitOnFail=True, silent=False, detach=True ): obj = fileObject.Get(objectName) - if obj == None: + if obj is None: error_msg = f"Error getting {objectName} from file {fileObject.GetName()}" if not silent: logger.error(error_msg) diff --git a/wremnants/datasets/datagroups.py b/wremnants/datasets/datagroups.py index 4ebf5fd3d..9bf7a3aa5 100644 --- a/wremnants/datasets/datagroups.py +++ b/wremnants/datasets/datagroups.py @@ -62,7 +62,7 @@ def __init__(self, infile, mode=None, xnorm=False, **kwargs): else: raise ValueError(f"{infile} has unsupported file type") - if mode == None: + if mode is None: analysis_script = os.path.basename(self.getScriptCommand().split()[0]) self.mode = Datagroups.analysisLabel(analysis_script) else: @@ -805,7 +805,7 @@ def addSummedProc( self.groups[rename].hists[histname] = hh.sumHists(tosum) def setSelectOp(self, op, processes=None): - if processes == None: + if processes is None: procs = self.groups else: procs = [processes] if isinstance(processes, str) else processes diff --git a/wremnants/datasets/dataset_tools.py b/wremnants/datasets/dataset_tools.py index 1cc07a9dc..96cc346ef 100644 --- a/wremnants/datasets/dataset_tools.py +++ b/wremnants/datasets/dataset_tools.py @@ -243,9 +243,10 @@ def getDatasets( for sample, info in dataDict.items(): if excl not in [None, []] and (info["group"] in excl or sample in excl): continue - if filter not in [None, []] and (info["group"] in filt or sample in filt): + if filter not in [None, []]: # keep the sample if it is explicitly filtered - pass + if info["group"] not in filt and sample not in filt: + continue elif info.get("auxiliary") and ( aux in [None, []] or (info["group"] not in aux and sample not in aux) ): diff --git a/wremnants/histmaker_tools.py b/wremnants/histmaker_tools.py index 5c36cf57e..7162ad233 100644 --- a/wremnants/histmaker_tools.py +++ b/wremnants/histmaker_tools.py @@ -258,7 +258,7 @@ def hist_to_helper(h): hConv = narf.hist_to_pyroot_boost(h, tensor_rank=0) tensor = getattr(ROOT.wrem, f"HistHelper{len(h.axes)}D", None) - if tensor == None: + if tensor is None: raise NotImplementedError(f"HistHelper{len(h.axes)}D not yet implemented") helper = tensor[type(hConv).__cpp_name__](ROOT.std.move(hConv)) From d5d9733e2e8309df959f4be2c255596783f5b480 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Wed, 18 Feb 2026 21:24:47 -0500 Subject: [PATCH 04/23] Basic support for gen mass in unfolding --- utilities/differential.py | 2 ++ utilities/parsing.py | 1 + 2 files changed, 3 insertions(+) diff --git a/utilities/differential.py b/utilities/differential.py index a79398051..ff8adbd33 100644 --- a/utilities/differential.py +++ b/utilities/differential.py @@ -157,6 +157,8 @@ def get_dilepton_axes( 2, -2.0, 2.0, underflow=False, overflow=False, name="qVGen" ) ) + elif v in ["massVGen"]: + axes.append(hist.axis.Regular(1, 60.0, 120.0, name="massVGen")) else: raise NotImplementedError(f"Unfolding dilepton axis {v} is not supported.") diff --git a/utilities/parsing.py b/utilities/parsing.py index 40cb202db..763c5ba21 100644 --- a/utilities/parsing.py +++ b/utilities/parsing.py @@ -691,6 +691,7 @@ def __call__(self, parser, namespace, values, option_string=None): "qVGen", "ptVGen", "absYVGen", + "massVGen", "helicitySig", ], help="Generator level variable", From 14c4dd38b73c74d3c4f605b52c5339faad61dba4 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Fri, 20 Feb 2026 11:07:53 -0500 Subject: [PATCH 05/23] Fix full phase space unfolding for Z --- wremnants/unfolding_tools.py | 57 ++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/wremnants/unfolding_tools.py b/wremnants/unfolding_tools.py index 2e7609fce..8d12b15d6 100644 --- a/wremnants/unfolding_tools.py +++ b/wremnants/unfolding_tools.py @@ -38,8 +38,6 @@ def define_gen_level(df, dataset_name, gen_levels=["prefsr", "postfsr"], mode="w singlelep = mode[0] == "w" or "wlike" in mode if "prefsr" in gen_levels: - df = theory_tools.define_prefsr_vars(df) - # # needed for fiducial phase space definition df = df.Alias("prefsrV_mass", "massVgen") df = df.Alias("prefsrV_pt", "ptVgen") @@ -434,6 +432,8 @@ def __init__( def add_gen_histograms( self, args, df, results, dataset, corr_helpers, theory_helpers={} ): + df = theory_tools.define_prefsr_vars(df) + df = define_gen_level( df, dataset.name, self.unfolding_levels, mode=self.analysis_label ) @@ -449,11 +449,34 @@ def add_gen_histograms( **self.cutsmap, ) else: + for level in self.unfolding_levels: + df = select_fiducial_space( + df, + level, + mode=self.analysis_label, + selections=self.unfolding_selections[level], + select=not self.poi_as_noi, + accept=True, + **self.cutsmap, + ) + + if self.unfolding_corr_helper: + logger.debug("Apply reweighting based on unfolded result") + df = df.Define( + "unfoldingWeight_tensor", + self.unfolding_corr_helper, + [*self.unfolding_corr_helper.hist.axes.name[:-1], "unity"], + ) + df = df.Define( + "central_weight", + f"{level}_acceptance ? unfoldingWeight_tensor(0) : unity", + ) + for level in self.unfolding_levels: # add full phase space histograms for inclusive cross section, # the mass cuts has to be preFSR to compare to SMP-20-004 - df_full = df.Filter(f"massVgen > 60") - df_full = df_full.Filter(f"massVgen < 120") + df_full = df.Filter("massVgen > 60") + df_full = df_full.Filter("massVgen < 120") add_xnorm_histograms( results, df_full, @@ -470,32 +493,10 @@ def add_gen_histograms( base_name=f"{level}_full", ) - df_fid = select_fiducial_space( - df, - level, - mode=self.analysis_label, - selections=self.unfolding_selections[level], - select=not self.poi_as_noi, - accept=True, - **self.cutsmap, - ) - - if self.unfolding_corr_helper: - logger.debug("Apply reweighting based on unfolded result") - df_fid = df_fid.Define( - "unfoldingWeight_tensor", - self.unfolding_corr_helper, - [*self.unfolding_corr_helper.hist.axes.name[:-1], "unity"], - ) - df_fid = df_fid.Define( - "central_weight", - f"{level}_acceptance ? unfoldingWeight_tensor(0) : unity", - ) - if self.poi_as_noi: - df_xnorm = df_fid.Filter(f"{level}_acceptance") + df_xnorm = df.Filter(f"{level}_acceptance") else: - df_xnorm = df_fid + df_xnorm = df add_xnorm_histograms( results, From 7b95f3ca796bea6eef6e9d6302ac09fc6aa94505 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Fri, 20 Feb 2026 11:10:40 -0500 Subject: [PATCH 06/23] Fix filtering procs for datasets --- wremnants/datasets/dataset_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wremnants/datasets/dataset_tools.py b/wremnants/datasets/dataset_tools.py index 96cc346ef..91d438880 100644 --- a/wremnants/datasets/dataset_tools.py +++ b/wremnants/datasets/dataset_tools.py @@ -243,7 +243,7 @@ def getDatasets( for sample, info in dataDict.items(): if excl not in [None, []] and (info["group"] in excl or sample in excl): continue - if filter not in [None, []]: + if filt not in [None, []]: # keep the sample if it is explicitly filtered if info["group"] not in filt and sample not in filt: continue From 77742980f536d84539ec804cac59b1d10f3af925 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Fri, 20 Feb 2026 11:16:41 -0500 Subject: [PATCH 07/23] Adapt inclusive xsec plotting script --- scripts/plotting/inclusive_xsec_summary.py | 78 ++++++++++++++++++++-- 1 file changed, 71 insertions(+), 7 deletions(-) diff --git a/scripts/plotting/inclusive_xsec_summary.py b/scripts/plotting/inclusive_xsec_summary.py index 8d1d14e57..9649d1c9d 100644 --- a/scripts/plotting/inclusive_xsec_summary.py +++ b/scripts/plotting/inclusive_xsec_summary.py @@ -36,6 +36,12 @@ parser.add_argument( "--full", action="store_true", default=False, help="full phase space results" ) +parser.add_argument( + "--scaleToNewLumi", + action="store_true", + default=False, + help="scale the results of SMP-20-004 to updated lumi", +) args = parser.parse_args() logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) @@ -115,11 +121,17 @@ ), ] +if args.scaleToNewLumi: + # old lumi / new lumi + scale = 0.199269742 / 0.204602332 +else: + scale = 1 + smp_20_004 = { - r"$\mathrm{W}^{-}$": [8670, 215.63858652847824], - r"$\mathrm{W}^{+}$": [11800, 288.09720581775866], - r"$\mathrm{W}$": [20480, 499.8999899979995], - r"$\mathrm{Z}$": [1952, 48.63126566315131], + r"$\mathrm{W}^{-}$": [8670 * scale, 215.63858652847824], + r"$\mathrm{W}^{+}$": [11800 * scale, 288.09720581775866], + r"$\mathrm{W}$": [20480 * scale, 499.8999899979995], + r"$\mathrm{Z}$": [1952 * scale, 48.63126566315131], r"$\mathrm{W}^{+}/\mathrm{W}^{-}$": [1.3615, 0.009570788891204319], r"$\mathrm{W/Z}$": [10.491, 0.0864002314811714], } @@ -527,9 +539,26 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): ax = fig.add_subplot(111) for pdf_name, result in comp_result.items(): - ckey0 = channel0[1] + " " + channel0[2] - ckey1 = channel1[1] + " " + channel1[2] + if len(channel0[1]) == 2: + mappings0 = channel0[1] + mappings1 = channel1[1] + + if args.full and pdf_name != "TOTAL": + mapping0 = mappings0[1] + mapping1 = mappings1[1] + else: + mapping0 = mappings0[0] + mapping1 = mappings1[0] + else: + mapping0 = channel0[1] + mapping1 = channel1[1] + + ckey0 = mapping0 + " " + channel0[2] + ckey1 = mapping1 + " " + channel1[2] + + found_x = False + found_y = False ibin = 0 for k, r in result["channels"].items(): fittype = "postfit" if f"hist_postfit_inclusive" in r.keys() else "prefit" @@ -540,7 +569,7 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): if k == ckey0 or k == ckey0.replace(identifier, ""): sel = channel0[-1] - + found_x = True if sel is not None: x = hi[sel].value ix = ibin + [i for i in sel.values()][0] @@ -550,6 +579,7 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): if k == ckey1 or k == ckey1.replace(identifier, ""): sel = channel1[-1] + found_y = True if sel is not None: y = hi[sel].value iy = ibin + [i for i in sel.values()][0] @@ -559,6 +589,9 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): ibin += hi.size if hasattr(hi, "size") else 1 + if not found_x or not found_y: + raise RuntimeError(f"Not found x {found_x} or y {found_y}") + cov = result[f"hist_{fittype}_inclusive_cov"].get().values() cov = cov[np.ix_([ix, iy], [ix, iy])] @@ -588,6 +621,37 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): ax.add_patch(ell) ax.plot(x, y, color=icol, marker="o", alpha=0) + # cross sections from SMP-20-004 + x = smp_20_004[channel0[0]][0] + x_err = smp_20_004[channel0[0]][1] + y = smp_20_004[channel1[0]][0] + y_err = smp_20_004[channel1[0]][1] + + plt.errorbar( + x, + y, + xerr=x_err, + yerr=y_err, + fmt="x", + color="black", + ecolor="black", + capsize=5, + label="JHEP04(2025)162", + ) + + # cov = np.array([[x_err**2, 0],[0, y_err**2]]) # dummy covariance + # ell = plot_cov_ellipse( + # cov, + # np.array([x, y]), + # nstd=1, + # edgecolor="black", + # facecolor="none", + # linewidth=2, + # label="JHEP04(2025)162", + # ) + # ax.add_patch(ell) + # ax.plot(x, y, color="black", marker="x") + xlim = ax.get_xlim() ylim = ax.get_ylim() yrange = ylim[1] - ylim[0] From 0d6a0ee7f1da03cb227e53cd4a6435942483aade Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Sat, 21 Feb 2026 10:39:36 -0500 Subject: [PATCH 08/23] Separate out ROOT dependent input tools to avoid loading root library if not necessary --- scripts/hepdata/SMP-23-002/wmass_hepdata.py | 18 ++++----- utilities/io_tools/input_tools.py | 40 ------------------- utilities/io_tools/input_tools_root.py | 44 +++++++++++++++++++++ wremnants/helicity_utils_polvar.py | 12 +++--- wremnants/muon_efficiencies_smooth.py | 12 +++--- wremnants/muon_efficiencies_veto.py | 10 ++--- 6 files changed, 71 insertions(+), 65 deletions(-) create mode 100644 utilities/io_tools/input_tools_root.py diff --git a/scripts/hepdata/SMP-23-002/wmass_hepdata.py b/scripts/hepdata/SMP-23-002/wmass_hepdata.py index 3c0e15e5b..198b9d424 100644 --- a/scripts/hepdata/SMP-23-002/wmass_hepdata.py +++ b/scripts/hepdata/SMP-23-002/wmass_hepdata.py @@ -10,7 +10,7 @@ common_plot_parser, # TODO: move to main common parser ) from utilities.common import base_dir, data_dir -from utilities.io_tools import conversion_tools, input_tools +from utilities.io_tools import conversion_tools, input_tools_root from wums import logging @@ -106,8 +106,8 @@ def make_covariance_matrix(logger=None, verbose=4): # could use load_covariance_pois in utilities/io_tools/combinetf_input.py # which returns covariance as boost hist and a list with rows' labels - infile = input_tools.safeOpenRootFile(filename) - th2 = input_tools.safeGetRootObject(infile, histname, detach=True) + infile = input_tools_root.safeOpenRootFile(filename) + th2 = input_tools_root.safeGetRootObject(infile, histname, detach=True) infile.Close() logger.info("Histogram read from file, converting labels ...") @@ -130,7 +130,7 @@ def make_covariance_matrix(logger=None, verbose=4): houtputname = "covariance_matrix" tag = m.lower() outname = f"{tmp_outdir}covariance_{tag}.root" - outfile = input_tools.safeOpenRootFile(outname, mode="recreate") + outfile = input_tools_root.safeOpenRootFile(outname, mode="recreate") th2.Write(houtputname) outfile.Close() logger.info(f"Histogram {houtputname} saved in file {outname}") @@ -194,8 +194,8 @@ def make_pulls_and_constraints(sub, logger=None, verbose=4): ) # open root file to get histogram and retrieve bin labels - infile = input_tools.safeOpenRootFile(filename) - th2 = input_tools.safeGetRootObject(infile, histname, detach=True) + infile = input_tools_root.safeOpenRootFile(filename) + th2 = input_tools_root.safeGetRootObject(infile, histname, detach=True) infile.Close() logger.info("Histogram read from file ...") @@ -327,7 +327,7 @@ def make_cross_section(sub, logger=None, verbose=4): # loop file to get list of keys # but then use the hepdata_lib helpers to retrieve all the information - infile = input_tools.safeOpenRootFile(filename) + infile = input_tools_root.safeOpenRootFile(filename) hnames = [] htitles = [] xAxisTitle = "" @@ -440,8 +440,8 @@ def make_mass_summary(sub, logger=None, verbose=4): logger.info("Preparing 1 table ...") - infile = input_tools.safeOpenRootFile(filename) - th2 = input_tools.safeGetRootObject(infile, histname, detach=True) + infile = input_tools_root.safeOpenRootFile(filename) + th2 = input_tools_root.safeGetRootObject(infile, histname, detach=True) infile.Close() xAxisTitle = th2.GetYaxis().GetBinLabel(1) if th2.GetNbinsY() == 4: diff --git a/utilities/io_tools/input_tools.py b/utilities/io_tools/input_tools.py index 10f7320b0..54f25789c 100644 --- a/utilities/io_tools/input_tools.py +++ b/utilities/io_tools/input_tools.py @@ -7,7 +7,6 @@ import hist import lz4.frame import numpy as np -import ROOT import uproot from wums import boostHistHelpers as hh @@ -715,45 +714,6 @@ def read_json(fIn): return jsDict -def safeGetRootObject( - fileObject, objectName, quitOnFail=True, silent=False, detach=True -): - obj = fileObject.Get(objectName) - if obj is None: - error_msg = f"Error getting {objectName} from file {fileObject.GetName()}" - if not silent: - logger.error(error_msg) - if quitOnFail: - raise IOError(error_msg) - return None - else: - if detach: - obj.SetDirectory(0) - return obj - - -def safeOpenRootFile(fileName, quitOnFail=True, silent=False, mode="READ"): - fileObject = ROOT.TFile.Open(fileName, mode) - if not fileObject or fileObject.IsZombie(): - error_msg = f"Error when opening file {fileName}" - if not silent: - logger.error(error_msg) - if quitOnFail: - raise IOError(error_msg) - else: - return None - elif not fileObject.IsOpen(): - error_msg = f"File {fileName} was not opened" - if not silent: - logger.error(error_msg) - if quitOnFail: - raise IOError(error_msg) - else: - return None - else: - return fileObject - - def get_metadata(infile): results = None if infile.endswith(".pkl.lz4"): diff --git a/utilities/io_tools/input_tools_root.py b/utilities/io_tools/input_tools_root.py new file mode 100644 index 000000000..6f5bd01b2 --- /dev/null +++ b/utilities/io_tools/input_tools_root.py @@ -0,0 +1,44 @@ +import ROOT + +from wums import logging + +logger = logging.child_logger(__name__) + + +def safeGetRootObject( + fileObject, objectName, quitOnFail=True, silent=False, detach=True +): + obj = fileObject.Get(objectName) + if obj is None: + error_msg = f"Error getting {objectName} from file {fileObject.GetName()}" + if not silent: + logger.error(error_msg) + if quitOnFail: + raise IOError(error_msg) + return None + else: + if detach: + obj.SetDirectory(0) + return obj + + +def safeOpenRootFile(fileName, quitOnFail=True, silent=False, mode="READ"): + fileObject = ROOT.TFile.Open(fileName, mode) + if not fileObject or fileObject.IsZombie(): + error_msg = f"Error when opening file {fileName}" + if not silent: + logger.error(error_msg) + if quitOnFail: + raise IOError(error_msg) + else: + return None + elif not fileObject.IsOpen(): + error_msg = f"File {fileName} was not opened" + if not silent: + logger.error(error_msg) + if quitOnFail: + raise IOError(error_msg) + else: + return None + else: + return fileObject diff --git a/wremnants/helicity_utils_polvar.py b/wremnants/helicity_utils_polvar.py index dd79bec04..e60e7898d 100644 --- a/wremnants/helicity_utils_polvar.py +++ b/wremnants/helicity_utils_polvar.py @@ -4,7 +4,7 @@ import narf import narf.clingutils from utilities import common -from utilities.io_tools import input_tools +from utilities.io_tools import input_tools_root from wums import logging logger = logging.child_logger(__name__) @@ -56,12 +56,12 @@ def makehelicityWeightHelper_polvar( hsys_hist_full = {fld: None for fld in folders} filename = filenames[genVcharge] - fin = input_tools.safeOpenRootFile(filename) + fin = input_tools_root.safeOpenRootFile(filename) nSysts_coeff = {} for ifld, fld in enumerate(folders): f = fin.GetDirectory(fld) # start with nominal - hnom = input_tools.safeGetRootObject(f, f"h_pdf_{fld}") + hnom = input_tools_root.safeGetRootObject(f, f"h_pdf_{fld}") hnom_hist = narf.root_to_hist(hnom, axis_names=["qtOverQ", "absY"]) # create and fill boost histogram if hnom_hist_full is None: @@ -109,12 +109,14 @@ def makehelicityWeightHelper_polvar( nSysts_coeff[ifld] = nSysts for isys in range(nSysts): - hsysDown = input_tools.safeGetRootObject(f, f"h_pdf_{fld}_syst{isys}Down") + hsysDown = input_tools_root.safeGetRootObject( + f, f"h_pdf_{fld}_syst{isys}Down" + ) hsysDown_hist = narf.root_to_hist(hsysDown, axis_names=["qtOverQ", "absY"]) hsys_hist_full[fld].values(flow=False)[:, :, isys, 0] = ( hsysDown_hist.values(flow=False)[:, :] ) - hsysUp = input_tools.safeGetRootObject(f, f"h_pdf_{fld}_syst{isys}Up") + hsysUp = input_tools_root.safeGetRootObject(f, f"h_pdf_{fld}_syst{isys}Up") hsysUp_hist = narf.root_to_hist(hsysUp, axis_names=["qtOverQ", "absY"]) hsys_hist_full[fld].values(flow=False)[:, :, isys, 1] = hsysUp_hist.values( flow=False diff --git a/wremnants/muon_efficiencies_smooth.py b/wremnants/muon_efficiencies_smooth.py index 3d3df8788..63f6a4bc8 100644 --- a/wremnants/muon_efficiencies_smooth.py +++ b/wremnants/muon_efficiencies_smooth.py @@ -9,7 +9,7 @@ import narf from utilities import common -from utilities.io_tools import input_tools +from utilities.io_tools import input_tools_root from wums import boostHistHelpers as hh from wums import logging @@ -115,7 +115,7 @@ def make_muon_efficiency_helpers_smooth( if era == "2017" or era == "2018": filename = data_dir + f"muonSF/{era}/allSmooth_GtoHout_vtxAgnIso.root" - fin = input_tools.safeOpenRootFile(filename) + fin = input_tools_root.safeOpenRootFile(filename) logger.info(f"Scale factors read from {filename}") dict_SF3D = None @@ -178,7 +178,7 @@ def make_muon_efficiency_helpers_smooth( logger.warning( f"Substituting temporarily missing 2D histogram for 'isoantitrig' with 'isonotrig'" ) - hist_root = input_tools.safeGetRootObject(fin, hist_name) + hist_root = input_tools_root.safeGetRootObject(fin, hist_name) logger.debug(f"Syst: {eff_type} {chargeTag} -> {hist_name}") hist_hist = narf.root_to_hist( @@ -572,7 +572,7 @@ def make_muon_efficiency_helpers_smooth( logger.warning( f"Substituting temporarily missing 2D histogram for 'isoantitrig' with 'isonotrig'" ) - hist_root = input_tools.safeGetRootObject(fin, hist_name) + hist_root = input_tools_root.safeGetRootObject(fin, hist_name) # logger.info(f"stat: {effStatKey}|{eff_type} -> {hist_name}") # logger.info(ROOT.AddressOf(hist_root)) @@ -879,7 +879,7 @@ def make_muon_efficiency_helpers_smooth_altSyst( if era == "2017" or era == "2018": filename = data_dir + f"/muonSF/{era}/allSmooth_GtoHout_vtxAgnIso_altBkg.root" - fin = input_tools.safeOpenRootFile(filename) + fin = input_tools_root.safeOpenRootFile(filename) logger.info(f"Scale factors read from {filename}") ## start with NOMI and SYST @@ -888,7 +888,7 @@ def make_muon_efficiency_helpers_smooth_altSyst( for eff_type in allEff_types: chargeTag = charge_tag if eff_type in chargeDependentSteps else "both" hist_name = f"SF_nomiAndAlt_{eratag}_{eff_type}_{chargeTag}_altBkg" - hist_root = input_tools.safeGetRootObject(fin, hist_name) + hist_root = input_tools_root.safeGetRootObject(fin, hist_name) hist_hist = narf.root_to_hist( hist_root, axis_names=["SF eta", "SF pt", "nomi-statUpDown-syst"] diff --git a/wremnants/muon_efficiencies_veto.py b/wremnants/muon_efficiencies_veto.py index 0e13c25b7..f31130db5 100644 --- a/wremnants/muon_efficiencies_veto.py +++ b/wremnants/muon_efficiencies_veto.py @@ -4,7 +4,7 @@ import narf from utilities import common -from utilities.io_tools import input_tools +from utilities.io_tools import input_tools_root from wums import logging logger = logging.child_logger(__name__) @@ -38,13 +38,13 @@ def make_muon_efficiency_helpers_veto(useGlobalOrTrackerVeto=False, era=None): else: Steps = 5 # we decided to compute the syst variations on the veto SFs independently for each of the tnp fits (for global-or-tracker in the muon definition we fit reco, "tracking", P(tracker-seeded track|standalone), P(tracker muon and not global|tracker-seeded track), looseID + dxybs) - file_plus = input_tools.safeOpenRootFile(f"{filename_plus}") - plus_histo = input_tools.safeGetRootObject( + file_plus = input_tools_root.safeOpenRootFile(f"{filename_plus}") + plus_histo = input_tools_root.safeGetRootObject( file_plus, "SF_nomiAndAlt_GtoH_newveto_plus" ) file_plus.Close() - file_minus = input_tools.safeOpenRootFile(f"{filename_minus}") - minus_histo = input_tools.safeGetRootObject( + file_minus = input_tools_root.safeOpenRootFile(f"{filename_minus}") + minus_histo = input_tools_root.safeGetRootObject( file_minus, "SF_nomiAndAlt_GtoH_newveto_minus" ) file_minus.Close() From baf4c662037b62fe334cb0fc51c9722b953af64e Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Sat, 21 Feb 2026 12:19:13 -0500 Subject: [PATCH 09/23] Small updates for total phase space unfolding --- scripts/plotting/inclusive_xsec_summary.py | 8 ++++++-- wremnants/unfolding_tools.py | 5 +++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/plotting/inclusive_xsec_summary.py b/scripts/plotting/inclusive_xsec_summary.py index 9649d1c9d..880bf2301 100644 --- a/scripts/plotting/inclusive_xsec_summary.py +++ b/scripts/plotting/inclusive_xsec_summary.py @@ -621,11 +621,15 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): ax.add_patch(ell) ax.plot(x, y, color=icol, marker="o", alpha=0) + # scale for 2D plot + prob = chi2.cdf(1, df=1) # Get the probability of the 1D n-sigma + scale = np.sqrt(chi2.ppf(prob, df=2)) # Find the 2D equivalent scale + # cross sections from SMP-20-004 x = smp_20_004[channel0[0]][0] - x_err = smp_20_004[channel0[0]][1] + x_err = smp_20_004[channel0[0]][1] * scale y = smp_20_004[channel1[0]][0] - y_err = smp_20_004[channel1[0]][1] + y_err = smp_20_004[channel1[0]][1] * scale plt.errorbar( x, diff --git a/wremnants/unfolding_tools.py b/wremnants/unfolding_tools.py index 8d12b15d6..cb7df5ae3 100644 --- a/wremnants/unfolding_tools.py +++ b/wremnants/unfolding_tools.py @@ -37,6 +37,9 @@ def define_gen_level(df, dataset_name, gen_levels=["prefsr", "postfsr"], mode="w singlelep = mode[0] == "w" or "wlike" in mode + # general preFSR definition is needed in any case + df = theory_tools.define_prefsr_vars(df) + if "prefsr" in gen_levels: # # needed for fiducial phase space definition df = df.Alias("prefsrV_mass", "massVgen") @@ -432,8 +435,6 @@ def __init__( def add_gen_histograms( self, args, df, results, dataset, corr_helpers, theory_helpers={} ): - df = theory_tools.define_prefsr_vars(df) - df = define_gen_level( df, dataset.name, self.unfolding_levels, mode=self.analysis_label ) From 93eb55735209c1e2e67a2d0227901c3da88d1df7 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Fri, 27 Feb 2026 05:56:23 -0500 Subject: [PATCH 10/23] Update README and add some docstrings at the top of files to give a short description of their intented usage --- README.md | 190 +++++++++++------- .../postprocessing/datagroups/datagroups.py | 8 + .../datagroups/datagroups2016.py | 8 +- .../datagroups/datagroupsLowPU.py | 7 +- wremnants/postprocessing/histselections.py | 4 + wremnants/postprocessing/regression.py | 5 + .../datasets/datasetDict_13TeVGen.py | 4 + .../production/datasets/datasetDict_2017G.py | 4 + .../production/datasets/datasetDict_2017H.py | 4 + .../production/datasets/dataset_tools.py | 4 + 10 files changed, 161 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index 51390b315..74f6e7e1b 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,27 @@ # WRemnants -## Instructions: +WRemnants is the analysis framework for the CMS electroweak precision measurements such as the W boson mass, Z boson mass, strong coupling constraint, cross section measurements, and related studies on generator level, experimental calibrations, and future projections. It handles the full analysis chain from processing collision events (NanoAOD) into histograms, through systematic uncertainty estimation, to fit input preparation. The statistical inference is performed by the companion [rabbit](https://github.com/WMass/rabbit) framework. -Activate the singularity image (to be done every time before running code) +## Instructions + +### First time setup + +Activate the container image (to be done every time before running code). +Depending on the cluster you are working on you will need to set the directories that you want to access from within the container. E.g. +```bash +export APPTAINER_BIND="/tmp,/run,/cvmfs/etc/grid-security,/home/,/work/,/data" +``` +And then start the container with ```bash singularity run /cvmfs/unpacked.cern.ch/gitlab-registry.cern.ch/bendavid/cmswmassdocker/wmassdevrolling\:latest ``` +Where a flag `--nv` needs to be added to use NVIDIA GPUs (e.g. for the fit). Activate git Large File Storage (only need to do this once for a given user/home directory) ``` bash git lfs install ``` -Activate git pre-commit hooks (only need to do this once when checking out) -``` bash -git config --local include.path ../.gitconfig -``` -If the pre-commit hook is doing something undesired, it can be by passed by adding “--no-verify” when doing “git commit”. - Get the code (after forking from the central WMass repository) ```bash MY_GIT_USER=$(git config user.github) @@ -32,86 +36,89 @@ git pull --recurse-submodules upstream main git push origin main ``` -The fit is performed with the submodule [rabbit](https://github.com/WMass/rabbit) in the same singularity. - -### Contribute to the code - -**Guidelines** - * Use camel case practice for command line arguments and avoid the "dest" keyword. - * Use snake case practice for function names. - * Class names should start with capital letters. - * When making a new PR, it should target only one subject at a time. This makes it more easy to validate and the integration faster. PR on top of other PR are ok when it is noted in the description, e.g. this PR is on top of PR XYZ. +Activate git pre-commit hooks (only need to do this once when checking out) +``` bash +git config --local include.path ../.gitconfig +``` +If the pre-commit hook is doing something undesired, it can be bypassed by adding “--no-verify” when doing “git commit”. -### Run the code -Source the setup script. -It will create some environment variables to ease access to some folders: - * WREM_BASE: it points to ./WRemnants/ where all the code is - * COMBINE_STUDIES: folder with codes to create datacards and root files for combine, and run the fit - * PLOTS: folder with some scripts for plots and dedicated studies for analysis +### Each session setup +Everytime a new session is started, the first thing to do is enabling the singularity (with an adapted APPTAINER_BIND variable) +```bash +export APPTAINER_BIND="/tmp,/run,/cvmfs/etc/grid-security,/home/,/work/,/data" +singularity run /cvmfs/unpacked.cern.ch/gitlab-registry.cern.ch/bendavid/cmswmassdocker/wmassdevrolling\:latest +``` +and to source the setup script to execute the setup of submodules and create some environment variables to ease access to some folders. ```bash source WRemnants/setup.sh ``` -### Theory agnostic analysis -Make histograms (only nominal and mass variations for now, systematics are being developed) -``` -/usr/bin/time -v python scripts/histmakers/mw_with_mu_eta_pt.py -o outputFolder/ --theoryAgnostic --noAuxiliaryHistograms -``` +### Project overview +The project contains several submodules that point to standalone repositories that may or may not be used also for other projects. Those are: +* [narf](https://github.com/bendavid/narf): This provides the computational backand for the event processing and boost histogram production using Root Data Frames (RDF). +* [wums](https://github.com/WMass/wums): This is a pure python based submodule containing utility functions that can be more widely used such as the input/output tools, common plotting functions, and histogram manipulation. +* [wremnants-data](https://gitlab.cern.ch/cms-wmass/wremnants-data): This repository contains resource files needed for the analysis, such as data quality .json and by lumisection .csv files, experimental scale factors such as for the efficiencies, theory correction files etc. . It is on CERN gitlab using the large file storage. +* [rabbit](https://github.com/WMass/rabbit): This is the fitting framework using tensorflow 2.x as backend. + +The WRemnants project itself is structured using different folders: +* `notebooks/`: jupyter-notebooks for data exploration and quick tests, mainly user specific +* `scripts/`: All executable files should go here such as + * `scripts/analysisTools/`: analysis and user specific scripts + * `scripts/ci/`: for the github continuous integration (CI) workflow. These scripts are executed automatically and get triggered e.g. by opening a pull request (PR). + * `scripts/corrections/`: to compute correction files used in later steps of the analysis + * `scripts/hepdata/`: for data preservation + * `scripts/histmakers/`: for the processing of columnar data (mainly NanoAOD files) into histograms + * `scripts/inspect/`: tools for inspection of input and output files + * `scripts/plotting/`: data visualization + * `scripts/rabbit/`: fit input data preparation + * `scripts/recoil/`: studies around the hadronic recoil calibration + * `scripts/studies/`: other studies + * `scripts/tests/`: statistical tests +* `wremnants/`: Here are the main analysis classes and functions defined that get executed by the scripts + * `wremnants/postprocessing/`: everything related to analysing the histograms such as tools for plotting and fit input data preparation + * `wremnants/production/`: everything related to histogram production using RDF + * `wremnants/templates/`: small analysis specific templates + * `wremnants/utilities/`: things that are commonly and more widely used across the framework and not restricted to histogram production or postprocessing. Such as input/output tool functionality, common definitions, parsing options, etc. + +A typical analysis is performed in a few steps: +1. **Histogram production**: The processing of columnar data (such as collision events in NanoAOD) is performed in `scripts/histmakers/`. A minimal skeleton can be found in `scripts/histmakers/histmaker_template.py`. Datasets to process are defined in `wremnants/production/datasets/` and new files for new data taking periods or data streams may be added. +2. **Postprocessing / plotting**: A ready-to-use script can be found in `scripts/plotting/makeDataMCStackPlot.py` to plot histograms produced by a histmaker. However it may be easier for a new user to write a new, custom plotting script. +3. **Fit input data preparation**: The central analyses use `scripts/rabbit/setupRabbit.py` to prepare the input file needed for [rabbit](https://github.com/WMass/rabbit). A new analysis may write a custom, more specific and simplified script. Some explanation of how to interface [rabbit](https://github.com/WMass/rabbit) is given in that framework and corresponding documentation. +4. **Fitting**: Statistical data analysis performed in [rabbit](https://github.com/WMass/rabbit). See the [rabbit](https://github.com/WMass/rabbit) documentation for more details. -Prepare inputs for the fit (stat-only for now) -``` -/usr/bin/time -v python scripts/combine/setupRabbit.py -i outputFolder/mw_with_mu_eta_pt_scetlib_dyturboCorr.hdf5 -o outputFolder/ --absolutePathInCard --theoryAgnostic -``` -To remove the backgrounds and run signal only one can add __--excludeProcGroups Top Diboson Fake Zmumu DYlowMass Ztautau Wtaunu BkgWmunu__ +### Contribute to the code -Run the fit (for charge combination) -``` -python WRemnants/scripts/combine/fitManager.py -i outputFolder/WMass_pt_eta_statOnly/ --skip-fit-data --theoryAgnostic --comb -``` -### Theory agnostic analysis with POIs as NOIs +**Guidelines** + * When making a new PR, it should target only one subject at a time. This makes it more easy to validate and the integration faster. PR on top of other PR are ok when it is noted in the description, e.g. this PR is on top of PR XYZ. + * Follow a modular approach and avoid cross dependencies between functions and classes. + * Don't pass "args" across functions and in particular the use of very specific args arguments. This makes it difficult to re-use existing functions across different scripts. + * Avoid using "magic strings" that have the purpose of activating a specific logic. + * Use camel case practice for command line arguments and avoid the "dest" keyword. + * Use snake case practice for function names. + * Class names should start with capital letters. -Make histograms (this has all systematics too unlike the standard theory agnostic setup) -``` -/usr/bin/time -v python scripts/histmakers/mw_with_mu_eta_pt.py -o outputFolder/ --theoryAgnostic --poiAsNoi -``` -Prepare inputs for the fit -``` -/usr/bin/time -v python scripts/combine/setupRabbit.py -i outputFolder/mw_with_mu_eta_pt_scetlib_dyturboCorr.hdf5 -o outputFolder/ --absolutePathInCard --theoryAgnostic --poiAsNoi --priorNormXsec 0.5 -``` -To remove the backgrounds and run signal only one can add __--filterProcGroups Wmunu__ +## Run the existing code +The following is a description of the existing analysis workflows. New analyses should ideally follow a similar logic and use the same underlying functions, command line options etc. but it may be easier and cleaner to write new custom scripts. -Run the fit (for charge combination). Note that it is the same command as the traditional analysis, without --theoryAgnostic -``` -python WRemnants/scripts/combine/fitManager.py -i outputFolder/WMass_pt_eta/ --skip-fit-data --comb -``` - -### Traditional analysis +**NOTE**: + * Each script has tons of options, to customize a gazillion of things. Some defined on the top of each files and others, that are more commonly used across different files are defined in `wremnants/utilities/parsing.py`. It's simpler to learn them by asking an expert rather that having an incomplete summary here (developments happen faster than documentation anyway). + +### Histogram production -Make histograms for WMass (for Wlike the script is __mz_wlike_with_mu_eta_pt.py__). -``` +Make histograms for WMass (similar for other scripts such as `mz_wlike_with_mu_eta_pt.py`, `mz_dilepton.py`, and others). +```bash python WRemnants/scripts/histmakers/mw_with_mu_eta_pt.py -o outputFolder/ ``` -More options are loaded from **WRemnants/utilities/common.py** + +### Fit preparation production Make the inputs for the fit. -``` -python WRemnants/scripts/combine/setupRabbit.py -i outputFolder/mw_with_mu_eta_pt_scetlib_dyturboCorr.hdf5 -o outputFolder/ +```bash +python WRemnants/scripts/rabbit/setupRabbit.py -i outputFolder/mw_with_mu_eta_pt_scetlib_dyturboCorr.hdf5 -o outputFolder/ ``` The input file is the output of the previous step. -The default path specified with __-o__ is the local folder. A subfolder with name identifying the specific analysis (e.g. WMass_pt_eta/) is automatically created inside it. Some options may add tags to the folder name: for example, using --doStatOnly will call the folder WMass_pt_eta_statOnly/. - -Combine the datacards for single charges and run the fit (Asimov only) -``` -python WRemnants/scripts/combine/fitManager.py -i outputFolder/WMass_pt_eta/ --comb --skip-fit-data -``` -Run the fits for single charges (Asimov only). These can be produced in the same output folder as the combination, since a postfix is automatically appended to the output card and fit result files. -``` -python WRemnants/scripts/combine/fitManager.py -i outputFolder/WMass_pt_eta/ --fit-single-charge --skip-fit-data [-c ] -``` - -**NOTE**: - * Each script has tons of options, to customize a gazillion of things, it's simpler to learn them by asking an expert rather that having an incomplete summary here (developments happen faster than documentation anyway). +The default path specified with `-o` is the local folder. A subfolder with name identifying the specific analysis (e.g. `WMass_pt_eta/`) is automatically created inside it. Some options may add tags to the folder name: for example, using `--doStatOnly` will call the folder `WMass_pt_eta_statOnly/`. ### Making plots @@ -124,10 +131,10 @@ python scripts/analysisTools/tests/testShapesIsoMtRegions.py mw_with_mu_eta_pt_s Plot prefit shapes (requires root file from setupRabbit.py as input) ``` -python scripts/analysisTools/w_mass_13TeV/plotPrefitTemplatesWRemnants.py WMassCombineInput.root outputFolder/ [-l 16.8] [--pseudodata ] [--wlike] +python scripts/analysisTools/w_mass_13TeV/plotPrefitTemplatesWRemnants.py WMassrabbitInput.root outputFolder/ [-l 16.8] [--pseudodata ] [--wlike] ``` -Make study of fakes for mW analysis, checking mT dependence, with or without dphi cut (see example inside the script for more options). Even if the histmaker was run with the dphi cut, the script uses a dedicated histograms __mTStudyForFakes__ created before that cut, and with dphi in one axis. +Make study of fakes for mW analysis, checking mT dependence, with or without dphi cut (see example inside the script for more options). Even if the histmaker was run with the dphi cut, the script uses a dedicated histograms `mTStudyForFakes` created before that cut, and with dphi in one axis. ``` python scripts/analysisTools/tests/testFakesVsMt.py mw_with_mu_eta_pt_scetlib_dyturboCorr.hdf5 outputFolder/ --rebinx 4 --rebiny 2 --mtBinEdges "0,5,10,15,20,25,30,35,40,45,50,55,60,65" --mtNominalRange "0,40" --mtFitRange "0,40" --fitPolDegree 1 --integralMtMethod sideband --maxPt 50 --met deepMet [--dphiMuonMetCut 0.25] ``` @@ -152,6 +159,41 @@ Print impacts without plotting (no need to specify output folder) python w_mass_13TeV/makeImpactsOnMW.py fitresults_123456789.root --scaleToMeV --showTotal --justPrint ``` +### Theory agnostic analysis + +Make histograms (only nominal and mass variations for now, systematics are being developed) +``` +/usr/bin/time -v python scripts/histmakers/mw_with_mu_eta_pt.py -o outputFolder/ --theoryAgnostic --noAuxiliaryHistograms +``` + +Prepare inputs for the fit (stat-only for now) +``` +/usr/bin/time -v python scripts/rabbit/setupRabbit.py -i outputFolder/mw_with_mu_eta_pt_scetlib_dyturboCorr.hdf5 -o outputFolder/ --absolutePathInCard --theoryAgnostic +``` +To remove the backgrounds and run signal only one can add `--excludeProcGroups Top Diboson Fake Zmumu DYlowMass Ztautau Wtaunu BkgWmunu` + +Run the fit (for charge combination) +``` +python WRemnants/scripts/rabbit/fitManager.py -i outputFolder/WMass_pt_eta_statOnly/ --skip-fit-data --theoryAgnostic --comb +``` +### Theory agnostic analysis with POIs as NOIs + +Make histograms (this has all systematics too unlike the standard theory agnostic setup) +``` +/usr/bin/time -v python scripts/histmakers/mw_with_mu_eta_pt.py -o outputFolder/ --theoryAgnostic --poiAsNoi +``` + +Prepare inputs for the fit +``` +/usr/bin/time -v python scripts/rabbit/setupRabbit.py -i outputFolder/mw_with_mu_eta_pt_scetlib_dyturboCorr.hdf5 -o outputFolder/ --absolutePathInCard --theoryAgnostic --poiAsNoi --priorNormXsec 0.5 +``` +To remove the backgrounds and run signal only one can add `--filterProcGroups Wmunu` + +Run the fit (for charge combination). Note that it is the same command as the traditional analysis, without `--theoryAgnostic` +``` +python WRemnants/scripts/rabbit/fitManager.py -i outputFolder/WMass_pt_eta/ --skip-fit-data --comb +``` + ### Tools for scale factors Make W MC efficiencies for trigger and isolation (needed for anti-iso and anti-trigger SF) @@ -161,7 +203,7 @@ Make W MC efficiencies for trigger and isolation (needed for anti-iso and anti-t python scripts/analysisTools/w_mass_13TeV/makeWMCefficiency3D.py /path/to/file.hdf5 /path/for/plots/makeWMCefficiency3D/ --rebinUt 2 ``` -Then, run 2D smoothing (has to manually edit the default input files inside for now, see other options inside too). Option __--extended__ was used to select SF computed in a larger ut range, but now this might become the default (to be updated) +Then, run 2D smoothing (has to manually edit the default input files inside for now, see other options inside too). Option `--extended` was used to select SF computed in a larger ut range, but now this might become the default (to be updated) ``` python scripts/analysisTools/w_mass_13TeV/run2Dsmoothing.py /path/for/plots/test2Dsmoothing/ ``` diff --git a/wremnants/postprocessing/datagroups/datagroups.py b/wremnants/postprocessing/datagroups/datagroups.py index a64e0f112..d95f154ce 100644 --- a/wremnants/postprocessing/datagroups/datagroups.py +++ b/wremnants/postprocessing/datagroups/datagroups.py @@ -1,3 +1,11 @@ +""" +This is the central class to load, manipulate and hold histograms for plotting and fit input data preparation. +A datagroups object contains multiple datagroup objects. +Each datagroup represents a independenly treated process (e.g. a separate histogram sich as 'Wmunu` with a different color in the stacked plotted histogram) and contains multiple members. +Each member typically represents the output from a single dataset from the histogram production (e.g. Wminusmunu). +New, manipulated datagroup objects with members can be constructed that are not bound to that logic. +""" + import itertools import math import os diff --git a/wremnants/postprocessing/datagroups/datagroups2016.py b/wremnants/postprocessing/datagroups/datagroups2016.py index 7bf61a503..6311fa596 100644 --- a/wremnants/postprocessing/datagroups/datagroups2016.py +++ b/wremnants/postprocessing/datagroups/datagroups2016.py @@ -1,3 +1,8 @@ +""" +Define the datagroups objects with individual datagroup objects and it's members. +This one is for the single muon and dimuon analysis at high PU used for 2016 postVFP analysis but it may be used also for other, similar analyses. +""" + from wums import logging logger = logging.child_logger(__name__) @@ -5,7 +10,6 @@ def make_datagroups_2016( dg, - combine=False, pseudodata_pdfset=None, excludeGroups=None, filterGroups=None, @@ -30,7 +34,7 @@ def make_datagroups_2016( members=dg.get_members_from_results(startswith=["GG", "QG"]), ) - if pseudodata_pdfset and dg.combine: + if pseudodata_pdfset: dg.addGroup( f"pdf{pseudodata_pdfset.upper()}_sum", label=f"pdf{pseudodata_pdfset.upper()}", diff --git a/wremnants/postprocessing/datagroups/datagroupsLowPU.py b/wremnants/postprocessing/datagroups/datagroupsLowPU.py index dda31a1c5..efb2b8197 100644 --- a/wremnants/postprocessing/datagroups/datagroupsLowPU.py +++ b/wremnants/postprocessing/datagroups/datagroupsLowPU.py @@ -1,9 +1,14 @@ +""" +Define the datagroups objects with individual datagroup objects and it's members. +This one is for the single muon/electron and dimuon/electron analysis at low PU used for 2017 analysis but it may be used also for other, similar analyses. +""" + from wums import logging logger = logging.child_logger(__name__) -def make_datagroups_lowPU(dg, combine=False, excludeGroups=None, filterGroups=None): +def make_datagroups_lowPU(dg, excludeGroups=None, filterGroups=None): # reset datagroups dg.groups = {} diff --git a/wremnants/postprocessing/histselections.py b/wremnants/postprocessing/histselections.py index e01a2c342..f509e8dd6 100644 --- a/wremnants/postprocessing/histselections.py +++ b/wremnants/postprocessing/histselections.py @@ -1,3 +1,7 @@ +""" +Implementing various configurations for (extended) ABCD methods for nonprompt muon estimation. +""" + import hist import numpy as np from scipy import interpolate diff --git a/wremnants/postprocessing/regression.py b/wremnants/postprocessing/regression.py index f4da9fab4..a744b6192 100644 --- a/wremnants/postprocessing/regression.py +++ b/wremnants/postprocessing/regression.py @@ -1,3 +1,8 @@ +""" +Performing (non-negative) linear least square for different polynomials. +Mainly used in wremnants/postprocessing/histselections.py for smoothing the nonprompt lepton estimation in the extended ABCD method +""" + import numpy as np from scipy import stats from scipy.optimize import nnls diff --git a/wremnants/production/datasets/datasetDict_13TeVGen.py b/wremnants/production/datasets/datasetDict_13TeVGen.py index d04bdf07f..bb3619f1c 100644 --- a/wremnants/production/datasets/datasetDict_13TeVGen.py +++ b/wremnants/production/datasets/datasetDict_13TeVGen.py @@ -1,3 +1,7 @@ +""" +For the use in detector-independent generator studies and input to theory corrections +""" + import copy from wremnants.production.datasets.datasetDict_2016PostVFP import ( diff --git a/wremnants/production/datasets/datasetDict_2017G.py b/wremnants/production/datasets/datasetDict_2017G.py index 48c851afe..53a0c1ec3 100644 --- a/wremnants/production/datasets/datasetDict_2017G.py +++ b/wremnants/production/datasets/datasetDict_2017G.py @@ -1,3 +1,7 @@ +""" +This is the Low PU data set taken at 5.020 TeV with an integrated luminosity of about 300/pb +""" + from wremnants.utilities import common lumicsv = f"{common.data_dir}/bylsoutput_2017G.csv" diff --git a/wremnants/production/datasets/datasetDict_2017H.py b/wremnants/production/datasets/datasetDict_2017H.py index bde0b69d7..0e84ffee5 100644 --- a/wremnants/production/datasets/datasetDict_2017H.py +++ b/wremnants/production/datasets/datasetDict_2017H.py @@ -1,3 +1,7 @@ +""" +This is the Low PU data set taken at 13 TeV with an integrated luminosity of about 200/pb +""" + from wremnants.utilities import common lumijson = f"{common.data_dir}/lowPU/Cert_306896-307082_13TeV_PromptReco_Collisions17_JSON_LowPU_lowPU_suppressedHighPULS.txt" diff --git a/wremnants/production/datasets/dataset_tools.py b/wremnants/production/datasets/dataset_tools.py index f9fc61be7..43d24d225 100644 --- a/wremnants/production/datasets/dataset_tools.py +++ b/wremnants/production/datasets/dataset_tools.py @@ -1,3 +1,7 @@ +""" +Functionality to load and prepare datasets with the focus on CMS NanoAOD or NanoGEN files +""" + import importlib import os import random From 126f111194316cfc975e3749bfe20bc4ff8bfa11 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Mon, 16 Mar 2026 08:30:43 -0400 Subject: [PATCH 11/23] Add option to add masked channel to extrapolate unfolding to full phase space --- scripts/rabbit/setupRabbit.py | 34 +++++++++++-------- .../postprocessing/datagroups/datagroups.py | 15 ++------ 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index bd42f5c23..a53d895b0 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -117,6 +117,11 @@ def make_subparsers(parser): action="store_true", help="Include underflow/overflow in masked channels (for iterative unfolding)", ) + parser.add_argument( + "--unfoldingFullPhaseSpace", + action="store_true", + help="Include masked channel with extrapolation to the full phase space", + ) parser.add_argument( "--unfoldSimultaneousWandZ", action="store_true", @@ -3028,20 +3033,21 @@ def outputFolderName(outfolder, datagroups, doStatOnly, postfix): unfolding_with_flow=args.unfoldingWithFlow, ) - # add masked channel in full phase space - datagroups_xnorm = setup( - writer, - args, - ifile, - f"{args.unfoldingLevel}_full", - iLumiScale, - genvar, - genvar=genvar, - stat_only=args.doStatOnly or args.doStatOnlyMasked, - channel=f"{channel}_full_masked", - unfolding_scalemap=unfolding_scalemap, - unfolding_with_flow=True, - ) + if args.unfoldingFullPhaseSpace: + # add masked channel in full phase space + datagroups_xnorm = setup( + writer, + args, + ifile, + f"{args.unfoldingLevel}_full", + iLumiScale, + genvar, + genvar=genvar, + stat_only=args.doStatOnly or args.doStatOnlyMasked, + channel=f"{channel}_full_masked", + unfolding_scalemap=unfolding_scalemap, + unfolding_with_flow=True, + ) if args.unfoldSimultaneousWandZ and datagroups.mode == "w_mass": # for simultaneous unfolding of W and Z we need to add the noi variations on the Z background in the single lepton channel diff --git a/wremnants/postprocessing/datagroups/datagroups.py b/wremnants/postprocessing/datagroups/datagroups.py index d95f154ce..7c4d242b1 100644 --- a/wremnants/postprocessing/datagroups/datagroups.py +++ b/wremnants/postprocessing/datagroups/datagroups.py @@ -46,19 +46,8 @@ def __init__(self, infile, mode=None, xnorm=False, **kwargs): self.results = pickle.load(f) elif infile.endswith(".hdf5"): logger.info("Load input file") - infiles = infile.split(",") - self.results = {} - for ifile in sorted(infiles): - logger.info(f"Now at {ifile}") - h5file = h5py.File(ifile, "r") - result = base_io.load_results_h5py(h5file) - for k, v in result.items(): - if k in self.results.keys() and k in ["meta_info"]: - logger.warning(f"Skip {k}") - continue - - self.results[k] = v - + h5file = h5py.File(infile, "r") + self.results = base_io.load_results_h5py(h5file) else: raise ValueError(f"{infile} has unsupported file type") From e198759c7a2d837d02eb7abfaac7c88b000b1978 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Sat, 28 Mar 2026 14:19:52 -0400 Subject: [PATCH 12/23] Disable MC inflation for unfolding --- scripts/rabbit/setupRabbit.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index a9f09a37d..d38521fa1 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -1411,13 +1411,16 @@ def setup( masked_flow_axes=masked_flow_axes, ) else: + if isUnfolding: + bin_by_bin_stat_scale = 1.0 + elif wmass: + bin_by_bin_stat_scale = args.binByBinStatScaleForMW + elif dilepton: + bin_by_bin_stat_scale = args.binByBinStatScaleForDilepton + datagroups.addNominalHistograms( real_data=args.realData, - bin_by_bin_stat_scale=( - args.binByBinStatScaleForMW - if wmass - else args.binByBinStatScaleForDilepton if dilepton else 1.0 - ), + bin_by_bin_stat_scale=bin_by_bin_stat_scale, fitresult_data=fitresult_data, masked=datagroups.xnorm and fitresult_data is None, masked_flow_axes=masked_flow_axes, @@ -1559,7 +1562,6 @@ def setup( raise NotImplementedError( f"noi variations currently only works for 1 signal group but got {len(signal_groups)}" ) - rabbit_helpers.add_noi_unfolding_variations( datagroups, label, From fb79ae82529d19340ccf785e53d17e315cda216c Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Mon, 30 Mar 2026 17:46:07 -0400 Subject: [PATCH 13/23] Update submodules --- rabbit | 2 +- wums | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rabbit b/rabbit index acbe1d44c..6b08932c2 160000 --- a/rabbit +++ b/rabbit @@ -1 +1 @@ -Subproject commit acbe1d44c9e5f53262fa00f83a195288f44c789a +Subproject commit 6b08932c24cd447ca7340a80195c29c2de0546d2 diff --git a/wums b/wums index c7e8a11a1..23d5b2034 160000 --- a/wums +++ b/wums @@ -1 +1 @@ -Subproject commit c7e8a11a136b4b9d0df1f6fcb21d6127a4feea72 +Subproject commit 23d5b20343c022d31c30b480e6b40e2d830dfe1f From 0145bdaa2a37ed40a5ec0fcd0863d7cfbce50dc4 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Mon, 30 Mar 2026 21:48:02 -0400 Subject: [PATCH 14/23] Fix CI --- .github/workflows/main.yml | 20 ++++++++++---------- scripts/rabbit/setupRabbit.py | 2 ++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 02218a087..1f961f482 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -341,7 +341,7 @@ jobs: # - name: bsm plot parameter correlations # run: >- - # scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py '$WREMNANTS_OUTDIR/WMass_eta_pt_charge_bsm/fitresults.hdf5' + # scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py '$WREMNANTS_OUTDIR/WMass_eta_pt_charge_bsm/fitresults.hdf5' # -o $WEB_DIR/$PLOT_DIR/BSM --params 'WtoNMuMass50' massShiftW100MeV # --title CMS --subtitle Preliminary --titlePos 0 --config 'wremnants/utilities/styles/styles.py' --correlation --showNumbers @@ -475,14 +475,14 @@ jobs: - name: plot xsec covariance run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/WMass_eta_pt_charge_poiAsNoi/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_mw --title CMS --subtitle Preliminary --postfix poiAsNoi -m Project ch0_masked ptGen -m Project ch0_masked absEtaGen --config wremnants/utilities/styles/styles.py - name: plot xsec corrrelation run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/WMass_eta_pt_charge_poiAsNoi/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_mw --title CMS --subtitle Preliminary --postfix poiAsNoi -m Project ch0_masked ptGen -m Project ch0_masked absEtaGen --config wremnants/utilities/styles/styles.py --correlation @@ -1097,14 +1097,14 @@ jobs: - name: plot xsec covariance run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_poiAsNoi/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --title CMS --subtitle Preliminary --postfix poiAsNoi -m Project ch0_masked ptVGen -m Project ch0_masked absYVGen --config wremnants/utilities/styles/styles.py - name: plot xsec corrrelation run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_poiAsNoi/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --title CMS --subtitle Preliminary --postfix poiAsNoi -m Project ch0_masked ptVGen -m Project ch0_masked absYVGen --config wremnants/utilities/styles/styles.py --correlation @@ -1185,7 +1185,7 @@ jobs: - name: unfolding plot pulls run: >- scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_pulls_and_impacts.py - $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_unfolding/fitresults.hdf5 -m ungrouped --sortDescending -s constraint --debug --noImpacts + $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_unfolding/fitresults.hdf5 -m ungrouped --sortDescending -s constraint --debug --impactType none -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --postfix unfolding -n 50 --otherExtensions png pdf - name: plot xsec @@ -1204,14 +1204,14 @@ jobs: - name: plot xsec covariance run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_unfolding/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --postfix unfolding --title CMS --subtitle Preliminary -m Project ch0_masked ptVGen -m Project ch0_masked absYVGen --config wremnants/utilities/styles/styles.py - name: plot xsec corrrelation run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_unfolding/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --postfix unfolding --title CMS --subtitle Preliminary -m Project ch0_masked ptVGen -m Project ch0_masked absYVGen --config wremnants/utilities/styles/styles.py --correlation @@ -1549,14 +1549,14 @@ jobs: - name: alphaS z gen-fit xsec covariance run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $ALPHAS_Z_GENFIT -o $ALPHAS_PLOT_DIR --postfix z_genfit --title CMS --subtitle Preliminary -m Project chSigmaUL ptVGen -m Project chSigmaUL absYVGen --config wremnants/utilities/styles/styles.py - name: alphaS z gen-fit xsec corrrelation run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $ALPHAS_Z_GENFIT -o $ALPHAS_PLOT_DIR --postfix z_genfit --title CMS --subtitle Preliminary -m Project chSigmaUL ptVGen -m Project chSigmaUL absYVGen --config wremnants/utilities/styles/styles.py --correlation diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 6f6e8b09e..fa6c5a24c 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -1502,6 +1502,8 @@ def setup( bin_by_bin_stat_scale = args.binByBinStatScaleForMW elif dilepton: bin_by_bin_stat_scale = args.binByBinStatScaleForDilepton + else: + bin_by_bin_stat_scale = 1.0 datagroups.addNominalHistograms( real_data=args.realData, From b6b494dae91b5ae6853c582f5e67a4102458e04b Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Mon, 6 Apr 2026 10:57:26 -0400 Subject: [PATCH 15/23] Rename 'theory_helpers' to 'helicity_smoothing_helpers' and add option to disable these --- scripts/histmakers/mw_lowPU.py | 22 ++++-- scripts/histmakers/mw_with_mu_eta_pt.py | 75 +++++++++++-------- scripts/histmakers/mz_dilepton.py | 56 +++++++++----- scripts/histmakers/mz_lowPU.py | 22 ++++-- scripts/histmakers/mz_wlike_with_mu_eta_pt.py | 16 ++-- scripts/histmakers/w_z_gen_dists.py | 10 +-- wremnants/production/theory_corrections.py | 36 +++++---- wremnants/production/unfolding_tools.py | 16 ++-- wremnants/utilities/parsing.py | 7 +- 9 files changed, 160 insertions(+), 100 deletions(-) diff --git a/scripts/histmakers/mw_lowPU.py b/scripts/histmakers/mw_lowPU.py index 0ed17fa27..7f4863322 100644 --- a/scripts/histmakers/mw_lowPU.py +++ b/scripts/histmakers/mw_lowPU.py @@ -138,11 +138,13 @@ "transverseMass", ] ## was transverseMass -theory_helpers_procs = theory_corrections.make_theory_helpers( +helicity_smoothing_helpers_procs = theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr ) -axis_ptVgen = theory_helpers_procs["W"]["qcdScale"].hist.axes["ptVgen"] -axis_chargeVgen = theory_helpers_procs["W"]["qcdScale"].hist.axes["chargeVgen"] +axis_ptVgen = helicity_smoothing_helpers_procs["W"]["qcdScale"].hist.axes["ptVgen"] +axis_chargeVgen = helicity_smoothing_helpers_procs["W"]["qcdScale"].hist.axes[ + "chargeVgen" +] groups_to_aggregate = args.aggregateGroups @@ -190,9 +192,9 @@ def build_graph(df, dataset): results = [] isQCDMC = dataset.group == "QCD" - theory_helpers = None + helicity_smoothing_helpers = None if dataset.name in samples.vprocs: - theory_helpers = theory_helpers_procs[dataset.name[0]] + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] if dataset.is_data: df = df.DefinePerSample("weight", "1.0") @@ -246,7 +248,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in unfolding_axes[level] if a.name != "acceptance"], [c for c in unfolding_cols[level] if c != f"{level}_acceptance"], base_name=level, @@ -416,7 +418,11 @@ def build_graph(df, dataset): df = df.Define("exp_weight", "SFMC") df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) else: df = df.DefinePerSample("nominal_weight", "1.0") @@ -524,7 +530,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, a, c, base_name=n, diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 6ae7e9da9..172f33091 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -443,9 +443,14 @@ args.fitresult, channel="ch1_masked" ) -theory_helpers_procs = theory_corrections.make_theory_helpers( - args.pdfs, args.theoryCorr, procs=["Z", "W"] -) +if args.skipByHelicityCorrection: + helicity_smoothing_helpers_procs = ( + theory_corrections.make_helicity_smoothing_helpers( + args.pdfs, args.theoryCorr, procs=["Z", "W"] + ) + ) +else: + helicity_smoothing_helpers_procs = {} if args.theoryAgnostic: theoryAgnostic_axes, theoryAgnostic_cols = binning.get_theoryAgnostic_axes( @@ -643,25 +648,26 @@ filePath=args.theoryAgnosticFilePath, ) -# Helper for muR and muF as polynomial variations -muRmuFPolVar_helpers_minus = makehelicityWeightHelper_polvar( - genVcharge=-1, - fileTag=args.muRmuFPolVarFileTag, - filePath=args.muRmuFPolVarFilePath, - noUL=True, -) -muRmuFPolVar_helpers_plus = makehelicityWeightHelper_polvar( - genVcharge=1, - fileTag=args.muRmuFPolVarFileTag, - filePath=args.muRmuFPolVarFilePath, - noUL=True, -) -muRmuFPolVar_helpers_Z = makehelicityWeightHelper_polvar( - genVcharge=0, - fileTag=args.muRmuFPolVarFileTag, - filePath=args.muRmuFPolVarFilePath, - noUL=True, -) +if args.theoryAgnostic: + # Helper for muR and muF as polynomial variations + muRmuFPolVar_helpers_minus = makehelicityWeightHelper_polvar( + genVcharge=-1, + fileTag=args.muRmuFPolVarFileTag, + filePath=args.muRmuFPolVarFilePath, + noUL=True, + ) + muRmuFPolVar_helpers_plus = makehelicityWeightHelper_polvar( + genVcharge=1, + fileTag=args.muRmuFPolVarFileTag, + filePath=args.muRmuFPolVarFilePath, + noUL=True, + ) + muRmuFPolVar_helpers_Z = makehelicityWeightHelper_polvar( + genVcharge=0, + fileTag=args.muRmuFPolVarFileTag, + filePath=args.muRmuFPolVarFilePath, + noUL=True, + ) # recoil initialization if not args.noRecoil: @@ -705,9 +711,10 @@ def build_graph(df, dataset): hist.storage.Double() ) # turn off sum weight square for systematic histograms - theory_helpers = {} - if isWorZ: - theory_helpers = theory_helpers_procs[dataset.name[0]] + if dataset.name[0] in helicity_smoothing_helpers_procs.keys(): + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] + else: + helicity_smoothing_helpers = {} # disable auxiliary histograms when unfolding to reduce memory consumptions, or when doing the original theory agnostic without --poiAsNoi auxiliary_histograms = True @@ -803,7 +810,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [], [], base_name="gen", @@ -860,7 +867,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in unfolding_axes[level] if a.name != "acceptance"], [ c @@ -881,7 +888,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in unfolding_axes[level] if a.name != "acceptance"], [ c @@ -898,7 +905,7 @@ def build_graph(df, dataset): elif dataset.name == "Zmumu_2016PostVFP": if args.unfolding and dataset.name == "Zmumu_2016PostVFP": df = unfolder_z.add_gen_histograms( - args, df, results, dataset, corr_helpers, theory_helpers + args, df, results, dataset, corr_helpers, helicity_smoothing_helpers ) if not unfolder_z.poi_as_noi: @@ -1306,7 +1313,11 @@ def build_graph(df, dataset): logger.debug(f"Exp weight defined: {weight_expr}") df = df.Define("exp_weight", weight_expr) df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) if ( @@ -1842,7 +1853,7 @@ def build_graph(df, dataset): nominal_cols, ) - if isWorZ and not hasattr(dataset, "out_of_acceptance"): + if isWorZ and args.theoryAgnostic and not hasattr(dataset, "out_of_acceptance"): theoryAgnostic_helpers_cols = [ "qtOverQ", "absYVgen", @@ -2071,7 +2082,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, axes, cols, for_wmass=True, diff --git a/scripts/histmakers/mz_dilepton.py b/scripts/histmakers/mz_dilepton.py index 906eab55e..adcc84d78 100644 --- a/scripts/histmakers/mz_dilepton.py +++ b/scripts/histmakers/mz_dilepton.py @@ -361,14 +361,20 @@ muon_prefiring_helper, muon_prefiring_helper_stat, muon_prefiring_helper_syst = ( muon_prefiring.make_muon_prefiring_helpers(era=era) ) -procs = [ - p - for p, grp in (("W", samples.wprocs), ("Z", samples.zprocs)) - if any(d.name in grp for d in datasets) -] -theory_helpers_procs = theory_corrections.make_theory_helpers( - args.pdfs, args.theoryCorr, procs=procs -) + +if args.skipByHelicityCorrection: + procs = [ + p + for p, grp in (("W", samples.wprocs), ("Z", samples.zprocs)) + if any(d.name in grp for d in datasets) + ] + helicity_smoothing_helpers_procs = ( + theory_corrections.make_helicity_smoothing_helpers( + args.pdfs, args.theoryCorr, procs=procs + ) + ) +else: + helicity_smoothing_helpers_procs = {} # extra axes which can be used to label tensor_axes if args.binnedScaleFactors: @@ -532,9 +538,10 @@ def build_graph(df, dataset): isZ = dataset.name in samples.zprocs isWorZ = isW or isZ - theory_helpers = {} - if isWorZ: - theory_helpers = theory_helpers_procs[dataset.name[0]] + if dataset.name[0] in helicity_smoothing_helpers_procs.keys(): + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] + else: + helicity_smoothing_helpers = {} cvh_helper = data_calibration_helper if dataset.is_data else mc_calibration_helper jpsi_helper = data_jpsi_crctn_helper if dataset.is_data else mc_jpsi_crctn_helper @@ -577,7 +584,12 @@ def build_graph(df, dataset): if args.unfolding and dataset.group == "Zmumu": df = unfolder_z.add_gen_histograms( - args, df, results, dataset, corr_helpers, theory_helpers=theory_helpers + args, + df, + results, + dataset, + corr_helpers, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) if not unfolder_z.poi_as_noi: @@ -598,7 +610,11 @@ def build_graph(df, dataset): df_gen = df df_gen = df_gen.DefinePerSample("exp_weight", "1.0") df_gen = theory_corrections.define_theory_weights_and_corrs( - df_gen, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df_gen, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) for obs in auxiliary_gen_axes: @@ -613,7 +629,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [all_axes[obs]], [obs], base_name=f"gen_{obs}", @@ -921,7 +937,11 @@ def build_graph(df, dataset): logger.debug(f"Experimental weight defined: {weight_expr}") df = df.Define("exp_weight", weight_expr) df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) results.append( @@ -1059,7 +1079,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, obs_axes, obs, base_name=obs_name, @@ -1081,7 +1101,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [all_axes[obs]], [obs], base_name=f"nominal_{obs}", @@ -1282,7 +1302,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, axes, cols, for_wmass=False, diff --git a/scripts/histmakers/mz_lowPU.py b/scripts/histmakers/mz_lowPU.py index cafdf1243..6d11be073 100644 --- a/scripts/histmakers/mz_lowPU.py +++ b/scripts/histmakers/mz_lowPU.py @@ -103,11 +103,13 @@ axes_mt = [axis_mt] cols_mt = ["transverseMass"] -theory_helpers_procs = theory_corrections.make_theory_helpers( +helicity_smoothing_helpers_procs = theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr ) -axis_ptVgen = theory_helpers_procs["Z"]["qcdScale"].hist.axes["ptVgen"] -axis_chargeVgen = theory_helpers_procs["Z"]["qcdScale"].hist.axes["chargeVgen"] +axis_ptVgen = helicity_smoothing_helpers_procs["Z"]["qcdScale"].hist.axes["ptVgen"] +axis_chargeVgen = helicity_smoothing_helpers_procs["Z"]["qcdScale"].hist.axes[ + "chargeVgen" +] if args.unfolding: @@ -151,9 +153,9 @@ def build_graph(df, dataset): results = [] - theory_helpers = None + helicity_smoothing_helpers = None if dataset.name in samples.vprocs: - theory_helpers = theory_helpers_procs[dataset.name[0]] + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] if dataset.is_data: df = df.DefinePerSample("weight", "1.0") @@ -168,7 +170,7 @@ def build_graph(df, dataset): if args.unfolding and dataset.group == base_group: df = unfolder_z.add_gen_histograms( - args, df, results, dataset, corr_helpers, theory_helpers + args, df, results, dataset, corr_helpers, helicity_smoothing_helpers ) if not unfolder_z.poi_as_noi: @@ -356,7 +358,11 @@ def build_graph(df, dataset): df = df.Define("exp_weight", "SFMC") df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) else: df = df.DefinePerSample("nominal_weight", "1.0") @@ -539,7 +545,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, a, c, base_name=n, diff --git a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py index 0620619ca..bbbdf8a36 100644 --- a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py +++ b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py @@ -292,7 +292,7 @@ muon_prefiring.make_muon_prefiring_helpers(era=era) ) -theory_helpers_procs = theory_corrections.make_theory_helpers( +helicity_smoothing_helpers_procs = theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr, corrs=["qcdScale", "alphaS", "pdf"] ) @@ -456,9 +456,9 @@ def build_graph(df, dataset): isZ = dataset.name in samples.zprocs isWorZ = isW or isZ - theory_helpers = None + helicity_smoothing_helpers = None if isWorZ: - theory_helpers = theory_helpers_procs[dataset.name[0]] + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] if dataset.is_data: df = df.DefinePerSample("weight", "1.0") @@ -582,7 +582,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in unfolding_axes[level] if a.name != "acceptance"], [c for c in unfolding_cols[level] if c != f"{level}_acceptance"], base_name=level, @@ -910,7 +910,11 @@ def build_graph(df, dataset): logger.debug(f"Exp weight defined: {weight_expr}") df = df.Define("exp_weight", weight_expr) df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) results.append( @@ -1428,7 +1432,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, axes, cols, for_wmass=False, diff --git a/scripts/histmakers/w_z_gen_dists.py b/scripts/histmakers/w_z_gen_dists.py index ffec8ecb6..239b990e4 100644 --- a/scripts/histmakers/w_z_gen_dists.py +++ b/scripts/histmakers/w_z_gen_dists.py @@ -192,7 +192,7 @@ corrs.append("qcdScale") if args.centralBosonPDFWeight: corrs.append("pdf_central") -theory_helpers_procs = theory_corrections.make_theory_helpers( +helicity_smoothing_helpers_procs = theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr, procs=["Z", "W"], corrs=corrs ) @@ -215,9 +215,9 @@ def build_graph(df, dataset): ] # in samples.zprocs if isW or isZ: - theory_helpers = theory_helpers_procs[dataset.name[0]] + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] else: - theory_helpers = {} + helicity_smoothing_helpers = {} theoryAgnostic_axes, _ = binning.get_theoryAgnostic_axes( ptV_flow=True, absYV_flow=True, wlike="Z" in dataset.name @@ -316,7 +316,7 @@ def build_graph(df, dataset): df = df.Define("isEvenEvent", "event % 2 == 0") df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers + df, dataset.name, corr_helpers, args, helicity_smoothing_helpers ) if isZ or dataset.group in [ @@ -957,7 +957,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, nominal_axes, nominal_cols, base_name="nominal_gen", diff --git a/wremnants/production/theory_corrections.py b/wremnants/production/theory_corrections.py index 139515dd6..eafc03985 100644 --- a/wremnants/production/theory_corrections.py +++ b/wremnants/production/theory_corrections.py @@ -380,7 +380,9 @@ def define_pdf_columns(df, dataset_name, pdfs, noAltUnc): return df -def define_central_pdf_weight_from_helicities(df, dataset_name, pdf, theory_helpers): +def define_central_pdf_weight_from_helicities( + df, dataset_name, pdf, helicity_smoothing_helpers +): logger.info("Using PDF weights from helicities for the central PDF weight") pdf_name = theory_utils.pdfMap[pdf]["name"] @@ -390,7 +392,7 @@ def define_central_pdf_weight_from_helicities(df, dataset_name, pdf, theory_help df = df.DefinePerSample("unity", "1.") df = df.Define( tensorName, - theory_helpers["pdf_central"], + helicity_smoothing_helpers["pdf_central"], [ "massVgen", "absYVgen", @@ -430,7 +432,9 @@ def define_central_pdf_weight(df, dataset_name, pdf): ) -def define_theory_weights_and_corrs(df, dataset_name, helpers, args, theory_helpers={}): +def define_theory_weights_and_corrs( + df, dataset_name, helpers, args, helicity_smoothing_helpers={} +): if "LHEPart_status" in df.GetColumnNames(): df = generator_level_definitions.define_lhe_vars(df) @@ -455,15 +459,15 @@ def define_theory_weights_and_corrs(df, dataset_name, helpers, args, theory_help df = df.DefinePerSample("theory_weight_truncate", "10.") if ( - theory_helpers - and "pdf_central" in theory_helpers.keys() - and theory_helpers["pdf_central"] is not None + helicity_smoothing_helpers + and "pdf_central" in helicity_smoothing_helpers.keys() + and helicity_smoothing_helpers["pdf_central"] is not None ): df = define_central_pdf_weight_from_helicities( df, dataset_name, args.pdfs[0] if len(args.pdfs) >= 1 else None, - theory_helpers, + helicity_smoothing_helpers, ) else: # if no boson-parametrized weights are available df = define_central_pdf_weight( @@ -1033,19 +1037,19 @@ def make_corr_by_helicity( return corr_coeffs -def make_theory_helpers( +def make_helicity_smoothing_helpers( pdfs, theory_corr=[], procs=["Z", "W"], corrs=["qcdScale", "pdf", "pdf_from_corr", "alphaS", "pdf_central"], ): - theory_helpers_procs = {p: {} for p in procs} + helicity_smoothing_helpers_procs = {p: {} for p in procs} - for proc in theory_helpers_procs.keys(): + for proc in helicity_smoothing_helpers_procs.keys(): if "qcdScale" in corrs: - theory_helpers_procs[proc]["qcdScale"] = ( + helicity_smoothing_helpers_procs[proc]["qcdScale"] = ( make_qcd_uncertainty_helper_by_helicity( is_z=proc == "Z", rebin_ptVgen=False, @@ -1054,7 +1058,7 @@ def make_theory_helpers( ) if "pdf" in corrs: - theory_helpers_procs[proc]["pdf"] = ( + helicity_smoothing_helpers_procs[proc]["pdf"] = ( make_pdfs_uncertainties_helper_by_helicity( proc=proc, pdfs=pdfs, @@ -1062,7 +1066,7 @@ def make_theory_helpers( ) if "pdf_from_corr" in corrs: pdf_from_corrs = [x + "_Corr" for x in theory_corr if "pdfvar" in x] - theory_helpers_procs[proc]["pdf_from_corr"] = ( + helicity_smoothing_helpers_procs[proc]["pdf_from_corr"] = ( make_pdfs_from_corrs_uncertainties_helper_by_helicity( proc=proc, pdfs_from_corrs=pdf_from_corrs, @@ -1070,14 +1074,14 @@ def make_theory_helpers( ) if "alphaS" in corrs: as_vars = [x + "_Corr" for x in theory_corr if "pdfas" in x] - theory_helpers_procs[proc]["alphaS"] = ( + helicity_smoothing_helpers_procs[proc]["alphaS"] = ( make_alphaS_uncertainties_helper_by_helicity( proc=proc, as_vars=as_vars, ) ) if "pdf_central" in corrs: - theory_helpers_procs[proc]["pdf_central"] = ( + helicity_smoothing_helpers_procs[proc]["pdf_central"] = ( make_uncertainty_helper_by_helicity( proc=proc, nom=theory_utils.pdfMap[pdfs[0]]["name"], @@ -1088,7 +1092,7 @@ def make_theory_helpers( ) ) - return theory_helpers_procs + return helicity_smoothing_helpers_procs def make_qcd_uncertainty_helper_by_helicity( diff --git a/wremnants/production/unfolding_tools.py b/wremnants/production/unfolding_tools.py index 1fbaec711..84b3ff782 100644 --- a/wremnants/production/unfolding_tools.py +++ b/wremnants/production/unfolding_tools.py @@ -182,7 +182,7 @@ def add_xnorm_histograms( args, dataset_name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, unfolding_axes, unfolding_cols, base_name="xnorm", @@ -193,7 +193,11 @@ def add_xnorm_histograms( df_xnorm = df_xnorm.DefinePerSample("exp_weight", "1.0") df_xnorm = theory_corrections.define_theory_weights_and_corrs( - df_xnorm, dataset_name, corr_helpers, args, theory_helpers=theory_helpers + df_xnorm, + dataset_name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) df_xnorm = df_xnorm.Define("xnorm", "0.5") @@ -231,7 +235,7 @@ def add_xnorm_histograms( args, dataset_name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, xnorm_axes, xnorm_cols, base_name=base_name, @@ -436,7 +440,7 @@ def __init__( ) def add_gen_histograms( - self, args, df, results, dataset, corr_helpers, theory_helpers={} + self, args, df, results, dataset, corr_helpers, helicity_smoothing_helpers={} ): df = define_gen_level( df, dataset.name, self.unfolding_levels, mode=self.analysis_label @@ -487,7 +491,7 @@ def add_gen_histograms( args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in self.unfolding_axes[level] if a.name != "acceptance"], [ c @@ -508,7 +512,7 @@ def add_gen_histograms( args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in self.unfolding_axes[level] if a.name != "acceptance"], [ c diff --git a/wremnants/utilities/parsing.py b/wremnants/utilities/parsing.py index e17066227..3c43b36bf 100644 --- a/wremnants/utilities/parsing.py +++ b/wremnants/utilities/parsing.py @@ -203,10 +203,15 @@ def __call__(self, parser, namespace, values, option_string=None): ], help="Add EW theory corrections without modifying the default theoryCorr list. Will be appended to args.theoryCorr", ) + parser.add_argument( + "--skipByHelicityCorrection", + action="store_true", + help="Apply the QCD corrections and uncertainties from MiNNLO event weights, otherwise use by-helicity reweighting", + ) parser.add_argument( "--skipHelicity", action="store_true", - help="Skip the qcdScaleByHelicity histogram (it can be huge)", + help="Skip the qcdScaleByHelicity histogram production (it can be huge)", ) parser.add_argument( "--noRecoil", action="store_true", help="Don't apply recoild correction" From d0eb12436627fb8f673f63ac714be47f528f2491 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Mon, 6 Apr 2026 18:22:51 -0400 Subject: [PATCH 16/23] Update plotting scripts for unfolding and move them in separate area --- .../unfolding}/inclusive_xsec_summary.py | 193 +++++++++++------- scripts/analysisTools/unfolding/limits.py | 169 +++++++++++++++ .../unfolding}/unfolding_massbias.py | 0 3 files changed, 288 insertions(+), 74 deletions(-) rename scripts/{plotting => analysisTools/unfolding}/inclusive_xsec_summary.py (79%) create mode 100644 scripts/analysisTools/unfolding/limits.py rename scripts/{plotting => analysisTools/unfolding}/unfolding_massbias.py (100%) diff --git a/scripts/plotting/inclusive_xsec_summary.py b/scripts/analysisTools/unfolding/inclusive_xsec_summary.py similarity index 79% rename from scripts/plotting/inclusive_xsec_summary.py rename to scripts/analysisTools/unfolding/inclusive_xsec_summary.py index dc742f8dd..a5ed5e5c3 100644 --- a/scripts/plotting/inclusive_xsec_summary.py +++ b/scripts/analysisTools/unfolding/inclusive_xsec_summary.py @@ -42,6 +42,12 @@ default=False, help="scale the results of SMP-20-004 to updated lumi", ) +parser.add_argument( + "--includeSMP20004", + action="store_true", + default=False, + help="include data points from SMP-20-004 (JHEP04(2025)162)", +) args = parser.parse_args() logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) @@ -71,7 +77,10 @@ else: pdf_results[pdf_name] = pdf_mapping -nPDFs = len(args.pdfFiles) +all_1d_pdf_names = list(pdf_results.keys()) + [ + k for k in comp_result.keys() if k != "TOTAL" +] +nPDFs = len(all_1d_pdf_names) pdf_colors = { "CT18": "#2ca02c", "CT18Z": "#E42536", @@ -169,7 +178,7 @@ hp = result[mapping]["channels"][channel]["hist_prefit_inclusive"].get() h1 = result[mapping]["channels"][channel]["hist_postfit_inclusive"].get() hi = result[mapping]["channels"][channel][ - "hist_postfit_inclusive_global_impacts_grouped" + "hist_postfit_inclusive_gaussian_global_impacts_grouped" ].get() if selection is not None: hp = hp[selection] @@ -194,10 +203,10 @@ labels = labels[mask] impacts = impacts[mask] - if np.sum(impacts**2) ** 0.5 / error - 1 > 10e-10: - raise RuntimeError( - f"Sources don't add up to total error, got a difference of {np.sum(impacts**2)**0.5/error - 1}" - ) + # if np.sum(impacts**2) ** 0.5 / error - 1 > 10e-10: + # raise RuntimeError( + # f"Sources don't add up to total error, got a difference of {np.sum(impacts**2)**0.5/error - 1}" + # ) labels = np.append(labels, "Total") impacts = np.append(impacts, error) @@ -261,6 +270,42 @@ df[f"{pdf_name}_error"] = hr.variance**0.5 df[f"{pdf_name}_pdf"] = hr_impacts[{"impacts": f"pdf{pdf_name}"}] + for pdf_name, comp_res in comp_result.items(): + if pdf_name == "TOTAL": + continue + + candidates = [mappings] if not isinstance(mappings, tuple) else list(mappings) + channel_data = None + for m in candidates: + ckey = f"{m.replace(identifier, '')} {channel.replace(identifier, '')}" + if ckey in comp_res["channels"]: + channel_data = comp_res["channels"][ckey] + break + if channel_data is None: + continue + fittype = ( + "postfit" if "hist_postfit_inclusive" in channel_data.keys() else "prefit" + ) + hr = channel_data[f"hist_{fittype}_inclusive"].get() + + if selection is not None: + hr = hr[selection] + if getattr(hr, "axes", False) and "yield" in hr.axes.name: + hr = hr[{"yield": hist.sum}] + + df[pdf_name] = hr.value + df[f"{pdf_name}_error"] = hr.variance**0.5 + + if f"hist_{fittype}_inclusive_global_impacts_grouped" in channel_data: + hr_impacts = channel_data[ + f"hist_{fittype}_inclusive_global_impacts_grouped" + ].get() + if selection is not None: + hr_impacts = hr_impacts[selection] + if getattr(hr_impacts, "axes", False) and "yield" in hr_impacts.axes.name: + hr_impacts = hr_impacts[{"yield": hist.sum}] + df[f"{pdf_name}_pdf"] = hr_impacts[{"impacts": f"pdf{pdf_name}"}] + # Convert 'labels' column to categorical with the custom order df["label"] = pd.Categorical(df["label"], categories=custom_order, ordered=True) @@ -320,7 +365,7 @@ ax = fig.add_subplot(111) # x axis range -lo, hi = 0.925, 1.085 +lo, hi = 0.97, 1.085 # totals = [] # stats = [] @@ -350,7 +395,8 @@ # ax.errorbar([prefit], [i+0.5], xerr=prefit_err, color="red", marker="o", label="Prefit" if i ==0 else None) - for j, pdf_name in enumerate(pdf_results.keys()): + j = 0 + for j, pdf_name in enumerate(all_1d_pdf_names): pdf_value = df_g[pdf_name].values[0] / norm pdf_error = df_g[f"{pdf_name}_error"].values[0] / norm ax.errorbar( @@ -361,28 +407,28 @@ marker="o", label=pdf_name if i == 0 else None, ) - # # only plot PDF uncertainties - # pdf_error_pdf = df_g[f"{pdf_name}_pdf"].values[0] / norm - # ax.errorbar( - # [pdf_value], - # [i + 1 - (j + 1) / (nPDFs + 1)], - # xerr=pdf_error_pdf, - # color=pdf_colors[pdf_name], - # capsize=5, - # capthick=2, - # marker="o", - # ) + if f"{pdf_name}_pdf" in df_g.columns: + pdf_error_pdf = df_g[f"{pdf_name}_pdf"].values[0] / norm + ax.errorbar( + [pdf_value], + [i + 1 - (j + 1) / (nPDFs + nMeas)], + xerr=pdf_error_pdf, + color=pdf_colors[pdf_name], + capsize=5, + capthick=2, + marker="o", + ) # cross sections from SMP-20-004 - j = j + 1 - ax.errorbar( - [smp_20_004[name][0]] / norm, - [i + 1 - (j + 1) / (nPDFs + nMeas)], - xerr=smp_20_004[name][1] / norm, - color="black", - marker="x", - label="JHEP04(2025)162" if i == 0 else None, - ) + if args.includeSMP20004: + ax.errorbar( + smp_20_004[name][0] / norm, + i + 0.5, + xerr=smp_20_004[name][1] / norm, + color="black", + marker="x", + label="JHEP04(2025)162" if i == 0 else None, + ) # round to two significant digits in total uncertainty sig_digi = 2 - int(math.floor(math.log10(abs(total)))) - 1 @@ -527,11 +573,20 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): return ellipse -for name, channel0, channel1, unit in ( - ("WpWm", xsec_keys[0], xsec_keys[1], "pb"), - ("WZ", xsec_keys[2], xsec_keys[3], "pb"), - ("R", xsec_keys[4], xsec_keys[5], None), -): +plot_2d_configs = { + False: ( # fiducial phase space + ("WpWm", xsec_keys[0], xsec_keys[1], "pb", (3065, 3200), (3975, 4200)), + ("WZ", xsec_keys[2], xsec_keys[3], "pb", (7050, 7375), (585, 615)), + ("R", xsec_keys[4], xsec_keys[5], None, (1.297, 1.304), (11.9, 12.2)), + ), + True: ( # total phase space + ("WpWm", xsec_keys[0], xsec_keys[1], "pb", (7600, 9600), (10200, 12600)), + ("WZ", xsec_keys[2], xsec_keys[3], "pb", (18000, 22000), (1720, 2100)), + ("R", xsec_keys[4], xsec_keys[5], None, (1.32, 1.41), (10.1, 10.85)), + ), +} + +for name, channel0, channel1, unit, xlim, ylim in plot_2d_configs[args.full]: plt.clf() fig = plt.figure() @@ -621,50 +676,40 @@ def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): ax.add_patch(ell) ax.plot(x, y, color=icol, marker="o", alpha=0) - # scale for 2D plot - prob = chi2.cdf(1, df=1) # Get the probability of the 1D n-sigma - scale = np.sqrt(chi2.ppf(prob, df=2)) # Find the 2D equivalent scale + if args.includeSMP20004: + # cross sections from SMP-20-004 — correlations unknown, show as 2D error bars + x = smp_20_004[channel0[0]][0] + x_err = smp_20_004[channel0[0]][1] + y = smp_20_004[channel1[0]][0] + y_err = smp_20_004[channel1[0]][1] - # cross sections from SMP-20-004 - x = smp_20_004[channel0[0]][0] - x_err = smp_20_004[channel0[0]][1] * scale - y = smp_20_004[channel1[0]][0] - y_err = smp_20_004[channel1[0]][1] * scale - - plt.errorbar( - x, - y, - xerr=x_err, - yerr=y_err, - fmt="x", - color="black", - ecolor="black", - capsize=5, - label="JHEP04(2025)162", - ) + ax.errorbar( + x, + y, + xerr=x_err, + yerr=y_err, + fmt="x", + color="black", + ecolor="black", + capsize=4, + capthick=1.5, + label="JHEP04(2025)162", + ) + if xlim is not None: + ax.set_xlim(*xlim) + + if ylim is not None: + ax.set_ylim(*ylim) + else: + xlim = ax.get_xlim() + ylim = ax.get_ylim() + yrange = ylim[1] - ylim[0] + + ax.set_ylim(ylim[0], ylim[1] + yrange * 0.25) - # cov = np.array([[x_err**2, 0],[0, y_err**2]]) # dummy covariance - # ell = plot_cov_ellipse( - # cov, - # np.array([x, y]), - # nstd=1, - # edgecolor="black", - # facecolor="none", - # linewidth=2, - # label="JHEP04(2025)162", - # ) - # ax.add_patch(ell) - # ax.plot(x, y, color="black", marker="x") - - xlim = ax.get_xlim() - ylim = ax.get_ylim() - yrange = ylim[1] - ylim[0] - - ax.set_xlim(*xlim) - ax.set_ylim(ylim[0], ylim[1] + yrange * 0.25) if unit is not None: - plt.xlabel(rf"$\sigma({channel0[0].replace("$","")})$ [pb]") - plt.ylabel(rf"$\sigma({channel1[0].replace("$","")})$ [pb]") + plt.xlabel(rf"$\sigma({channel0[0].replace('$', '')})$ [pb]") + plt.ylabel(rf"$\sigma({channel1[0].replace('$', '')})$ [pb]") else: plt.xlabel(channel0[0]) plt.ylabel(channel1[0]) diff --git a/scripts/analysisTools/unfolding/limits.py b/scripts/analysisTools/unfolding/limits.py new file mode 100644 index 000000000..c16dff35c --- /dev/null +++ b/scripts/analysisTools/unfolding/limits.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +import argparse + +import mplhep as hep +import numpy as np + +from wums import logging, output_tools, plot_tools + +hep.style.use(hep.style.ROOT) + + +def parseArgs(): + parser = argparse.ArgumentParser() + + parser.add_argument("xsecfile", type=str, help="File with mass, xsec") + parser.add_argument("limitfile", type=str, help="File with mass, limits") + parser.add_argument( + "--model", + required=True, + type=str, + choices=["zprime", "hnl"], + help="Model for comprison", + ) + + parser.add_argument("-o", "--outpath", type=str, default="./plots") + parser.add_argument("--title", default="CMS", type=str) + parser.add_argument("--subtitle", default="", type=str) + parser.add_argument("--xlabel", default=r"$m_{Z'}$ [GeV]") + parser.add_argument("--ylabel", default=r"$g_{Z'}$") + parser.add_argument("--verbose", type=int, default=3) + parser.add_argument("--noColorLogger", action="store_true") + parser.add_argument( + "--legPos", type=str, default="upper right", help="Set legend position" + ) + parser.add_argument( + "--legSize", + type=str, + default="small", + help="Legend text size (small: axis ticks size, large: axis label size, number)", + ) + parser.add_argument( + "--legCols", type=int, default=2, help="Number of columns in legend" + ) + parser.add_argument( + "-p", "--postfix", type=str, help="Postfix for output file name" + ) + parser.add_argument( + "--ylim", + type=float, + nargs=2, + help="Min and max values for y axis (if not specified, range set automatically)", + ) + parser.add_argument( + "--upperLimit", + type=float, + nargs="+", + help="Upper limit of measurements", + ) + return parser.parse_args() + + +def main(): + args = parseArgs() + logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) + + outdir = output_tools.make_plot_dir(args.outpath) + + # --- load data ------------------------------------------------------------ + mass, xsec = np.loadtxt(args.xsecfile, unpack=True) + mass2, limits = np.loadtxt(args.limitfile, unpack=True) + + # expected limits from xsec + expected = np.array(args.upperLimit) / (xsec * 100) + if args.model == "zprime": + expected = np.sqrt(expected) + + # --- build figure via plot_tools ----------------------------------------- + fig, ax = plot_tools.figure( + None, + args.xlabel, + args.ylabel, + automatic_scale=False, + xlim=(0, max(mass.max(), mass2.max()) * 1.05), + ylim=args.ylim, + logy=True, + ) + + if args.model == "hnl": + first = 17 + ax.plot( + mass, + expected, + color="black", + marker="", + linestyle="-", + label="This analysis (expected)", + ) + ax.plot( + mass2[:first], + limits[:first], + color="brown", + marker="", + linestyle="-", + label="Prompt $3\ell = (e, \mu, \tau)$ \n arXiv:2403.00100", + ) + ax.plot( + mass2[first - 1 :], + limits[first - 1 :], + color="purple", + marker="", + linestyle="-", + label="Prompt 1 + 1 displaced & jet \n CMS-PAS-EXO-21-011", + ) + elif args.model == "zprime": + ax.plot( + mass, + expected, + color="black", + marker="", + linestyle="-", + label="Limit from $\sigma=100pb$", + ) + ax.plot( + mass2, + limits, + color="brown", + marker="", + linestyle="-", + label="Phys.Rev.D 110 (2024) 072008, 2024.", + ) + + plot_tools.add_decor( + ax, + args.title, + args.subtitle, + data=True, + lumi=None, + loc=2, + ) + + plot_tools.addLegend( + ax, + ncols=args.legCols, + loc=args.legPos, + text_size=args.legSize, + ) + + to_join = ["limits_comparison", args.postfix] + outfile = "_".join(filter(lambda x: x, to_join)) + if args.subtitle == "Preliminary": + outfile += "_preliminary" + + plot_tools.save_pdf_and_png(outdir, outfile) + + # write index + log + output_tools.write_index_and_log( + outdir, + outfile, + analysis_meta_info={ + "xsecfile": args.xsecfile, + "limitfile": args.limitfile, + }, + args=args, + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/plotting/unfolding_massbias.py b/scripts/analysisTools/unfolding/unfolding_massbias.py similarity index 100% rename from scripts/plotting/unfolding_massbias.py rename to scripts/analysisTools/unfolding/unfolding_massbias.py From d2e515ad581eeefa5664a817c1fb385b14ce8843 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Mon, 6 Apr 2026 18:25:23 -0400 Subject: [PATCH 17/23] Edits to 'Rename 'theory_helpers' to 'helicity_smoothing_helpers' and add option to disable these' --- scripts/histmakers/mw_with_mu_eta_pt.py | 4 ++-- scripts/histmakers/mz_dilepton.py | 4 ++-- wremnants/production/systematics.py | 14 +++++++------- wremnants/production/unfolding_tools.py | 1 + 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 172f33091..f6741e8f5 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -444,13 +444,13 @@ ) if args.skipByHelicityCorrection: + helicity_smoothing_helpers_procs = {} +else: helicity_smoothing_helpers_procs = ( theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr, procs=["Z", "W"] ) ) -else: - helicity_smoothing_helpers_procs = {} if args.theoryAgnostic: theoryAgnostic_axes, theoryAgnostic_cols = binning.get_theoryAgnostic_axes( diff --git a/scripts/histmakers/mz_dilepton.py b/scripts/histmakers/mz_dilepton.py index adcc84d78..d04cf7974 100644 --- a/scripts/histmakers/mz_dilepton.py +++ b/scripts/histmakers/mz_dilepton.py @@ -363,6 +363,8 @@ ) if args.skipByHelicityCorrection: + helicity_smoothing_helpers_procs = {} +else: procs = [ p for p, grp in (("W", samples.wprocs), ("Z", samples.zprocs)) @@ -373,8 +375,6 @@ args.pdfs, args.theoryCorr, procs=procs ) ) -else: - helicity_smoothing_helpers_procs = {} # extra axes which can be used to label tensor_axes if args.binnedScaleFactors: diff --git a/wremnants/production/systematics.py b/wremnants/production/systematics.py index 28b3d1bce..4eeaa96da 100644 --- a/wremnants/production/systematics.py +++ b/wremnants/production/systematics.py @@ -1347,7 +1347,7 @@ def add_theory_hists( args, dataset_name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, axes, cols, base_name="nominal", @@ -1418,21 +1418,21 @@ def add_theory_hists( if for_wmass or isZ: - if theory_helpers.get("qcdScale") is not None: + if helicity_smoothing_helpers.get("qcdScale") is not None: logger.debug(f"Make QCD scale histograms for {dataset_name}") # there is no W backgrounds for the Wlike, make QCD scale histograms only for Z # should probably remove the charge here, because the Z only has a single charge and the pt distribution does not depend on which charged lepton is selected add_qcdScaleByHelicityUnc_hist( results, df, - theory_helpers.get("qcdScale"), + helicity_smoothing_helpers.get("qcdScale"), scale_axes, scale_cols, **info, ) - if theory_helpers.get("pdf") is not None: - pdf_helpers = theory_helpers.get("pdf") + if helicity_smoothing_helpers.get("pdf") is not None: + pdf_helpers = helicity_smoothing_helpers.get("pdf") for pdf in args.pdfs: pdf_name = theory_utils.pdfMap[pdf]["name"] if pdf_name not in pdf_helpers.keys(): @@ -1453,7 +1453,7 @@ def add_theory_hists( cols, **info, ) - for pdf_name, pdf_from_corr_helper in theory_helpers.get( + for pdf_name, pdf_from_corr_helper in helicity_smoothing_helpers.get( "pdf_from_corr", {} ).items(): logger.debug( @@ -1470,7 +1470,7 @@ def add_theory_hists( **info, ) - for k, v in theory_helpers.get("alphaS", {}).items(): + for k, v in helicity_smoothing_helpers.get("alphaS", {}).items(): logger.debug( f"Make alphaS uncertainty by helicity histogram for {dataset_name} and alphaS from correction {k}" ) diff --git a/wremnants/production/unfolding_tools.py b/wremnants/production/unfolding_tools.py index 84b3ff782..25d116165 100644 --- a/wremnants/production/unfolding_tools.py +++ b/wremnants/production/unfolding_tools.py @@ -498,6 +498,7 @@ def add_gen_histograms( for c in self.unfolding_cols[level] if c != f"{level}_acceptance" ], + add_helicity_axis=self.add_helicity_axis, base_name=f"{level}_full", ) From 07293b10067a72826c5479efa2eba3bc5aa773f5 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Tue, 7 Apr 2026 15:59:37 -0400 Subject: [PATCH 18/23] Support MiNNLO QCD scale from event weight hist (default is from helicity smoothing); Use corrections by default --- scripts/rabbit/setupRabbit.py | 32 +++-- .../postprocessing/rabbit_theory_helper.py | 123 ++++++++++++------ wremnants/postprocessing/syst_tools.py | 3 + 3 files changed, 103 insertions(+), 55 deletions(-) diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 16cdd381a..29f014983 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -716,15 +716,20 @@ def make_parser(parser=None, argv=None): help="Scale the PDF hessian uncertainties by this factor (by default take the value in the pdfInfo map)", ) parser.add_argument( - "--pdfUncFromCorr", + "--pdfUncFromUncorr", action="store_true", - help="Take PDF uncertainty from correction hist (requires having run that correction)", + help="Take PDF uncertainty from uncorrected hist (by default it reads it from the correction hist, but requires having run that correction)", ) parser.add_argument( "--asUncFromUncorr", action="store_true", help="Take alpha_S uncertainty from uncorrected hist (by default it reads it from the correction hist, but requires having run that correction)", ) + parser.add_argument( + "--minnloUncFromUncorr", + action="store_true", + help="Take minnlo muR-muF uncertainty from uncorrected hist (by default it reads it from the correction hist, but requires having run that correction)", + ) parser.add_argument( "--scaleMinnloScale", default=1.0, @@ -1632,6 +1637,8 @@ def setup( decorwidth=decorwidth, ) + add_theory_uncertainties = not stat_only and not args.noTheoryUnc + # this appears within doStatOnly because technically these nuisances should be part of it if isPoiAsNoi: if isTheoryAgnostic: @@ -1718,7 +1725,7 @@ def setup( ) if ("zwidth" in args.noi and not wmass) or ( - not datagroups.xnorm and not stat_only and not args.noTheoryUnc + not datagroups.xnorm and add_theory_uncertainties ): # Variation from EW fit (mostly driven by alphas unc.) datagroups.addSystematic( @@ -1738,7 +1745,7 @@ def setup( ) # TODO: move closer to W mass uncertainty? - if wmass and ("wwidth" in args.noi or (not stat_only and not args.noTheoryUnc)): + if wmass and ("wwidth" in args.noi or add_theory_uncertainties): rabbit_helpers.add_W_width_uncertainty( datagroups, signal_samples_forMass, @@ -1747,7 +1754,7 @@ def setup( label=label, ) - if "sin2thetaW" in args.noi or (not stat_only and not args.noTheoryUnc): + if "sin2thetaW" in args.noi or add_theory_uncertainties: datagroups.addSystematic( "sin2thetaWeightZ", name=f"Sin2thetaZ0p00003", @@ -1764,7 +1771,7 @@ def setup( passToFakes=passSystToFakes, ) - if "alphaS" in args.noi or (not stat_only and not args.noTheoryUnc): + if "alphaS" in args.noi or add_theory_uncertainties: theorySystSamples = ["signal_samples_inctau"] if wmass: if args.helicityFitTheoryUnc: @@ -1788,8 +1795,9 @@ def setup( np_model=args.npUnc, tnp_scale=args.scaleTNP, mirror_tnp=False, - pdf_from_corr=args.pdfUncFromCorr, + pdf_from_corr=not args.pdfUncFromUncorr, as_from_corr=not args.asUncFromUncorr, + minnlo_from_corr=not args.minnloUncFromUncorr, scale_pdf_unc=args.scalePdf, samples=theorySystSamples, minnlo_unc=args.minnloScaleUnc, @@ -1797,16 +1805,14 @@ def setup( from_hels=not args.noTheoryCorrsViaHelicities, theory_symmetrize=args.symmetrizeTheoryUnc, pdf_symmetrize=args.symmetrizePdfUnc, + helicity_fit_unc=args.helicityFitTheoryUnc, ) theory_helper.add_pdf_alphas_variation( noi="alphaS" in args.noi, ) - - if not stat_only and not args.noTheoryUnc: - theory_helper.add_all_theory_unc( - helicity_fit_unc=args.helicityFitTheoryUnc, - ) + if add_theory_uncertainties: + theory_helper.add_all_theory_unc() if stat_only: # print a card with only mass weights @@ -1815,7 +1821,7 @@ def setup( ) return datagroups - if not args.noTheoryUnc: + if add_theory_uncertainties: if wmass and not datagroups.xnorm: if args.massConstraintModeZ == "automatic": constrainMassZ = True diff --git a/wremnants/postprocessing/rabbit_theory_helper.py b/wremnants/postprocessing/rabbit_theory_helper.py index 1133c5674..f0ec8e14d 100644 --- a/wremnants/postprocessing/rabbit_theory_helper.py +++ b/wremnants/postprocessing/rabbit_theory_helper.py @@ -73,7 +73,7 @@ def __init__(self, label, datagroups, args, hasNonsigSamples=False): self.datagroups = datagroups corr_hists = self.datagroups.args_from_metadata("theoryCorr") - self.corr_hist_name = (corr_hists[0] + "_Corr") if corr_hists else None + self.corr_hist_name = (corr_hists[0] + "_Corr") if corr_hists else "" self.syst_ax = "vars" self.corr_hist = None @@ -109,6 +109,7 @@ def configure( mirror_tnp=True, as_from_corr=True, pdf_from_corr=False, + minnlo_from_corr=True, pdf_operation=None, samples=[], scale_pdf_unc=-1.0, @@ -117,6 +118,7 @@ def configure( from_hels=False, theory_symmetrize="quadratic", pdf_symmetrize="quadratic", + helicity_fit_unc=False, ): self.set_resum_unc_type(resumUnc) @@ -128,11 +130,12 @@ def configure( self.tnp_scale = tnp_scale self.mirror_tnp = mirror_tnp self.pdf_from_corr = pdf_from_corr - self.as_from_corr = pdf_from_corr or as_from_corr + self.as_from_corr = as_from_corr + self.minnlo_from_corr = minnlo_from_corr self.pdf_operation = pdf_operation self.scale_pdf_unc = scale_pdf_unc self.samples = samples - self.helicity_fit_unc = False + self.helicity_fit_unc = helicity_fit_unc self.minnlo_scale = minnlo_scale self.from_hels = from_hels @@ -140,10 +143,10 @@ def configure( self.theory_symmetrize = convert_none(theory_symmetrize) self.pdf_symmetrize = convert_none(pdf_symmetrize) - def add_all_theory_unc(self, helicity_fit_unc=False): - self.helicity_fit_unc = helicity_fit_unc + def add_all_theory_unc(self): self.add_nonpert_unc(model=self.np_model) self.add_resum_unc(scale=self.tnp_scale) + self.add_fixed_order_unc() if "nnlojet" in self.corr_hist_name: self.add_stat_unc() # additional uncertainty for effect of shower and intrinsic kt on angular coeffs @@ -254,6 +257,7 @@ def add_resum_unc(self, scale=1): transition=self.transitionUnc, ) + def add_fixed_order_unc(self): if self.minnlo_unc and self.minnlo_unc not in ["none", None]: # sigma_-1 uncertainty is covered by scetlib-dyturbo uncertainties if they are used helicities_to_exclude = None if self.resumUnc == "minnlo" else [-1] @@ -266,7 +270,7 @@ def add_resum_unc(self, scale=1): # orthogonality of chebychev polynomials means that there is no # double counting with fully correlated uncertainty scale_inclusive = 1.0 - else: + elif "Pt" in self.minnlo_unc: fine_pt_binning = binning.ptV_binning[::2] nptfine = len(fine_pt_binning) - 1 scale_inclusive = np.sqrt((nptfine - 1) / nptfine) @@ -279,6 +283,8 @@ def add_resum_unc(self, scale=1): scale=self.minnlo_scale, symmetrize=self.theory_symmetrize, ) + else: + scale_inclusive = 1.0 self.add_minnlo_scale_uncertainty( sample_group, @@ -293,7 +299,6 @@ def add_minnlo_scale_uncertainty( self, sample_group, extra_name="", - use_hel_hist=True, rebin_pt=None, helicities_to_exclude=None, pt_min=None, @@ -306,40 +311,38 @@ def add_minnlo_scale_uncertainty( ) return - helicity = "Helicity" in self.minnlo_unc - pt_binned = "Pt" in self.minnlo_unc - scale_hist = ( - "qcdScale" if not (helicity or use_hel_hist) else "qcdScaleByHelicity" - ) - if "helicity" in scale_hist.lower(): - use_hel_hist = True + group_name = f"QCDscale{self.sample_label(sample_group)}" + base_name = f"{group_name}{extra_name}" - # All possible syst_axes + pt_binned = "Pt" in self.minnlo_unc # TODO: Move the axes to common and refer to axis_chargeVgen etc by their name attribute, not just # assuming the name is unchanged obs = self.datagroups.fit_axes pt_ax = "ptVgen" if "ptVgen" not in obs else "ptVgenAlt" - syst_axes = [pt_ax, "vars"] - syst_ax_labels = ["PtV", "var"] - format_with_values = ["edges", "center"] + if self.minnlo_from_corr: + scale_hist = "qcdScaleByHelicity" - group_name = f"QCDscale{self.sample_label(sample_group)}" - base_name = f"{group_name}{extra_name}" + syst_axes = ["vars"] + syst_ax_labels = ["var"] - skip_entries = [] - preop_map = {} - - # skip nominal - skip_entries.append({"vars": "nominal"}) + skip_entries = [] + # skip nominal + skip_entries.append({"vars": "nominal"}) + # skip pythia shower and kt variations since they are handled elsewhere + skip_entries.append({"vars": "pythia_shower_kt"}) - # skip pythia shower and kt variations since they are handled elsewhere - skip_entries.append({"vars": "pythia_shower_kt"}) + # All possible syst_axes + syst_axes = [pt_ax, *syst_axes] + syst_ax_labels = ["PtV", *syst_ax_labels] + format_with_values = ["edges", "center"] - if helicities_to_exclude: - for helicity in helicities_to_exclude: - skip_entries.append({"vars": f"helicity_{helicity}_Down"}) - skip_entries.append({"vars": f"helicity_{helicity}_Up"}) + if helicities_to_exclude: + for helicity in helicities_to_exclude: + skip_entries.append({"vars": f"helicity_{helicity}_Down"}) + skip_entries.append({"vars": f"helicity_{helicity}_Up"}) + else: + scale_hist = "qcdScale" # NOTE: The map needs to be keyed on the base procs not the group names, which is # admittedly a bit nasty @@ -347,9 +350,6 @@ def add_minnlo_scale_uncertainty( logger.debug(f"using {scale_hist} histogram for QCD scale systematics") logger.debug(f"expanded_samples: {expanded_samples}") - preop_map = {} - preop_args = {} - if pt_binned: signal_samples = self.datagroups.procGroups["signal_samples"] binning = np.array(rebin_pt) if rebin_pt else None @@ -382,29 +382,34 @@ def add_minnlo_scale_uncertainty( pt_idx = np.argmax(binning >= pt_min) skip_entries.extend([{pt_ax: complex(0, x)} for x in binning[:pt_idx]]) - func = ( + preop = ( syst_tools.gen_hist_to_variations if pt_ax == "ptVgenAlt" else syst_tools.hist_to_variations ) - preop_map = {proc: func for proc in expanded_samples} - preop_args["gen_axes"] = [pt_ax] - preop_args["rebin_axes"] = [pt_ax] - preop_args["rebin_edges"] = [binning] + + preop_args = { + "gen_axes": [pt_ax], + "rebin_axes": [pt_ax], + "rebin_edges": [binning], + } if pt_ax == "ptVgenAlt": preop_args["gen_obs"] = ["ptVgen"] + else: + preop = None + preop_args = {} # Skip MiNNLO unc. - if self.resumUnc and not (pt_binned or helicity): + if self.resumUnc and not (pt_binned or "Helicity" in self.minnlo_unc): logger.warning( "Without pT or helicity splitting, only the SCETlib uncertainty will be applied!" ) - else: + elif self.minnlo_from_corr: # FIXME Maybe put W and Z nuisances in the same group group_name += f"MiNNLO" self.datagroups.addSystematic( scale_hist, - preOpMap=preop_map, + preOpMap=preop, preOpArgs=preop_args, symmetrize=symmetrize, processes=[sample_group], @@ -425,6 +430,40 @@ def add_minnlo_scale_uncertainty( scale=scale, name=base_name, # Needed to allow it to be called multiple times ) + else: + # vary muR or muF one at a time + for var, other in [["muRfact", "muFfact"], ["muFfact", "muRfact"]]: + group_name += f"MiNNLO" + self.datagroups.addSystematic( + scale_hist, + preOp=lambda h, *args, **kwargs: ( + preop(h[{other: 1}], *args, **kwargs) + if preop + else h[{other: 1}] + ), + preOpArgs=preop_args, + symmetrize=symmetrize, + processes=[sample_group], + groups=[ + group_name, + "QCDscale", + "angularCoeffs", + "theory", + "theory_qcd", + ], + splitGroup={}, + systAxes=[var], + systNameReplace=[ + [f"{var}0p5", f"{var}Down"], + [f"{var}2", f"{var}Up"], + ], + skipEntries=[{var: 1}], + baseName=base_name + "_", + formatWithValue=["center"], + passToFakes=self.propagate_to_fakes, + scale=scale, + name=base_name, # Needed to allow it to be called multiple times + ) def add_helicity_shower_kt_uncertainty(self): # select the proper variation and project over gen pt unless it is one of the fit variables diff --git a/wremnants/postprocessing/syst_tools.py b/wremnants/postprocessing/syst_tools.py index 13ad732a0..a76336869 100644 --- a/wremnants/postprocessing/syst_tools.py +++ b/wremnants/postprocessing/syst_tools.py @@ -902,6 +902,9 @@ def hist_to_variations( # all the axes have already been projected out, nothing else to do return hist_in + if "vars" not in hist_in.axes.name: + return hist_in + nom_hist = hist_in[{"vars": 0}] nom_hist_sum = nom_hist[gen_sum_expr] From c86de652184e2b36ca77033e23ccc57d21dff4ca Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Wed, 8 Apr 2026 17:41:08 -0400 Subject: [PATCH 19/23] Fixes to previous commits --- .github/workflows/main.yml | 6 +- rabbit | 2 +- .../unfolding/inclusive_xsec_summary.py | 292 +++++++++++++++++- scripts/histmakers/mw_with_mu_eta_pt.py | 36 +-- scripts/histmakers/mz_dilepton.py | 3 +- .../postprocessing/rabbit_theory_helper.py | 2 +- 6 files changed, 311 insertions(+), 30 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1f961f482..4860a9002 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -996,7 +996,7 @@ jobs: mkdir -p $ALPHAS_OUTDIR scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/rabbit/setupRabbit.py \ -i $ALPHAS_HIST_FILE --fitvar "$ALPHAS_FITVAR" --lumiScale $LUMI_SCALE --realData \ - -o $ALPHAS_OUTDIR --noi alphaS --npUnc LatticeEigvars --pdfUncFromCorr + -o $ALPHAS_OUTDIR --noi alphaS --npUnc LatticeEigvars - name: alphaS Z reco fit (${{ matrix.mode }}) run: >- @@ -1418,7 +1418,7 @@ jobs: --fitvar "ptll-yll-cosThetaStarll_quantile-phiStarll_quantile" "eta-pt-charge" \ --lumiScale $LUMI_SCALE $LUMI_SCALE \ -o $ALPHAS_OUTDIR --noi alphaS wmass --postfix reco \ - --npUnc LatticeEigvars --pdfUncFromCorr + --npUnc LatticeEigvars - name: alphaS WZ reco fit run: >- @@ -1467,7 +1467,7 @@ jobs: --genAxes "ptVGen-absYVGen-helicitySig" \ --scaleNormXsecHistYields "0.05" --allowNegativeExpectation \ --realData --systematicType normal --postfix unfolding \ - --npUnc LatticeEigvars --pdfUncFromCorr \ + --npUnc LatticeEigvars \ --rebin 2 2 # this is for CI only! needed for max_files=100 - name: alphaS Z unfolding fit diff --git a/rabbit b/rabbit index 6b08932c2..68240c738 160000 --- a/rabbit +++ b/rabbit @@ -1 +1 @@ -Subproject commit 6b08932c24cd447ca7340a80195c29c2de0546d2 +Subproject commit 68240c7388c9a64ab3ad829bd548cfffe5ff9956 diff --git a/scripts/analysisTools/unfolding/inclusive_xsec_summary.py b/scripts/analysisTools/unfolding/inclusive_xsec_summary.py index a5ed5e5c3..8e6571ec9 100644 --- a/scripts/analysisTools/unfolding/inclusive_xsec_summary.py +++ b/scripts/analysisTools/unfolding/inclusive_xsec_summary.py @@ -48,6 +48,15 @@ default=False, help="include data points from SMP-20-004 (JHEP04(2025)162)", ) +parser.add_argument( + "--theoryFiles", + type=str, + nargs="*", + default=[], + help="Theory prediction files in LABEL:filepath format. " + "Supports DYTURBO 1D txt files and NNLOjet cross.dat files. " + "Multiple files with the same label are combined into one prediction set.", +) args = parser.parse_args() logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) @@ -145,6 +154,223 @@ r"$\mathrm{W/Z}$": [10.491, 0.0864002314811714], } +# Map xsec_key display names to internal process keys +_name_to_proc = { + r"$\mathrm{W}^{-}$": "wm", + r"$\mathrm{W}^{+}$": "wp", + r"$\mathrm{W}$": "w", + r"$\mathrm{Z}$": "z", + r"$\mathrm{W}^{+}/\mathrm{W}^{-}$": "wp_wm", + r"$\mathrm{W/Z}$": "wz", +} + +theory_colors = { + "DYTURBO": "#ff7f0e", + "NNLOjet": "#1f77b4", + "SCETlib+DYTurbo": "#9467bd", +} + + +def _detect_process(path): + """Detect W-/W+/Z from file path (filename or parent directories).""" + parts = path.replace("\\", "/").split("/") + for part in reversed(parts): + p = part.lower() + if p.startswith("results_wm") or p.startswith("wm") or "wminus" in p: + return "wm" + if p.startswith("results_wp") or p.startswith("wp") or "wplus" in p: + return "wp" + if ( + (p.startswith("results_z") or p.startswith("z")) + and "wm" not in p + and "wp" not in p + ): + return "z" + raise ValueError(f"Cannot detect process (z/wm/wp) from path: {path}") + + +def _parse_dyturbo_1d(path): + """Read total cross section from DYTURBO 1D rapidity file (last summary line). + Returns (central_fb, unc_up_fb, unc_dn_fb); no scale variations in 1D files.""" + last = None + with open(path) as f: + for line in f: + s = line.strip() + if s and not s.startswith("#"): + last = s + cols = last.split() + return float(cols[2]), 0.0, 0.0 + + +def _parse_nnlojet_cross(path): + """Parse NNLOjet cross.dat and return (central_fb, unc_up_fb, unc_dn_fb). + Scale uncertainty is symmetric (quadrature of muR and muF half-ranges).""" + labels, data = None, None + with open(path) as f: + for line in f: + s = line.strip() + if s.startswith("#labels:"): + labels = s[len("#labels:") :].split() + elif not s.startswith("#") and s: + data = np.array(s.split(), dtype=float) + col = {lbl.split("[")[0]: i for i, lbl in enumerate(labels)} + vals = np.array([data[col[f"tot_scale0{i+1}"]] for i in range(7)]) + central = vals[3] # scale04 = (muR=1, muF=1) + delta_muR = (vals[5] - vals[1]) / 2 # (scale06 - scale02) / 2 + delta_muF = (vals[2] - vals[4]) / 2 # (scale03 - scale05) / 2 + unc = np.sqrt(delta_muR**2 + delta_muF**2) + return central, unc, unc + + +def _parse_matrix_radish(path): + """Parse MATRIX+RadISH pT distribution file and return (central_fb, unc_up_fb, unc_dn_fb). + Columns: bin_idx, central, stat, scale_down, stat, scale_up, stat. + Total cross section = sum of bins × bin width (1 GeV). Last row is a duplicate, skipped. + """ + data = np.loadtxt(path) + bins = data[:-1] # drop last duplicate row + bin_width = bins[1, 0] - bins[0, 0] # should be 1 GeV + central = np.sum(bins[:, 1]) * bin_width + down = np.sum(bins[:, 3]) * bin_width + up = np.sum(bins[:, 5]) * bin_width + return central, up - central, central - down + + +def _parse_theory_file(path): + with open(path) as f: + first = f.readline().strip() + if first.startswith("#labels: tot_scale01"): + return _parse_nnlojet_cross(path) + elif first.startswith("#ylo yhi PDF0"): + return _parse_dyturbo_1d(path) + elif not first.startswith("#"): + # No header: assume MATRIX_RadISH 7-column pT distribution + cols = first.split() + if len(cols) == 7: + return _parse_matrix_radish(path) + raise ValueError(f"Unknown theory file format: {path}") + + +_SCETLIB_SCALE_VARS = [ + "mufdown", + "mufup", + "kappaFO0.5-kappaf2.", + "kappaFO2.-kappaf0.5", + "mufdown-kappaFO0.5-kappaf2.", + "mufup-kappaFO2.-kappaf0.5", +] + + +def _parse_scetlib_pkl(path): + """Parse a SCETlib+DYTurbo pkl.lz4 correction file. + + Returns dict: proc -> (central_pb, unc_up_pb, unc_dn_pb). + CorrZ files produce {'z': ...}, CorrW files produce {'wm': ..., 'wp': ...}. + Scale uncertainty = asymmetric quadrature of the 6 scale variation deviations. + """ + import pickle + + import lz4.frame + + with lz4.frame.open(path, "rb") as f: + data = pickle.load(f) + + results = {} + for boson_key in [k for k in data if k in ("Z", "W")]: + sub = data[boson_key] + hist_key = next(k for k in sub if k.endswith("_hist")) + h = sub[hist_key] + vars_list = list(h.axes["vars"]) + vals = h.view(flow=True)["value"] + # Axes: Q(underflow+1data+overflow), absY(no underflow, +overflow), + # qT(underflow+data+overflow), charge(no flow), vars(no underflow, +overflow) + # Q data bin is always index 1 in the flow array. + q_slice = 1 + + def _sum_proc(charge_idx, var_idx): + return float(np.sum(vals[q_slice, :, :, charge_idx, var_idx])) + + pdf0_idx = vars_list.index("pdf0") + + if boson_key == "Z": + central = _sum_proc(0, pdf0_idx) + deltas = np.array( + [ + _sum_proc(0, vars_list.index(sv)) - central + for sv in _SCETLIB_SCALE_VARS + if sv in vars_list + ] + ) + unc_up = float(np.sqrt(np.sum(np.where(deltas > 0, deltas, 0) ** 2))) + unc_dn = float(np.sqrt(np.sum(np.where(deltas < 0, deltas, 0) ** 2))) + results["z"] = (central, unc_up, unc_dn) + elif boson_key == "W": + # charge axis: index 0 = W- (charge < 0), index 1 = W+ + for ci, proc in [(0, "wm"), (1, "wp")]: + central = _sum_proc(ci, pdf0_idx) + deltas = np.array( + [ + _sum_proc(ci, vars_list.index(sv)) - central + for sv in _SCETLIB_SCALE_VARS + if sv in vars_list + ] + ) + unc_up = float(np.sqrt(np.sum(np.where(deltas > 0, deltas, 0) ** 2))) + unc_dn = float(np.sqrt(np.sum(np.where(deltas < 0, deltas, 0) ** 2))) + results[proc] = (central, unc_up, unc_dn) + + return results + + +# Build theory_predictions: label -> proc -> (central_pb, unc_up_pb, unc_dn_pb) +theory_predictions = {} +for tf in args.theoryFiles: + label, path = tf.split(":", 1) + if path.endswith(".pkl.lz4"): + # SCETlib+DYTurbo: one file may contain multiple processes + multi = _parse_scetlib_pkl(path) + for proc, (c, u, d) in multi.items(): + theory_predictions.setdefault(label, {})[proc] = (c, u, d) + else: + proc = _detect_process(path) + central_fb, unc_up_fb, unc_dn_fb = _parse_theory_file(path) + theory_predictions.setdefault(label, {})[proc] = ( + central_fb / 1e3, + unc_up_fb / 1e3, + unc_dn_fb / 1e3, + ) + +# Compute derived quantities (W, W+/W-, W/Z) from per-process values +for preds in theory_predictions.values(): + if "wm" in preds and "wp" in preds: + wm, wm_u, wm_d = preds["wm"] + wp, wp_u, wp_d = preds["wp"] + preds["w"] = (wm + wp, np.sqrt(wm_u**2 + wp_u**2), np.sqrt(wm_d**2 + wp_d**2)) + r = wp / wm + preds["wp_wm"] = ( + r, + ( + r * np.sqrt((wp_u / wp) ** 2 + (wm_u / wm) ** 2) + if wp > 0 and wm > 0 + else 0.0 + ), + ( + r * np.sqrt((wp_d / wp) ** 2 + (wm_d / wm) ** 2) + if wp > 0 and wm > 0 + else 0.0 + ), + ) + if "w" in preds and "z" in preds: + w, w_u, w_d = preds["w"] + z, z_u, z_d = preds["z"] + r = w / z + preds["wz"] = ( + r, + r * np.sqrt((w_u / w) ** 2 + (z_u / z) ** 2) if w > 0 and z > 0 else 0.0, + r * np.sqrt((w_d / w) ** 2 + (z_d / z) ** 2) if w > 0 and z > 0 else 0.0, + ) + +nTheory = len(theory_predictions) nMeas = 1 lumi = meta["meta_info_input"]["channel_info"]["ch0"]["lumi"] @@ -365,11 +591,12 @@ ax = fig.add_subplot(111) # x axis range -lo, hi = 0.97, 1.085 +lo, hi = 0.97, 1.115 # totals = [] # stats = [] norms = [] +_theory_labels_added = set() for i, name in enumerate(names[::-1]): df_g = df.loc[df["name"] == name] @@ -395,13 +622,14 @@ # ax.errorbar([prefit], [i+0.5], xerr=prefit_err, color="red", marker="o", label="Prefit" if i ==0 else None) + nSlots = nPDFs + nTheory + nMeas j = 0 for j, pdf_name in enumerate(all_1d_pdf_names): pdf_value = df_g[pdf_name].values[0] / norm pdf_error = df_g[f"{pdf_name}_error"].values[0] / norm ax.errorbar( [pdf_value], - [i + 1 - (j + 1) / (nPDFs + nMeas)], + [i + 1 - (j + 1) / nSlots], xerr=pdf_error, color=pdf_colors[pdf_name], marker="o", @@ -411,7 +639,7 @@ pdf_error_pdf = df_g[f"{pdf_name}_pdf"].values[0] / norm ax.errorbar( [pdf_value], - [i + 1 - (j + 1) / (nPDFs + nMeas)], + [i + 1 - (j + 1) / nSlots], xerr=pdf_error_pdf, color=pdf_colors[pdf_name], capsize=5, @@ -419,6 +647,28 @@ marker="o", ) + proc = _name_to_proc.get(name) + for k, (theo_label, preds) in enumerate(theory_predictions.items()): + if proc not in preds: + continue + central_pb, unc_up_pb, unc_dn_pb = preds[proc] + color = next( + (v for key, v in theory_colors.items() if theo_label.startswith(key)), + f"C{k + 5}", + ) + legend_label = theo_label if theo_label not in _theory_labels_added else None + if legend_label is not None: + _theory_labels_added.add(theo_label) + has_unc = unc_up_pb > 0 or unc_dn_pb > 0 + ax.errorbar( + [central_pb / norm], + [i + 1 - (nPDFs + k + 1) / nSlots], + xerr=[[unc_dn_pb / norm], [unc_up_pb / norm]] if has_unc else None, + color=color, + marker="^", + label=legend_label, + ) + # cross sections from SMP-20-004 if args.includeSMP20004: ax.errorbar( @@ -536,10 +786,44 @@ outname += f"_{args.postfix}" plot_tools.save_pdf_and_png(outdir, outname) +xsec_summary = {} +for name, mappings, channel, selection in xsec_keys: + df_g = df.loc[df["name"] == name] + value = df_g["value"].values[0] + total = df_g.loc[df_g["label"] == "Total"]["impact"].values[0] + stat = df_g.loc[df_g["label"] == "stat"]["impact"].values[0] + + entry = { + "value": value, + "stat": stat, + "total": total, + } + + for pdf_name in all_1d_pdf_names: + if pdf_name in df_g.columns: + pdf_entry = { + "value": df_g[pdf_name].values[0], + "total": df_g[f"{pdf_name}_error"].values[0], + } + if f"{pdf_name}_pdf" in df_g.columns: + pdf_entry["pdf"] = df_g[f"{pdf_name}_pdf"].values[0] + entry[pdf_name] = pdf_entry + + if args.includeSMP20004: + entry["JHEP04(2025)162"] = { + "value": smp_20_004[name][0], + "total": smp_20_004[name][1], + } + + xsec_summary[name] = entry + output_tools.write_index_and_log( outdir, outname, - analysis_meta_info={"CombinetfOutput": meta["meta_info"]}, + analysis_meta_info={ + "CombinetfOutput": meta["meta_info"], + "xsec_summary": xsec_summary, + }, args=args, ) diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 86bd6551c..41a8bbfc9 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -727,10 +727,8 @@ def build_graph(df, dataset): isW = dataset.group in ["Wmunu", "Wtaunu"] isBSM = dataset.name.startswith("WtoNMu") isWmunu = isBSM or dataset.group in ["Wmunu"] - isZ = dataset.group in [ - "Zmumu", - "Ztautau", - ] + isZ = dataset.group in ["Zmumu", "Ztautau"] + isZmumu = dataset.group in ["Zmumu"] isZveto = isZ or dataset.group in ["DYlowMass"] isWorZ = isW or isZ isTop = dataset.group == "Top" @@ -740,7 +738,7 @@ def build_graph(df, dataset): hist.storage.Double() ) # turn off sum weight square for systematic histograms - if dataset.name[0] in helicity_smoothing_helpers_procs.keys(): + if isWorZ and dataset.name[0] in helicity_smoothing_helpers_procs.keys(): helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] else: helicity_smoothing_helpers = {} @@ -931,21 +929,19 @@ def build_graph(df, dataset): cols = [*nominal_cols, *unfolding_cols[level]] break - elif dataset.name == "Zmumu_2016PostVFP": - if args.unfolding and dataset.name == "Zmumu_2016PostVFP": - df = unfolder_z.add_gen_histograms( - args, df, results, dataset, corr_helpers, helicity_smoothing_helpers - ) - - if not unfolder_z.poi_as_noi: - axes = [ - *nominal_axes, - *unfolder_z.unfolding_axes[unfolder_z.unfolding_levels[-1]], - ] - cols = [ - *nominal_cols, - *unfolder_z.unfolding_cols[unfolder_z.unfolding_levels[-1]], - ] + elif isZmumu: + df = unfolder_z.add_gen_histograms( + args, df, results, dataset, corr_helpers, helicity_smoothing_helpers + ) + if not unfolder_z.poi_as_noi: + axes = [ + *nominal_axes, + *unfolder_z.unfolding_axes[unfolder_z.unfolding_levels[-1]], + ] + cols = [ + *nominal_cols, + *unfolder_z.unfolding_cols[unfolder_z.unfolding_levels[-1]], + ] if isWorZ: df = generator_level_definitions.define_prefsr_vars(df) diff --git a/scripts/histmakers/mz_dilepton.py b/scripts/histmakers/mz_dilepton.py index d04cf7974..215a74bf8 100644 --- a/scripts/histmakers/mz_dilepton.py +++ b/scripts/histmakers/mz_dilepton.py @@ -538,7 +538,7 @@ def build_graph(df, dataset): isZ = dataset.name in samples.zprocs isWorZ = isW or isZ - if dataset.name[0] in helicity_smoothing_helpers_procs.keys(): + if isWorZ and dataset.name[0] in helicity_smoothing_helpers_procs.keys(): helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] else: helicity_smoothing_helpers = {} @@ -583,6 +583,7 @@ def build_graph(df, dataset): cols = [*cols, "run"] if args.unfolding and dataset.group == "Zmumu": + print(f"name = {dataset.name}; group = {dataset.group}") df = unfolder_z.add_gen_histograms( args, df, diff --git a/wremnants/postprocessing/rabbit_theory_helper.py b/wremnants/postprocessing/rabbit_theory_helper.py index f0ec8e14d..6d1dd3f8c 100644 --- a/wremnants/postprocessing/rabbit_theory_helper.py +++ b/wremnants/postprocessing/rabbit_theory_helper.py @@ -409,7 +409,7 @@ def add_minnlo_scale_uncertainty( group_name += f"MiNNLO" self.datagroups.addSystematic( scale_hist, - preOpMap=preop, + preOp=preop, preOpArgs=preop_args, symmetrize=symmetrize, processes=[sample_group], From 93c8e0532958dff985ea75b1a532dc0a475aeb45 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Sat, 11 Apr 2026 17:28:41 -0400 Subject: [PATCH 20/23] Replace overflow with np.inf upper edge on ABCD mt/relIso axes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last bin of each axis now explicitly captures [edge, ∞) so rabbit can include it in the fit without relying on histogram overflow. - binning.get_binning_fakes_mt/relIso: append np.inf as last edge - binning.axis_relIsoCat: [0, 0.15, 0.3, inf], overflow=False - mw histmaker axis_mtCat, axis_isoCat: overflow=False - mz_wlike histmaker: replace large-number proxies (1000, 100) with np.inf - histselections: cap regressor x_max at last finite mt edge Co-Authored-By: Claude Sonnet 4.6 --- scripts/histmakers/mw_with_mu_eta_pt.py | 4 ++-- scripts/histmakers/mz_wlike_with_mu_eta_pt.py | 7 ++----- wremnants/postprocessing/histselections.py | 6 ++++-- wremnants/utilities/binning.py | 6 +++++- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 41a8bbfc9..0d8415083 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -312,13 +312,13 @@ ), name="mt", underflow=False, - overflow=True, + overflow=False, ) axis_isoCat = hist.axis.Variable( binning.get_binning_fakes_relIso(high_iso_bins=False), name="relIso", underflow=False, - overflow=True, + overflow=False, ) axes_abcd = [axis_mtCat, axis_isoCat] diff --git a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py index 88bcfaaf2..ef3043439 100644 --- a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py +++ b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py @@ -232,17 +232,14 @@ ) # for isoMt region validation and related tests -# use very high upper edge as a proxy for infinity (cannot exploit overflow bins in the fit) -# can probably use numpy infinity, but this is compatible with the root conversion -# FIXME: now we can probably use overflow bins in the fit axis_mtCat = hist.axis.Variable( - [0, int(args.mtCut / 2.0), args.mtCut, 1000], + [0, int(args.mtCut / 2.0), args.mtCut, np.inf], name="mt", underflow=False, overflow=False, ) axis_isoCat = hist.axis.Variable( - [0, 0.15, 0.3, 100], name="relIso", underflow=False, overflow=False + [0, 0.15, 0.3, np.inf], name="relIso", underflow=False, overflow=False ) if args.addIsoMtAxes: diff --git a/wremnants/postprocessing/histselections.py b/wremnants/postprocessing/histselections.py index 117e9a3a5..9258e371f 100644 --- a/wremnants/postprocessing/histselections.py +++ b/wremnants/postprocessing/histselections.py @@ -1552,13 +1552,15 @@ def __init__( # min and max (in application region) for transformation of bernstain polynomials into interval [0,1] axis_x_min = h[{self.name_x: self.sel_x}].axes[self.name_x].edges[0] if self.name_x == "mt": - # mt does not have an upper bound, cap at 100 + # mt may extend to infinity; use the last finite edge for the regressor edges = ( self.rebin_x if self.rebin_x is not None else h.axes[self.name_x].edges ) - axis_x_max = extend_edges(h.axes[self.name_x].traits, edges)[-1] + all_edges = extend_edges(h.axes[self.name_x].traits, edges) + finite_edges = all_edges[np.isfinite(all_edges)] + axis_x_max = finite_edges[-1] if len(finite_edges) > 0 else 100.0 elif self.name_x in ["iso", "relIso", "relJetLeptonDiff", "dxy"]: # iso and dxy have a finite lower and upper bound in the application region axis_x_max = self.abcd_thresholds[self.name_x][1] diff --git a/wremnants/utilities/binning.py b/wremnants/utilities/binning.py index 470e8b31a..cde0eb72a 100644 --- a/wremnants/utilities/binning.py +++ b/wremnants/utilities/binning.py @@ -538,7 +538,7 @@ def make_bw_binning( # axes with only a few bins for beyond simple ABCD methods axis_isoCat = hist.axis.Variable([0, 4, 8], name="iso", underflow=False, overflow=True) axis_relIsoCat = hist.axis.Variable( - [0, 0.15, 0.3], name="relIso", underflow=False, overflow=True + [0, 0.15, 0.3, np.inf], name="relIso", underflow=False, overflow=False ) @@ -580,6 +580,8 @@ def get_binning_fakes_mt(mt_cut=40, high_mt_bins=False, fine_mt_binning=False): edges = np.append( edges, np.linspace(mt_cut + step, end, int((end - mt_cut) / step)) ) + # last bin captures everything above the highest finite edge (no overflow needed) + edges = np.append(edges, np.inf) return edges @@ -588,6 +590,8 @@ def get_binning_fakes_relIso(high_iso_bins=False): if high_iso_bins: # needed for extended 2D method edges.append(0.3) + # last bin captures everything above the highest finite edge (no overflow needed) + edges.append(np.inf) return edges From f68088fe0d7dff6b33e24a9dbaa4ad9b00c75e82 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Sun, 12 Apr 2026 13:20:03 -0400 Subject: [PATCH 21/23] Fix five bugs identified in code review - rebin_pt: recompute new_edges after trimming input when odd-edge branch fires - systematics.add_syst_hist: fix mutable default tensor_axes=[] -> None - generator_level_definitions: define postfsrPTV for wlike mode (was missing, causing RDF crash when unfolding aliases postfsrV_pt <- postfsrPTV) - unfolding_tools.UnfolderZ: use self.unfolding_levels[0] explicitly instead of relying on loop-variable persistence for the fitresult reweighting block - theory_fit_writer: raise ValueError (not silent None) when required ptV/absYV axes not found; avoid calling axis lookup twice in set_reference Co-Authored-By: Claude Sonnet 4.6 --- wremnants/postprocessing/theory_fit_writer.py | 14 ++++++++------ .../production/generator_level_definitions.py | 8 ++++---- wremnants/production/systematics.py | 6 ++++-- wremnants/production/unfolding_tools.py | 6 +++++- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/wremnants/postprocessing/theory_fit_writer.py b/wremnants/postprocessing/theory_fit_writer.py index fb32781dc..c34b41f01 100644 --- a/wremnants/postprocessing/theory_fit_writer.py +++ b/wremnants/postprocessing/theory_fit_writer.py @@ -37,16 +37,18 @@ def _keep_systematic(self, name): return True def set_reference(self, channel, h, lumi=1.0, scale=1.0, postOp=None): + ptV_name = self.get_ptV_axis_name(h) + absYV_name = self.get_absYV_axis_name(h) self.ref[channel] = { "h": h, "lumi": lumi, "scale": scale, "postOp": postOp, - "ptV_name": self.get_ptV_axis_name(h), - "absYV_name": self.get_absYV_axis_name(h), + "ptV_name": ptV_name, + "absYV_name": absYV_name, "chargeV_name": self.get_charge_axis_name(h), - "ptV_bins": h.axes[self.get_ptV_axis_name(h)].edges, - "absYV_bins": h.axes[self.get_absYV_axis_name(h)].edges, + "ptV_bins": h.axes[ptV_name].edges, + "absYV_bins": h.axes[absYV_name].edges, } self.logger.debug("Initialized channel %s with parameters", channel) self.logger.debug(pprint.pformat(self.ref[channel])) @@ -264,13 +266,13 @@ def get_ptV_axis_name(self, h): for name in ["ptVgen", "ptVGen", "qT"]: if name in h.axes.name: return name - self.logger.debug("Did not find pT axis. Available axes: %s", h.axes.name) + raise ValueError(f"Did not find pT axis. Available axes: {list(h.axes.name)}") def get_absYV_axis_name(self, h): for name in ["absYVgen", "absYVGen", "absY"]: if name in h.axes.name: return name - self.logger.debug("Did not find absY axis. Available axes: %s", h.axes.name) + raise ValueError(f"Did not find absY axis. Available axes: {list(h.axes.name)}") def get_charge_axis_name(self, h): for name in ["chargeVgen", "charge", "qGen"]: diff --git a/wremnants/production/generator_level_definitions.py b/wremnants/production/generator_level_definitions.py index 22101262b..4676c2658 100644 --- a/wremnants/production/generator_level_definitions.py +++ b/wremnants/production/generator_level_definitions.py @@ -402,10 +402,10 @@ def define_postfsr_vars(df, mode=None): else: df = df.Alias("postfsrMET_pt", "MET_fiducialGenPt") df = df.Alias("postfsrMET_phi", "MET_fiducialGenPhi") - df = df.Define( - "postfsrPTV", - "wrem::pt_2(postfsrLep_pt, postfsrLep_phi, postfsrMET_pt, postfsrMET_phi)", - ) + df = df.Define( + "postfsrPTV", + "wrem::pt_2(postfsrLep_pt, postfsrLep_phi, postfsrMET_pt, postfsrMET_phi)", + ) df = df.Define( "postfsrMT", diff --git a/wremnants/production/systematics.py b/wremnants/production/systematics.py index 4eeaa96da..e16d1a251 100644 --- a/wremnants/production/systematics.py +++ b/wremnants/production/systematics.py @@ -31,7 +31,7 @@ def add_syst_hist( axes, cols, tensor_name=None, - tensor_axes=[], + tensor_axes=None, addhelicity=False, propagateToHelicity=False, nhelicity=6, @@ -53,7 +53,9 @@ def add_syst_hist( 2) templates corresponding to each helicity cross section differential in relevant quantities that are sensitive to polarization (e.g. cos(theta*), phi*, lepton eta) -> Reweight event by event via "helWeight_tensor" """ - if not isinstance(tensor_axes, (list, tuple)): + if tensor_axes is None: + tensor_axes = [] + elif not isinstance(tensor_axes, (list, tuple)): tensor_axes = [tensor_axes] if addhelicity: if len(tensor_axes) == 0: diff --git a/wremnants/production/unfolding_tools.py b/wremnants/production/unfolding_tools.py index 25d116165..1aaa0a5ad 100644 --- a/wremnants/production/unfolding_tools.py +++ b/wremnants/production/unfolding_tools.py @@ -345,6 +345,7 @@ def rebin_pt(edges): if len(new_edges) % 2: # in case it's an odd number of edges, last two bins are overflow edges = edges[:-1] + new_edges = np.array([*edges[:2], *edges[3::2]]) return new_edges @@ -475,9 +476,12 @@ def add_gen_histograms( self.unfolding_corr_helper, [*self.unfolding_corr_helper.hist.axes.name[:-1], "unity"], ) + # fitresult reweighting only supported for a single unfolding level + # (enforced in __init__); use index 0 explicitly rather than relying + # on loop-variable persistence from the select_fiducial_space loop above df = df.Define( "central_weight", - f"{level}_acceptance ? unfoldingWeight_tensor(0) : unity", + f"{self.unfolding_levels[0]}_acceptance ? unfoldingWeight_tensor(0) : unity", ) for level in self.unfolding_levels: From 75a3d33369ac6e1b3eac70a454355b797e8eb378 Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Thu, 16 Apr 2026 11:03:30 -0400 Subject: [PATCH 22/23] Add support for simultaneous (extended)ABCD fit --- scripts/rabbit/regen_smoothing_params.py | 300 +++++++++++++++++++++ scripts/rabbit/setupRabbit.py | 83 +++--- wremnants/postprocessing/histselections.py | 26 +- 3 files changed, 375 insertions(+), 34 deletions(-) create mode 100644 scripts/rabbit/regen_smoothing_params.py diff --git a/scripts/rabbit/regen_smoothing_params.py b/scripts/rabbit/regen_smoothing_params.py new file mode 100644 index 000000000..ae99b0642 --- /dev/null +++ b/scripts/rabbit/regen_smoothing_params.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +"""Dump per-region smoothing polynomial coefficients for SmoothExtendedABCD. + +Refit the linearized smoothing polynomial from a nominal fake histogram and +save the per-region polynomial coefficients in the SmoothExtendedABCD +power-series basis. The output can be loaded as initial parameter values in a +rabbit fit via ``params:PATH`` in the ``--paramModel SmoothExtendedABCDIsoMT`` +CLI token. + +Typical use:: + + python scripts/rabbit/regen_smoothing_params.py \\ + -i mw_with_mu_eta_pt_scetlib_dyturbo.hdf5 \\ + -o /path/to/params.hdf5 +""" + +import os + +import h5py +import numpy as np + +from wremnants.postprocessing.datagroups.datagroups import Datagroups +from wremnants.postprocessing.histselections import FakeSelectorSimpleABCD +from wremnants.postprocessing.regression import Regressor +from wremnants.utilities import common, parsing +from wums import ioutils, logging, output_tools + +logger = logging.child_logger(__name__) + + +def make_parser(): + parser = parsing.base_parser() + parser.description = __doc__ + parser.add_argument( + "-i", + "--inputFile", + required=True, + type=str, + help="Input HDF5 histogram file (output of a histmaker).", + ) + parser.add_argument( + "-o", + "--outpath", + required=True, + type=str, + help="Output path for the params HDF5 file (.hdf5 appended if missing).", + ) + parser.add_argument( + "--inputBaseName", + default="nominal", + type=str, + help="Name of the nominal histogram inside the input file.", + ) + parser.add_argument( + "--fakerateAxes", + nargs="+", + default=["eta", "pt", "charge"], + help="Axes for the fakerate binning.", + ) + parser.add_argument( + "--fakeEstimation", + type=str, + default="extended1D", + choices=["simple", "extrapolate", "extended1D", "extended2D"], + help="Fake estimation mode (must match what will be used in setupRabbit).", + ) + parser.add_argument( + "--fakeSmoothingMode", + type=str, + default="full", + choices=FakeSelectorSimpleABCD.smoothing_modes, + help="Smoothing mode for fake estimate.", + ) + parser.add_argument( + "--fakeSmoothingOrder", + type=int, + default=3, + help="Polynomial order for the spectrum smoothing.", + ) + parser.add_argument( + "--fakeSmoothingPolynomial", + type=str, + default="chebyshev", + choices=Regressor.polynomials, + help="Polynomial type for the spectrum smoothing.", + ) + parser.add_argument( + "--excludeProcGroups", + type=str, + nargs="*", + default=["QCD"], + help="Process groups to exclude when building Datagroups.", + ) + parser.add_argument( + "--filterProcGroups", + type=str, + nargs="*", + default=None, + help="If set, keep only these process groups when building Datagroups.", + ) + return parser + + +def dump_smoothing_params( + outpath, fakeselector, datagroups, inputBaseName, meta_data_dict=None, postfix="" +): + """ + Re-fit the linearized smoothing polynomial for the nominal fake histogram + (without the region reduction step) and save the per-region polynomial + coefficients in the SmoothExtendedABCD power-series basis to *outpath*. + + The saved array has shape (5 * n_outer * (order+1),) with the layout + [A_params, B_params, C_params, Ax_params, Bx_params], matching the + internal layout expected by SmoothExtendedABCD as initial_params. + + Flat ABCD index ordering for the 5 regions with signal_region=False + (FakeSelector1DExtendedABCD, with flow=True, after y-axis flip so tight iso is last): + 0=Bx (low mt, pass iso), 1=Ax (low mt, fail iso), + 2=B (mid mt, pass iso), 3=A (mid mt, fail iso), + 4=C (signal mt, fail iso) ← application region, rabbit's free parameter + Note: signal_region=False drops flat index 5 = D (signal mt, pass iso = signal + region), which is rabbit's predicted region. + + Histselections ↔ SmoothExtendedABCD model name mapping: + histsel "application region" (signal mt + fail iso) = C_model (free) + histsel "signal region" (signal mt + pass iso) = D_model (predicted) + + Mapping to model order [A=0, B=1, C=2, Ax=3, Bx=4]: + model_A ← sideband flat 3 + model_B ← sideband flat 2 + model_C ← sideband flat 4 + model_Ax ← sideband flat 1 + model_Bx ← sideband flat 0 + """ + g = datagroups.fakeName + # Build the combined fake histogram (data - prompt MC) the same way the normal + # histogram loading does, but without applying the histselector. + datagroups.loadHistsForDatagroups( + inputBaseName, syst="", procsToRead=[g], label="_dump_tmp", applySelection=False + ) + h_fakes = datagroups.groups[g].hists["_dump_tmp"] + + # Single call with signal_region=False: returns 5 regions [Bx=0, Ax=1, B=2, A=3, C=4] + # The dropped 6th flat element (D = signal mt + pass iso) is the predicted region. + fakeselector.calculate_fullABCD_smoothed(h_fakes, signal_region=False) + if not hasattr(fakeselector, "_params_before_reduce"): + raise RuntimeError( + "dump_smoothing_params: _params_before_reduce not found on fakeselector. " + "Ensure fakeSmoothingMode='full' is used." + ) + # Shape: (*outer_dims, 5, order+1) — indices [Bx=0, Ax=1, B=2, A=3, C=4] + params_5d = fakeselector._params_before_reduce.copy() + + reg = fakeselector.spectrum_regressor + order = reg.order + + # Flatten outer dims → (n_outer_flat, n_abcd, order+1) + outer_shape = params_5d.shape[:-2] + n_outer_flat = int(np.prod(outer_shape)) + params_5d_flat = params_5d.reshape(n_outer_flat, 5, order + 1) + + # Assemble the 5 model regions in model order [A, B, C, Ax, Bx] + # A ← flat 3, B ← flat 2, C ← flat 4, Ax ← flat 1, Bx ← flat 0 + params_model = np.stack( + [ + params_5d_flat[:, 2, :], # B (mid mt, pass iso) + params_5d_flat[:, 3, :], # A (mid mt, fail iso) + params_5d_flat[:, 4, :], # C (signal mt, fail iso) = application region + params_5d_flat[:, 0, :], # Bx (low mt, pass iso) + params_5d_flat[:, 1, :], # Ax (low mt, fail iso) + ], + axis=1, + ) # (n_outer, 5, order+1) + + # The regressor was fit with polynomial=chebyshev, normalised x to [-1,1] + # via x_cheby = 2*(x - center)/(max-min). + # The SmoothExtendedABCD model uses a power-series basis normalised to [0,1] + # via x_norm = (x - x_min)/(x_max - x_min). + # Conversion: evaluate the Chebyshev polynomial at the model's x_norm points + # and refit with the power-series Vandermonde. + smooth_ax = h_fakes.axes[fakeselector.smoothing_axis_name] + x_centers = np.array(smooth_ax.centers) + bin_widths = np.array(smooth_ax.widths) + x_min_model = float(x_centers.min()) + x_max_model = float(x_centers.max()) + x_norm = ( + (x_centers - x_min_model) / (x_max_model - x_min_model) + if x_max_model > x_min_model + else np.zeros_like(x_centers) + ) + + # Transform x_centers to the regressor's Chebyshev domain [-1, 1] + x_cheby = reg.transform_x(x_centers) # shape (n_smooth,) + + # Evaluate Chebyshev polynomials T_k at x_cheby: shape (n_smooth, order+1) + n_smooth = len(x_cheby) + T = np.zeros((n_smooth, order + 1)) + T[:, 0] = 1.0 + if order >= 1: + T[:, 1] = x_cheby + for k in range(2, order + 1): + T[:, k] = 2.0 * x_cheby * T[:, k - 1] - T[:, k - 2] + + # Evaluate regressor log-rate at each pt bin: (n_outer, 5, n_smooth) + log_rate = np.einsum("oak,sk->oas", params_model, T) + + # The regressor fits log(data_yield / bin_width). + # The model (with mc = ones) predicts yield = exp(-q), so: + # q(x) = -log(data_yield) = -(log_rate + log(bin_width)) + log_yield = log_rate + np.log(bin_widths) # broadcast over (n_outer, 5, n_smooth) + target = -log_yield # q values to represent as a power series + + # Build power-series Vandermonde for x_norm: shape (n_smooth, order+1) + V = np.column_stack([x_norm**k for k in range(order + 1)]) + + # Solve V @ q_coeffs = target for each (outer, abcd) in least-squares sense + target_mat = target.transpose(2, 0, 1).reshape(n_smooth, n_outer_flat * 5) + q_mat = np.linalg.lstsq(V, target_mat, rcond=None)[0] # (order+1, n_outer*5) + q_model = q_mat.T.reshape(n_outer_flat, 5, order + 1) # (n_outer, 5, order+1) + + # Flatten to model's layout [A_block, B_block, C_block, Ax_block, Bx_block] + # Each block: n_outer × (order+1) in C-order + params_out = q_model.transpose(1, 0, 2).reshape(-1) # (5 * n_outer * (order+1),) + if outpath and not os.path.isdir(outpath): + os.makedirs(outpath) + + outfile = f"{outpath}/params" + if postfix: + outfile += f"_{postfix}" + + outfile += ".hdf5" + + with h5py.File(outfile, mode="w") as f: + f.create_dataset("params", data=params_out) + f.create_dataset("order", data=np.array(order)) + f.create_dataset( + "smoothing_axis_name", + data=np.array(fakeselector.smoothing_axis_name, dtype=h5py.string_dtype()), + ) + f.create_dataset("n_outer", data=np.array(n_outer_flat)) + f.create_dataset("outer_shape", data=np.array(outer_shape, dtype="int64")) + if meta_data_dict is not None: + ioutils.pickle_dump_h5py("meta", meta_data_dict, f) + + logger.info( + f"Saved smoothing initial params to {outfile} " + f"(shape {params_out.shape}, n_outer={n_outer_flat}, n_abcd=5, order={order})" + ) + + +def main(): + parser = make_parser() + args = parser.parse_args() + logging.setup_logger(__file__, args.verbose, args.noColorLogger) + + dg = Datagroups( + args.inputFile, + excludeGroups=args.excludeProcGroups if args.excludeProcGroups else None, + filterGroups=args.filterProcGroups if args.filterProcGroups else None, + ) + + dg.fakerate_axes = args.fakerateAxes + dg.set_histselectors( + dg.getNames(), + args.inputBaseName, + mode=args.fakeEstimation, + smoothing_mode=args.fakeSmoothingMode, + smoothingOrderSpectrum=args.fakeSmoothingOrder, + smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, + mcCorr=None, + integrate_x=True, + forceGlobalScaleFakes=False, + abcdExplicitAxisEdges={}, + fakeTransferAxis="", + fakeTransferCorrFileName=None, + histAxesRemovedBeforeFakes=[], + ) + + fakeselector = dg.groups[dg.fakeName].histselector + logger.info(f"fakeselector type: {type(fakeselector).__name__}") + + meta_data_dict = { + "meta_info": output_tools.make_meta_info_dict( + args=args, + wd=common.base_dir, + ), + "meta_info_input": dg.getMetaInfo(), + } + + dump_smoothing_params( + args.outpath, + fakeselector, + dg, + args.inputBaseName, + meta_data_dict=meta_data_dict, + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 1cf15e7bd..4c3705694 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -583,7 +583,15 @@ def make_parser(parser=None, argv=None): type=str, help="Set the mode for the fake estimation", default="extended1D", - choices=["mc", "closure", "simple", "extrapolate", "extended1D", "extended2D"], + choices=[ + "none", + "mc", + "closure", + "simple", + "extrapolate", + "extended1D", + "extended2D", + ], ) parser.add_argument( "--forceGlobalScaleFakes", @@ -1334,38 +1342,49 @@ def setup( if args.qcdProcessName: datagroups.fakeName = args.qcdProcessName - abcdExplicitAxisEdges = {} - if len(args.ABCDedgesByAxis): - for item in args.ABCDedgesByAxis: - ax_name, ax_edges = item.split("=") - abcdExplicitAxisEdges[ax_name] = [float(x) for x in ax_edges.split(",")] - if wmass and not datagroups.xnorm: - datagroups.fakerate_axes = args.fakerateAxes - # datagroups.fakeTransferAxis = args.fakeTransferAxis if args.fakeTransferAxis in args.fakerateAxes else "" - # datagroups.fakeTransferCorrFileName = args.fakeTransferCorrFileName - histselector_kwargs = dict( - mode=args.fakeEstimation, - smoothing_mode=args.fakeSmoothingMode, - smoothingOrderSpectrum=args.fakeSmoothingOrder, - smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, - mcCorr=args.fakeMCCorr, - integrate_x="mt" not in fitvar, - forceGlobalScaleFakes=args.forceGlobalScaleFakes, - abcdExplicitAxisEdges=abcdExplicitAxisEdges, - fakeTransferAxis=( - args.fakeTransferAxis - if args.fakeTransferAxis in args.fakerateAxes - else "" - ), - fakeTransferCorrFileName=args.fakeTransferCorrFileName, - histAxesRemovedBeforeFakes=( - [str(x[0]) for x in args.presel] if args.presel else [] - ), - ) - datagroups.set_histselectors( - datagroups.getNames(), inputBaseName, **histselector_kwargs - ) + if args.fakeEstimation not in [None, "none"]: + abcdExplicitAxisEdges = {} + if len(args.ABCDedgesByAxis): + for item in args.ABCDedgesByAxis: + ax_name, ax_edges = item.split("=") + abcdExplicitAxisEdges[ax_name] = [ + float(x) for x in ax_edges.split(",") + ] + + datagroups.fakerate_axes = args.fakerateAxes + # datagroups.fakeTransferAxis = args.fakeTransferAxis if args.fakeTransferAxis in args.fakerateAxes else "" + # datagroups.fakeTransferCorrFileName = args.fakeTransferCorrFileName + histselector_kwargs = dict( + mode=args.fakeEstimation, + smoothing_mode=args.fakeSmoothingMode, + smoothingOrderSpectrum=args.fakeSmoothingOrder, + smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, + mcCorr=args.fakeMCCorr, + integrate_x="mt" not in fitvar, + forceGlobalScaleFakes=args.forceGlobalScaleFakes, + abcdExplicitAxisEdges=abcdExplicitAxisEdges, + fakeTransferAxis=( + args.fakeTransferAxis + if args.fakeTransferAxis in args.fakerateAxes + else "" + ), + fakeTransferCorrFileName=args.fakeTransferCorrFileName, + histAxesRemovedBeforeFakes=( + [str(x[0]) for x in args.presel] if args.presel else [] + ), + ) + datagroups.set_histselectors( + datagroups.getNames(), inputBaseName, **histselector_kwargs + ) + else: + from wremnants.postprocessing import histselections as sel + + g = datagroups.fakeName + members = datagroups.groups[g].members[:] + if len(members) == 0: + raise RuntimeError(f"No member found for group {g}") + datagroups.groups[g].histselector = sel.OnesSelector() logger.debug(f"Making datacards with these processes: {datagroups.getProcesses()}") diff --git a/wremnants/postprocessing/histselections.py b/wremnants/postprocessing/histselections.py index 9258e371f..2ccc1ef9a 100644 --- a/wremnants/postprocessing/histselections.py +++ b/wremnants/postprocessing/histselections.py @@ -152,9 +152,9 @@ def __init__( # or if abcdExplicitAxisEdges is provided from outside self.abcd_thresholds = { "pt": [26, 28, 30], - "mt": [0, 40] if self.ABCDmode == "simple" else [0, 20, 40], + "mt": [0, 40, np.inf] if self.ABCDmode == "simple" else [0, 20, 40, np.inf], "iso": [0, 4, 8, 12], - "relIso": [0, 0.15, 0.3, 0.45], + "relIso": [0, 0.15, np.inf], "relJetLeptonDiff": [0, 0.2, 0.35, 0.5], "dxy": [0, 0.01, 0.02, 0.03], } @@ -403,6 +403,23 @@ def get_hist(self, h, is_nominal=False): return self.get_hist_passX_passY(h) +class OnesSelector(object): + """ + Don't actually select anything, just set the values of the histogram to ones + This is e.g. to perform the ABCD method in the simultaneous fit of all regions to data + """ + + def __init__(self, *args, **kwargs): + pass + + # signal region selection + def get_hist(self, h, is_nominal=False): + h_new = h.copy() + h_new.values()[...] = np.ones_like(h.values()) + h_new.variances()[...] = np.zeros_like(h.values()) + return h_new + + class FakeSelectorSimpleABCD(HistselectorABCD): # supported smoothing mode choices smoothing_modes = ["binned", "fakerate", "hybrid", "full"] @@ -811,6 +828,11 @@ def smoothen( logger.debug("Reduce is " + str(reduce)) + # Always save params before (optional) reduction so the dump can access + # per-region polynomial coefficients for both signal_region=False and + # signal_region=True calls. + self._params_before_reduce = regressor.params.copy() + if reduce: # add up parameters from smoothing of individual sideband regions if type(self) == FakeSelectorSimpleABCD: From 8ef2d114eeac9121d4d8bb5cbf3034241cfa732a Mon Sep 17 00:00:00 2001 From: davidwalter2 Date: Thu, 16 Apr 2026 11:03:30 -0400 Subject: [PATCH 23/23] Add support for simultaneous (extended)ABCD fit --- scripts/rabbit/regen_smoothing_params.py | 300 +++++++++++++++++++++ scripts/rabbit/setupRabbit.py | 83 +++--- wremnants/postprocessing/histselections.py | 26 +- 3 files changed, 375 insertions(+), 34 deletions(-) create mode 100644 scripts/rabbit/regen_smoothing_params.py diff --git a/scripts/rabbit/regen_smoothing_params.py b/scripts/rabbit/regen_smoothing_params.py new file mode 100644 index 000000000..cb0ceeb43 --- /dev/null +++ b/scripts/rabbit/regen_smoothing_params.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +"""Dump per-region smoothing polynomial coefficients for SmoothExtendedABCD. + +Refit the linearized smoothing polynomial from a nominal fake histogram and +save the per-region polynomial coefficients in the SmoothExtendedABCD +power-series basis. The output can be loaded as initial parameter values in a +rabbit fit via ``params:PATH`` in the ``--paramModel SmoothExtendedABCDIsoMT`` +CLI token. + +Typical use:: + + python scripts/rabbit/regen_smoothing_params.py \\ + -i mw_with_mu_eta_pt_scetlib_dyturbo.hdf5 \\ + -o /path/to/params.hdf5 +""" + +import os + +import h5py +import numpy as np + +from wremnants.postprocessing.datagroups.datagroups import Datagroups +from wremnants.postprocessing.histselections import FakeSelectorSimpleABCD +from wremnants.postprocessing.regression import Regressor +from wremnants.utilities import common, parsing +from wums import ioutils, logging, output_tools + +logger = logging.child_logger(__name__) + + +def make_parser(): + parser = parsing.base_parser() + parser.description = __doc__ + parser.add_argument( + "-i", + "--inputFile", + required=True, + type=str, + help="Input HDF5 histogram file (output of a histmaker).", + ) + parser.add_argument( + "-o", + "--outpath", + required=True, + type=str, + help="Output path for the params HDF5 file (.hdf5 appended if missing).", + ) + parser.add_argument( + "--inputBaseName", + default="nominal", + type=str, + help="Name of the nominal histogram inside the input file.", + ) + parser.add_argument( + "--fakerateAxes", + nargs="+", + default=["eta", "pt", "charge"], + help="Axes for the fakerate binning.", + ) + parser.add_argument( + "--fakeEstimation", + type=str, + default="extended1D", + choices=["simple", "extrapolate", "extended1D", "extended2D"], + help="Fake estimation mode (must match what will be used in setupRabbit).", + ) + parser.add_argument( + "--fakeSmoothingMode", + type=str, + default="full", + choices=FakeSelectorSimpleABCD.smoothing_modes, + help="Smoothing mode for fake estimate.", + ) + parser.add_argument( + "--fakeSmoothingOrder", + type=int, + default=3, + help="Polynomial order for the spectrum smoothing.", + ) + parser.add_argument( + "--fakeSmoothingPolynomial", + type=str, + default="chebyshev", + choices=Regressor.polynomials, + help="Polynomial type for the spectrum smoothing.", + ) + parser.add_argument( + "--excludeProcGroups", + type=str, + nargs="*", + default=["QCD"], + help="Process groups to exclude when building Datagroups.", + ) + parser.add_argument( + "--filterProcGroups", + type=str, + nargs="*", + default=None, + help="If set, keep only these process groups when building Datagroups.", + ) + return parser + + +def dump_smoothing_params( + outpath, fakeselector, datagroups, inputBaseName, meta_data_dict=None, postfix="" +): + """ + Re-fit the linearized smoothing polynomial for the nominal fake histogram + (without the region reduction step) and save the per-region polynomial + coefficients in the SmoothExtendedABCD power-series basis to *outpath*. + + The saved array has shape (5 * n_outer * (order+1),) with the layout + [A_params, B_params, C_params, Ax_params, Bx_params], matching the + internal layout expected by SmoothExtendedABCD as initial_params. + + Flat ABCD index ordering for the 5 regions with signal_region=False + (FakeSelector1DExtendedABCD, with flow=True, after y-axis flip so tight iso is last): + 0=Ax (low mt, fail iso), 1=Bx (low mt, pass iso), + 2=A (mid mt, fail iso), 3=B (mid mt, pass iso), + 4=C (signal mt, fail iso) ← application region, rabbit's free parameter + Note: signal_region=False drops flat index 5 = D (signal mt, pass iso = signal + region), which is rabbit's predicted region. + + Histselections ↔ SmoothExtendedABCD model name mapping: + histsel "application region" (signal mt + fail iso) = C_model (free) + histsel "signal region" (signal mt + pass iso) = D_model (predicted) + + Mapping to model order [A=0, B=1, C=2, Ax=3, Bx=4]: + model_A ← sideband flat 2 + model_B ← sideband flat 3 + model_C ← sideband flat 4 + model_Ax ← sideband flat 0 + model_Bx ← sideband flat 1 + """ + g = datagroups.fakeName + # Build the combined fake histogram (data - prompt MC) the same way the normal + # histogram loading does, but without applying the histselector. + datagroups.loadHistsForDatagroups( + inputBaseName, syst="", procsToRead=[g], label="_dump_tmp", applySelection=False + ) + h_fakes = datagroups.groups[g].hists["_dump_tmp"] + + # Single call with signal_region=False: returns 5 regions [Ax=0, Bx=1, A=2, B=3, C=4] + # The dropped 6th flat element (D = signal mt + pass iso) is the predicted region. + fakeselector.calculate_fullABCD_smoothed(h_fakes, signal_region=False) + if not hasattr(fakeselector, "_params_before_reduce"): + raise RuntimeError( + "dump_smoothing_params: _params_before_reduce not found on fakeselector. " + "Ensure fakeSmoothingMode='full' is used." + ) + # Shape: (*outer_dims, 5, order+1) — indices [Ax=0, Bx=1, A=2, B=3, C=4] + params_5d = fakeselector._params_before_reduce.copy() + + reg = fakeselector.spectrum_regressor + order = reg.order + + # Flatten outer dims → (n_outer_flat, n_abcd, order+1) + outer_shape = params_5d.shape[:-2] + n_outer_flat = int(np.prod(outer_shape)) + params_5d_flat = params_5d.reshape(n_outer_flat, 5, order + 1) + + # Assemble the 5 model regions in model order [A, B, C, Ax, Bx] + # A ← flat 2, B ← flat 3, C ← flat 4, Ax ← flat 0, Bx ← flat 1 + params_model = np.stack( + [ + params_5d_flat[:, 2, :], # A (mid mt, fail iso) + params_5d_flat[:, 3, :], # B (mid mt, pass iso) + params_5d_flat[:, 4, :], # C (signal mt, fail iso) = application region + params_5d_flat[:, 0, :], # Ax (low mt, fail iso) + params_5d_flat[:, 1, :], # Bx (low mt, pass iso) + ], + axis=1, + ) # (n_outer, 5, order+1) + + # The regressor was fit with polynomial=chebyshev, normalised x to [-1,1] + # via x_cheby = 2*(x - center)/(max-min). + # The SmoothExtendedABCD model uses a power-series basis normalised to [0,1] + # via x_norm = (x - x_min)/(x_max - x_min). + # Conversion: evaluate the Chebyshev polynomial at the model's x_norm points + # and refit with the power-series Vandermonde. + smooth_ax = h_fakes.axes[fakeselector.smoothing_axis_name] + x_centers = np.array(smooth_ax.centers) + bin_widths = np.array(smooth_ax.widths) + x_min_model = float(x_centers.min()) + x_max_model = float(x_centers.max()) + x_norm = ( + (x_centers - x_min_model) / (x_max_model - x_min_model) + if x_max_model > x_min_model + else np.zeros_like(x_centers) + ) + + # Transform x_centers to the regressor's Chebyshev domain [-1, 1] + x_cheby = reg.transform_x(x_centers) # shape (n_smooth,) + + # Evaluate Chebyshev polynomials T_k at x_cheby: shape (n_smooth, order+1) + n_smooth = len(x_cheby) + T = np.zeros((n_smooth, order + 1)) + T[:, 0] = 1.0 + if order >= 1: + T[:, 1] = x_cheby + for k in range(2, order + 1): + T[:, k] = 2.0 * x_cheby * T[:, k - 1] - T[:, k - 2] + + # Evaluate regressor log-rate at each pt bin: (n_outer, 5, n_smooth) + log_rate = np.einsum("oak,sk->oas", params_model, T) + + # The regressor fits log(data_yield / bin_width). + # The model (with mc = ones) predicts yield = exp(-q), so: + # q(x) = -log(data_yield) = -(log_rate + log(bin_width)) + log_yield = log_rate + np.log(bin_widths) # broadcast over (n_outer, 5, n_smooth) + target = -log_yield # q values to represent as a power series + + # Build power-series Vandermonde for x_norm: shape (n_smooth, order+1) + V = np.column_stack([x_norm**k for k in range(order + 1)]) + + # Solve V @ q_coeffs = target for each (outer, abcd) in least-squares sense + target_mat = target.transpose(2, 0, 1).reshape(n_smooth, n_outer_flat * 5) + q_mat = np.linalg.lstsq(V, target_mat, rcond=None)[0] # (order+1, n_outer*5) + q_model = q_mat.T.reshape(n_outer_flat, 5, order + 1) # (n_outer, 5, order+1) + + # Flatten to model's layout [A_block, B_block, C_block, Ax_block, Bx_block] + # Each block: n_outer × (order+1) in C-order + params_out = q_model.transpose(1, 0, 2).reshape(-1) # (5 * n_outer * (order+1),) + if outpath and not os.path.isdir(outpath): + os.makedirs(outpath) + + outfile = f"{outpath}/params" + if postfix: + outfile += f"_{postfix}" + + outfile += ".hdf5" + + with h5py.File(outfile, mode="w") as f: + f.create_dataset("params", data=params_out) + f.create_dataset("order", data=np.array(order)) + f.create_dataset( + "smoothing_axis_name", + data=np.array(fakeselector.smoothing_axis_name, dtype=h5py.string_dtype()), + ) + f.create_dataset("n_outer", data=np.array(n_outer_flat)) + f.create_dataset("outer_shape", data=np.array(outer_shape, dtype="int64")) + if meta_data_dict is not None: + ioutils.pickle_dump_h5py("meta", meta_data_dict, f) + + logger.info( + f"Saved smoothing initial params to {outfile} " + f"(shape {params_out.shape}, n_outer={n_outer_flat}, n_abcd=5, order={order})" + ) + + +def main(): + parser = make_parser() + args = parser.parse_args() + logging.setup_logger(__file__, args.verbose, args.noColorLogger) + + dg = Datagroups( + args.inputFile, + excludeGroups=args.excludeProcGroups if args.excludeProcGroups else None, + filterGroups=args.filterProcGroups if args.filterProcGroups else None, + ) + + dg.fakerate_axes = args.fakerateAxes + dg.set_histselectors( + dg.getNames(), + args.inputBaseName, + mode=args.fakeEstimation, + smoothing_mode=args.fakeSmoothingMode, + smoothingOrderSpectrum=args.fakeSmoothingOrder, + smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, + mcCorr=None, + integrate_x=True, + forceGlobalScaleFakes=False, + abcdExplicitAxisEdges={}, + fakeTransferAxis="", + fakeTransferCorrFileName=None, + histAxesRemovedBeforeFakes=[], + ) + + fakeselector = dg.groups[dg.fakeName].histselector + logger.info(f"fakeselector type: {type(fakeselector).__name__}") + + meta_data_dict = { + "meta_info": output_tools.make_meta_info_dict( + args=args, + wd=common.base_dir, + ), + "meta_info_input": dg.getMetaInfo(), + } + + dump_smoothing_params( + args.outpath, + fakeselector, + dg, + args.inputBaseName, + meta_data_dict=meta_data_dict, + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 1cf15e7bd..4c3705694 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -583,7 +583,15 @@ def make_parser(parser=None, argv=None): type=str, help="Set the mode for the fake estimation", default="extended1D", - choices=["mc", "closure", "simple", "extrapolate", "extended1D", "extended2D"], + choices=[ + "none", + "mc", + "closure", + "simple", + "extrapolate", + "extended1D", + "extended2D", + ], ) parser.add_argument( "--forceGlobalScaleFakes", @@ -1334,38 +1342,49 @@ def setup( if args.qcdProcessName: datagroups.fakeName = args.qcdProcessName - abcdExplicitAxisEdges = {} - if len(args.ABCDedgesByAxis): - for item in args.ABCDedgesByAxis: - ax_name, ax_edges = item.split("=") - abcdExplicitAxisEdges[ax_name] = [float(x) for x in ax_edges.split(",")] - if wmass and not datagroups.xnorm: - datagroups.fakerate_axes = args.fakerateAxes - # datagroups.fakeTransferAxis = args.fakeTransferAxis if args.fakeTransferAxis in args.fakerateAxes else "" - # datagroups.fakeTransferCorrFileName = args.fakeTransferCorrFileName - histselector_kwargs = dict( - mode=args.fakeEstimation, - smoothing_mode=args.fakeSmoothingMode, - smoothingOrderSpectrum=args.fakeSmoothingOrder, - smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, - mcCorr=args.fakeMCCorr, - integrate_x="mt" not in fitvar, - forceGlobalScaleFakes=args.forceGlobalScaleFakes, - abcdExplicitAxisEdges=abcdExplicitAxisEdges, - fakeTransferAxis=( - args.fakeTransferAxis - if args.fakeTransferAxis in args.fakerateAxes - else "" - ), - fakeTransferCorrFileName=args.fakeTransferCorrFileName, - histAxesRemovedBeforeFakes=( - [str(x[0]) for x in args.presel] if args.presel else [] - ), - ) - datagroups.set_histselectors( - datagroups.getNames(), inputBaseName, **histselector_kwargs - ) + if args.fakeEstimation not in [None, "none"]: + abcdExplicitAxisEdges = {} + if len(args.ABCDedgesByAxis): + for item in args.ABCDedgesByAxis: + ax_name, ax_edges = item.split("=") + abcdExplicitAxisEdges[ax_name] = [ + float(x) for x in ax_edges.split(",") + ] + + datagroups.fakerate_axes = args.fakerateAxes + # datagroups.fakeTransferAxis = args.fakeTransferAxis if args.fakeTransferAxis in args.fakerateAxes else "" + # datagroups.fakeTransferCorrFileName = args.fakeTransferCorrFileName + histselector_kwargs = dict( + mode=args.fakeEstimation, + smoothing_mode=args.fakeSmoothingMode, + smoothingOrderSpectrum=args.fakeSmoothingOrder, + smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, + mcCorr=args.fakeMCCorr, + integrate_x="mt" not in fitvar, + forceGlobalScaleFakes=args.forceGlobalScaleFakes, + abcdExplicitAxisEdges=abcdExplicitAxisEdges, + fakeTransferAxis=( + args.fakeTransferAxis + if args.fakeTransferAxis in args.fakerateAxes + else "" + ), + fakeTransferCorrFileName=args.fakeTransferCorrFileName, + histAxesRemovedBeforeFakes=( + [str(x[0]) for x in args.presel] if args.presel else [] + ), + ) + datagroups.set_histselectors( + datagroups.getNames(), inputBaseName, **histselector_kwargs + ) + else: + from wremnants.postprocessing import histselections as sel + + g = datagroups.fakeName + members = datagroups.groups[g].members[:] + if len(members) == 0: + raise RuntimeError(f"No member found for group {g}") + datagroups.groups[g].histselector = sel.OnesSelector() logger.debug(f"Making datacards with these processes: {datagroups.getProcesses()}") diff --git a/wremnants/postprocessing/histselections.py b/wremnants/postprocessing/histselections.py index 9258e371f..2ccc1ef9a 100644 --- a/wremnants/postprocessing/histselections.py +++ b/wremnants/postprocessing/histselections.py @@ -152,9 +152,9 @@ def __init__( # or if abcdExplicitAxisEdges is provided from outside self.abcd_thresholds = { "pt": [26, 28, 30], - "mt": [0, 40] if self.ABCDmode == "simple" else [0, 20, 40], + "mt": [0, 40, np.inf] if self.ABCDmode == "simple" else [0, 20, 40, np.inf], "iso": [0, 4, 8, 12], - "relIso": [0, 0.15, 0.3, 0.45], + "relIso": [0, 0.15, np.inf], "relJetLeptonDiff": [0, 0.2, 0.35, 0.5], "dxy": [0, 0.01, 0.02, 0.03], } @@ -403,6 +403,23 @@ def get_hist(self, h, is_nominal=False): return self.get_hist_passX_passY(h) +class OnesSelector(object): + """ + Don't actually select anything, just set the values of the histogram to ones + This is e.g. to perform the ABCD method in the simultaneous fit of all regions to data + """ + + def __init__(self, *args, **kwargs): + pass + + # signal region selection + def get_hist(self, h, is_nominal=False): + h_new = h.copy() + h_new.values()[...] = np.ones_like(h.values()) + h_new.variances()[...] = np.zeros_like(h.values()) + return h_new + + class FakeSelectorSimpleABCD(HistselectorABCD): # supported smoothing mode choices smoothing_modes = ["binned", "fakerate", "hybrid", "full"] @@ -811,6 +828,11 @@ def smoothen( logger.debug("Reduce is " + str(reduce)) + # Always save params before (optional) reduction so the dump can access + # per-region polynomial coefficients for both signal_region=False and + # signal_region=True calls. + self._params_before_reduce = regressor.params.copy() + if reduce: # add up parameters from smoothing of individual sideband regions if type(self) == FakeSelectorSimpleABCD: