diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0fb4507b9..b8368e9c6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -341,7 +341,7 @@ jobs: # - name: bsm plot parameter correlations # run: >- - # scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py '$WREMNANTS_OUTDIR/WMass_eta_pt_charge_bsm/fitresults.hdf5' + # scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py '$WREMNANTS_OUTDIR/WMass_eta_pt_charge_bsm/fitresults.hdf5' # -o $WEB_DIR/$PLOT_DIR/BSM --params 'WtoNMuMass50' massShiftW100MeV # --title CMS --subtitle Preliminary --titlePos 0 --config 'wremnants/utilities/styles/styles.py' --correlation --showNumbers @@ -475,14 +475,14 @@ jobs: - name: plot xsec covariance run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/WMass_eta_pt_charge_poiAsNoi/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_mw --title CMS --subtitle Preliminary --postfix poiAsNoi -m Project ch0_masked ptGen -m Project ch0_masked absEtaGen --config wremnants/utilities/styles/styles.py - name: plot xsec corrrelation run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/WMass_eta_pt_charge_poiAsNoi/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_mw --title CMS --subtitle Preliminary --postfix poiAsNoi -m Project ch0_masked ptGen -m Project ch0_masked absEtaGen --config wremnants/utilities/styles/styles.py --correlation @@ -997,8 +997,7 @@ jobs: mkdir -p $ALPHAS_OUTDIR scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/rabbit/setupRabbit.py \ -i $ALPHAS_HIST_FILE --fitvar "$ALPHAS_FITVAR" --lumiScale $LUMI_SCALE --realData \ - -o $ALPHAS_OUTDIR --noi alphaS --npUnc LatticeEigvars --pdfUncFromCorr \ - --axlim ptll 0j 44j + -o $ALPHAS_OUTDIR --noi alphaS --npUnc LatticeEigvars --axlim ptll 0j 44j - name: alphaS Z reco fit (${{ matrix.mode }}) run: >- @@ -1099,14 +1098,14 @@ jobs: - name: plot xsec covariance run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_poiAsNoi/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --title CMS --subtitle Preliminary --postfix poiAsNoi -m Project ch0_masked ptVGen -m Project ch0_masked absYVGen --config wremnants/utilities/styles/styles.py - name: plot xsec corrrelation run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_poiAsNoi/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --title CMS --subtitle Preliminary --postfix poiAsNoi -m Project ch0_masked ptVGen -m Project ch0_masked absYVGen --config wremnants/utilities/styles/styles.py --correlation @@ -1187,7 +1186,7 @@ jobs: - name: unfolding plot pulls run: >- scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_pulls_and_impacts.py - $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_unfolding/fitresults.hdf5 -m ungrouped --sortDescending -s constraint --debug --noImpacts + $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_unfolding/fitresults.hdf5 -m 
ungrouped --sortDescending -s constraint --debug --impactType none -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --postfix unfolding -n 50 --otherExtensions png pdf - name: plot xsec @@ -1206,14 +1205,14 @@ jobs: - name: plot xsec covariance run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_unfolding/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --postfix unfolding --title CMS --subtitle Preliminary -m Project ch0_masked ptVGen -m Project ch0_masked absYVGen --config wremnants/utilities/styles/styles.py - name: plot xsec corrrelation run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $WREMNANTS_OUTDIR/ZMassDilepton_ptll_yll_unfolding/fitresults.hdf5 -o $WEB_DIR/$PLOT_DIR/unfolding_dilepton --postfix unfolding --title CMS --subtitle Preliminary -m Project ch0_masked ptVGen -m Project ch0_masked absYVGen --config wremnants/utilities/styles/styles.py --correlation @@ -1420,7 +1419,7 @@ jobs: # --fitvar "ptll-yll-cosThetaStarll_quantile-phiStarll_quantile" "eta-pt-charge" \ # --lumiScale $LUMI_SCALE $LUMI_SCALE \ # -o $ALPHAS_OUTDIR --noi alphaS wmass --postfix reco \ - # --npUnc LatticeEigvars --pdfUncFromCorr + # --npUnc LatticeEigvars # - name: alphaS WZ reco fit # run: >- @@ -1469,7 +1468,7 @@ jobs: --genAxes "ptVGen-absYVGen-helicitySig" \ --scaleNormXsecHistYields "0.05" --allowNegativeExpectation \ --realData --systematicType normal --postfix unfolding \ - --npUnc LatticeEigvars --pdfUncFromCorr \ + --npUnc LatticeEigvars \ --rebin 2 2 # this is for CI only! 
needed for max_files=100 - name: alphaS Z unfolding fit @@ -1551,14 +1550,14 @@ jobs: - name: alphaS z gen-fit xsec covariance run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $ALPHAS_Z_GENFIT -o $ALPHAS_PLOT_DIR --postfix z_genfit --title CMS --subtitle Preliminary -m Project chSigmaUL ptVGen -m Project chSigmaUL absYVGen --config wremnants/utilities/styles/styles.py - name: alphaS z gen-fit xsec corrrelation run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_hists_cov.py + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run.sh rabbit_plot_cov.py $ALPHAS_Z_GENFIT -o $ALPHAS_PLOT_DIR --postfix z_genfit --title CMS --subtitle Preliminary -m Project chSigmaUL ptVGen -m Project chSigmaUL absYVGen --config wremnants/utilities/styles/styles.py --correlation diff --git a/rabbit b/rabbit index acbe1d44c..68240c738 160000 --- a/rabbit +++ b/rabbit @@ -1 +1 @@ -Subproject commit acbe1d44c9e5f53262fa00f83a195288f44c789a +Subproject commit 68240c7388c9a64ab3ad829bd548cfffe5ff9956 diff --git a/scripts/analysisTools/unfolding/inclusive_xsec_summary.py b/scripts/analysisTools/unfolding/inclusive_xsec_summary.py new file mode 100644 index 000000000..8e6571ec9 --- /dev/null +++ b/scripts/analysisTools/unfolding/inclusive_xsec_summary.py @@ -0,0 +1,1040 @@ +# On results of fiducial inclusive cross sections and their ratios +# make a summary plot with different theory predictions +# make a latex summary table with the breakdown of uncertainties + +import math + +import hist +import matplotlib.pyplot as plt +import mplhep as hep +import numpy as np +import pandas as pd +from matplotlib.patches import Ellipse, Polygon +from scipy.stats import chi2 + +import rabbit.io_tools +from wremnants.utilities import parsing +from wremnants.utilities.io_tools import tex_tools + +from wums import logging, output_tools, plot_tools # isort: skip + +parser = parsing.plot_parser() +parser.add_argument("infile", type=str, help="Rabbit fitresult file") +parser.add_argument( + "--pdfFiles", + type=str, + nargs="*", + default=[], + help="Rabbit fitresult files with alternative pdf predictions", +) +parser.add_argument( + "--config", + type=str, + default=None, + help="Path to config file for style formatting", +) +parser.add_argument( + "--full", action="store_true", default=False, help="full phase space results" +) +parser.add_argument( + "--scaleToNewLumi", + action="store_true", + default=False, + help="scale the results of SMP-20-004 to updated lumi", +) +parser.add_argument( + "--includeSMP20004", + action="store_true", + default=False, + help="include data points from SMP-20-004 (JHEP04(2025)162)", +) +parser.add_argument( + "--theoryFiles", + type=str, + nargs="*", + default=[], + help="Theory prediction files in LABEL:filepath format. " + "Supports DYTURBO 1D txt files and NNLOjet cross.dat files. 
" + "Multiple files with the same label are combined into one prediction set.", +) +args = parser.parse_args() + +logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) + +config = plot_tools.load_config(args.config) + +grouping = getattr(config, "nuisance_grouping", {}).get("xsecs", None) +translate_label = getattr(config, "systematics_labels", {}) + +fitresult, meta = rabbit.io_tools.get_fitresult(args.infile, meta=True) +result = fitresult["mappings"] + +pdf_results = {} +comp_result = {} +for pdf_file in args.pdfFiles: + pdf_name = pdf_file.split("/")[-2].split("_")[-1] + pdf_name = pdf_name.upper().replace("AN3LO", "an3lo") + + logger.info(f"Now at PDF {pdf_name}") + + pdf_result, pdf_meta = rabbit.io_tools.get_fitresult(pdf_file, meta=True) + + pdf_mapping = pdf_result["mappings"] + + if "CompositeMapping" in pdf_mapping.keys(): + comp_result[pdf_name] = pdf_mapping["CompositeMapping"] + else: + pdf_results[pdf_name] = pdf_mapping + +all_1d_pdf_names = list(pdf_results.keys()) + [ + k for k in comp_result.keys() if k != "TOTAL" +] +nPDFs = len(all_1d_pdf_names) +pdf_colors = { + "CT18": "#2ca02c", + "CT18Z": "#E42536", + "PDF4LHC21": "#9467bd", + "MSHT20": "#7f7f7f", + "MSHT20an3lo": "#8c564b", + "NNPDF31": "#e377c2", + "NNPDF40": "#17becf", +} + +# mapping, channel, bin +identifier = "_masked" +if args.full: + identifier = f"_full{identifier}" +xsec_keys = [ + ( + r"$\mathrm{W}^{-}$", + f"Project ch1{identifier} qGen", + f"ch1{identifier}", + {"qGen": 0}, + ), + ( + r"$\mathrm{W}^{+}$", + f"Project ch1{identifier} qGen", + f"ch1{identifier}", + {"qGen": 1}, + ), + (r"$\mathrm{W}$", f"Project ch1{identifier}", f"ch1{identifier}", None), + (r"$\mathrm{Z}$", f"Project ch0{identifier}", f"ch0{identifier}", None), + ( + r"$\mathrm{W}^{+}/\mathrm{W}^{-}$", + ( + f"Ratio ch1{identifier} ch1{identifier} qGen:1,ptGen:sum,absEtaGen:sum qGen:0,ptGen:sum,absEtaGen:sum", + f"Ratio ch1{identifier} ch1{identifier} qGen:1 qGen:0", + ), + f"ch1{identifier}", + None, + ), + ( + r"$\mathrm{W/Z}$", + ( + f"Ratio ch1{identifier} ch0{identifier} qGen:sum,ptGen:sum,absEtaGen:sum ptVGen:sum,absYVGen:sum", + f"Ratio ch1{identifier} ch0{identifier} qGen:sum count:sum", + ), + f"ch1{identifier}_ch0{identifier}", + None, + ), +] + +if args.scaleToNewLumi: + # old lumi / new lumi + scale = 0.199269742 / 0.204602332 +else: + scale = 1 + +smp_20_004 = { + r"$\mathrm{W}^{-}$": [8670 * scale, 215.63858652847824], + r"$\mathrm{W}^{+}$": [11800 * scale, 288.09720581775866], + r"$\mathrm{W}$": [20480 * scale, 499.8999899979995], + r"$\mathrm{Z}$": [1952 * scale, 48.63126566315131], + r"$\mathrm{W}^{+}/\mathrm{W}^{-}$": [1.3615, 0.009570788891204319], + r"$\mathrm{W/Z}$": [10.491, 0.0864002314811714], +} + +# Map xsec_key display names to internal process keys +_name_to_proc = { + r"$\mathrm{W}^{-}$": "wm", + r"$\mathrm{W}^{+}$": "wp", + r"$\mathrm{W}$": "w", + r"$\mathrm{Z}$": "z", + r"$\mathrm{W}^{+}/\mathrm{W}^{-}$": "wp_wm", + r"$\mathrm{W/Z}$": "wz", +} + +theory_colors = { + "DYTURBO": "#ff7f0e", + "NNLOjet": "#1f77b4", + "SCETlib+DYTurbo": "#9467bd", +} + + +def _detect_process(path): + """Detect W-/W+/Z from file path (filename or parent directories).""" + parts = path.replace("\\", "/").split("/") + for part in reversed(parts): + p = part.lower() + if p.startswith("results_wm") or p.startswith("wm") or "wminus" in p: + return "wm" + if p.startswith("results_wp") or p.startswith("wp") or "wplus" in p: + return "wp" + if ( + (p.startswith("results_z") or p.startswith("z")) + and "wm" not in p 
+ and "wp" not in p + ): + return "z" + raise ValueError(f"Cannot detect process (z/wm/wp) from path: {path}") + + +def _parse_dyturbo_1d(path): + """Read total cross section from DYTURBO 1D rapidity file (last summary line). + Returns (central_fb, unc_up_fb, unc_dn_fb); no scale variations in 1D files.""" + last = None + with open(path) as f: + for line in f: + s = line.strip() + if s and not s.startswith("#"): + last = s + cols = last.split() + return float(cols[2]), 0.0, 0.0 + + +def _parse_nnlojet_cross(path): + """Parse NNLOjet cross.dat and return (central_fb, unc_up_fb, unc_dn_fb). + Scale uncertainty is symmetric (quadrature of muR and muF half-ranges).""" + labels, data = None, None + with open(path) as f: + for line in f: + s = line.strip() + if s.startswith("#labels:"): + labels = s[len("#labels:") :].split() + elif not s.startswith("#") and s: + data = np.array(s.split(), dtype=float) + col = {lbl.split("[")[0]: i for i, lbl in enumerate(labels)} + vals = np.array([data[col[f"tot_scale0{i+1}"]] for i in range(7)]) + central = vals[3] # scale04 = (muR=1, muF=1) + delta_muR = (vals[5] - vals[1]) / 2 # (scale06 - scale02) / 2 + delta_muF = (vals[2] - vals[4]) / 2 # (scale03 - scale05) / 2 + unc = np.sqrt(delta_muR**2 + delta_muF**2) + return central, unc, unc + + +def _parse_matrix_radish(path): + """Parse MATRIX+RadISH pT distribution file and return (central_fb, unc_up_fb, unc_dn_fb). + Columns: bin_idx, central, stat, scale_down, stat, scale_up, stat. + Total cross section = sum of bins × bin width (1 GeV). Last row is a duplicate, skipped. + """ + data = np.loadtxt(path) + bins = data[:-1] # drop last duplicate row + bin_width = bins[1, 0] - bins[0, 0] # should be 1 GeV + central = np.sum(bins[:, 1]) * bin_width + down = np.sum(bins[:, 3]) * bin_width + up = np.sum(bins[:, 5]) * bin_width + return central, up - central, central - down + + +def _parse_theory_file(path): + with open(path) as f: + first = f.readline().strip() + if first.startswith("#labels: tot_scale01"): + return _parse_nnlojet_cross(path) + elif first.startswith("#ylo yhi PDF0"): + return _parse_dyturbo_1d(path) + elif not first.startswith("#"): + # No header: assume MATRIX_RadISH 7-column pT distribution + cols = first.split() + if len(cols) == 7: + return _parse_matrix_radish(path) + raise ValueError(f"Unknown theory file format: {path}") + + +_SCETLIB_SCALE_VARS = [ + "mufdown", + "mufup", + "kappaFO0.5-kappaf2.", + "kappaFO2.-kappaf0.5", + "mufdown-kappaFO0.5-kappaf2.", + "mufup-kappaFO2.-kappaf0.5", +] + + +def _parse_scetlib_pkl(path): + """Parse a SCETlib+DYTurbo pkl.lz4 correction file. + + Returns dict: proc -> (central_pb, unc_up_pb, unc_dn_pb). + CorrZ files produce {'z': ...}, CorrW files produce {'wm': ..., 'wp': ...}. + Scale uncertainty = asymmetric quadrature of the 6 scale variation deviations. + """ + import pickle + + import lz4.frame + + with lz4.frame.open(path, "rb") as f: + data = pickle.load(f) + + results = {} + for boson_key in [k for k in data if k in ("Z", "W")]: + sub = data[boson_key] + hist_key = next(k for k in sub if k.endswith("_hist")) + h = sub[hist_key] + vars_list = list(h.axes["vars"]) + vals = h.view(flow=True)["value"] + # Axes: Q(underflow+1data+overflow), absY(no underflow, +overflow), + # qT(underflow+data+overflow), charge(no flow), vars(no underflow, +overflow) + # Q data bin is always index 1 in the flow array. 
+ q_slice = 1 + + def _sum_proc(charge_idx, var_idx): + return float(np.sum(vals[q_slice, :, :, charge_idx, var_idx])) + + pdf0_idx = vars_list.index("pdf0") + + if boson_key == "Z": + central = _sum_proc(0, pdf0_idx) + deltas = np.array( + [ + _sum_proc(0, vars_list.index(sv)) - central + for sv in _SCETLIB_SCALE_VARS + if sv in vars_list + ] + ) + unc_up = float(np.sqrt(np.sum(np.where(deltas > 0, deltas, 0) ** 2))) + unc_dn = float(np.sqrt(np.sum(np.where(deltas < 0, deltas, 0) ** 2))) + results["z"] = (central, unc_up, unc_dn) + elif boson_key == "W": + # charge axis: index 0 = W- (charge < 0), index 1 = W+ + for ci, proc in [(0, "wm"), (1, "wp")]: + central = _sum_proc(ci, pdf0_idx) + deltas = np.array( + [ + _sum_proc(ci, vars_list.index(sv)) - central + for sv in _SCETLIB_SCALE_VARS + if sv in vars_list + ] + ) + unc_up = float(np.sqrt(np.sum(np.where(deltas > 0, deltas, 0) ** 2))) + unc_dn = float(np.sqrt(np.sum(np.where(deltas < 0, deltas, 0) ** 2))) + results[proc] = (central, unc_up, unc_dn) + + return results + + +# Build theory_predictions: label -> proc -> (central_pb, unc_up_pb, unc_dn_pb) +theory_predictions = {} +for tf in args.theoryFiles: + label, path = tf.split(":", 1) + if path.endswith(".pkl.lz4"): + # SCETlib+DYTurbo: one file may contain multiple processes + multi = _parse_scetlib_pkl(path) + for proc, (c, u, d) in multi.items(): + theory_predictions.setdefault(label, {})[proc] = (c, u, d) + else: + proc = _detect_process(path) + central_fb, unc_up_fb, unc_dn_fb = _parse_theory_file(path) + theory_predictions.setdefault(label, {})[proc] = ( + central_fb / 1e3, + unc_up_fb / 1e3, + unc_dn_fb / 1e3, + ) + +# Compute derived quantities (W, W+/W-, W/Z) from per-process values +for preds in theory_predictions.values(): + if "wm" in preds and "wp" in preds: + wm, wm_u, wm_d = preds["wm"] + wp, wp_u, wp_d = preds["wp"] + preds["w"] = (wm + wp, np.sqrt(wm_u**2 + wp_u**2), np.sqrt(wm_d**2 + wp_d**2)) + r = wp / wm + preds["wp_wm"] = ( + r, + ( + r * np.sqrt((wp_u / wp) ** 2 + (wm_u / wm) ** 2) + if wp > 0 and wm > 0 + else 0.0 + ), + ( + r * np.sqrt((wp_d / wp) ** 2 + (wm_d / wm) ** 2) + if wp > 0 and wm > 0 + else 0.0 + ), + ) + if "w" in preds and "z" in preds: + w, w_u, w_d = preds["w"] + z, z_u, z_d = preds["z"] + r = w / z + preds["wz"] = ( + r, + r * np.sqrt((w_u / w) ** 2 + (z_u / z) ** 2) if w > 0 and z > 0 else 0.0, + r * np.sqrt((w_d / w) ** 2 + (z_d / z) ** 2) if w > 0 and z > 0 else 0.0, + ) + +nTheory = len(theory_predictions) +nMeas = 1 + +lumi = meta["meta_info_input"]["channel_info"]["ch0"]["lumi"] + +custom_order = [ + "Total", + "stat", + "binByBinStat", + "luminosity", + "Fake", + "CMS_background", + "muon_eff_syst", + "muon_eff_stat", + "prefire", + "muonCalibration", + "recoil", + "pdfCT18Z", + "angularCoeffs", + "pTModeling", + "theory_ew", + "massAndWidths", +] + +dfs = [] +for name, mappings, channel, selection in xsec_keys: + if len(mappings) == 2: + mapping = mappings[0] + else: + mapping = mappings + + hp = result[mapping]["channels"][channel]["hist_prefit_inclusive"].get() + h1 = result[mapping]["channels"][channel]["hist_postfit_inclusive"].get() + hi = result[mapping]["channels"][channel][ + "hist_postfit_inclusive_gaussian_global_impacts_grouped" + ].get() + if selection is not None: + hp = hp[selection] + h1 = h1[selection] + hi = hi[selection] + if getattr(h1, "axes", False) and "yield" in h1.axes.name: + hp = hp[{"yield": hist.sum}] + h1 = h1[{"yield": hist.sum}] + hi = hi[{"yield": hist.sum}] + + prefit = hp.value + prefit_error = 
hp.variance**0.5 + + value = h1.value + error = h1.variance**0.5 + + impacts = hi.values() + + labels = np.array(hi.axes["impacts"]) + mask = np.isin(labels, grouping) + + labels = labels[mask] + impacts = impacts[mask] + + # if np.sum(impacts**2) ** 0.5 / error - 1 > 10e-10: + # raise RuntimeError( + # f"Sources don't add up to total error, got a difference of {np.sum(impacts**2)**0.5/error - 1}" + # ) + + labels = np.append(labels, "Total") + impacts = np.append(impacts, error) + + df = pd.DataFrame(np.array(impacts, dtype=np.float64).T, columns=["impact"]) + + df["label"] = labels + + for label, combine_labels in { + "binByBinStat": ["binByBinStatW", "binByBinStatZ", "binByBinStat"], + "theory_ew": ["theory_ew", "massShift", "sin2thetaZ", "widthW", "widthZ"], + }.items(): + + subset = df[df["label"].isin(combine_labels)] + subset = subset.fillna(0) + combined = np.sqrt((subset[["impact"]] ** 2).sum()) + + # df = df.drop(columns=["binByBinStatW", "binByBinStatZ", "massShift", "sin2thetaZ", "widthW", "widthZ"]) + + new_row = pd.DataFrame( + { + "label": [label], + "impact": [combined["impact"]], + } + ) + + # Remove old rows and append the new one + df = df[~df["label"].isin(combine_labels)] + df = pd.concat([df, new_row], ignore_index=True) + + df["name"] = name + df["value"] = value + + df["prefit"] = prefit + df["prefit_error"] = prefit_error + + for pdf_name, pdf_res in pdf_results.items(): + if len(mappings) == 2: + if mappings[0].replace(identifier, "") not in pdf_res.keys(): + mapping = mappings[1] + else: + mapping = mappings[0] + else: + mapping = mappings + + mapping = mapping.replace(identifier, "") + channel_mappings = pdf_res[mapping]["channels"][channel.replace(identifier, "")] + hr = channel_mappings["hist_prefit_inclusive"].get() + hr_impacts = channel_mappings[ + "hist_prefit_inclusive_global_impacts_grouped" + ].get() + + if selection is not None: + hr = hr[selection] + hr_impacts = hr_impacts[selection] + if getattr(hr, "axes", False) and "yield" in hr.axes.name: + hr = hr[{"yield": hist.sum}] + hr_impacts = hr_impacts[{"yield": hist.sum}] + + df[pdf_name] = hr.value + df[f"{pdf_name}_error"] = hr.variance**0.5 + df[f"{pdf_name}_pdf"] = hr_impacts[{"impacts": f"pdf{pdf_name}"}] + + for pdf_name, comp_res in comp_result.items(): + if pdf_name == "TOTAL": + continue + + candidates = [mappings] if not isinstance(mappings, tuple) else list(mappings) + channel_data = None + for m in candidates: + ckey = f"{m.replace(identifier, '')} {channel.replace(identifier, '')}" + if ckey in comp_res["channels"]: + channel_data = comp_res["channels"][ckey] + break + if channel_data is None: + continue + fittype = ( + "postfit" if "hist_postfit_inclusive" in channel_data.keys() else "prefit" + ) + hr = channel_data[f"hist_{fittype}_inclusive"].get() + + if selection is not None: + hr = hr[selection] + if getattr(hr, "axes", False) and "yield" in hr.axes.name: + hr = hr[{"yield": hist.sum}] + + df[pdf_name] = hr.value + df[f"{pdf_name}_error"] = hr.variance**0.5 + + if f"hist_{fittype}_inclusive_global_impacts_grouped" in channel_data: + hr_impacts = channel_data[ + f"hist_{fittype}_inclusive_global_impacts_grouped" + ].get() + if selection is not None: + hr_impacts = hr_impacts[selection] + if getattr(hr_impacts, "axes", False) and "yield" in hr_impacts.axes.name: + hr_impacts = hr_impacts[{"yield": hist.sum}] + df[f"{pdf_name}_pdf"] = hr_impacts[{"impacts": f"pdf{pdf_name}"}] + + # Convert 'labels' column to categorical with the custom order + df["label"] = pd.Categorical(df["label"], 
categories=custom_order, ordered=True) + + df["source"] = df["label"].apply(lambda l: translate_label.get(l, l)) + + df = df.sort_values("label", ascending=False) + + dfs.append(df) + +df = pd.concat(dfs) + +logger.debug(df) + +names = [k[0] for k in xsec_keys] + +outdir = output_tools.make_plot_dir(args.outpath, eoscp=args.eoscp) + +# make latex table +outname = "summary_table" +if args.postfix: + outname += f"_{args.postfix}" + +df_t = df.copy() +relative = True # compute relative uncertainty +percentage = True # numbers in percentage + +if relative: + df_t["impact"] /= df_t["value"] +if percentage: + df_t["impact"] *= 100 + +# sorting +cat_dtype = pd.CategoricalDtype(categories=names, ordered=True) +df_t["name"] = df_t["name"].astype(cat_dtype) + +tex_tools.make_latex_table( + df_t, + output_dir=outdir, + output_name=outname, + column_title=None, + caption="Uncertainties in percentage.", + label="", + sublabel="", + column_name="name", + row_name="source", + cell_columns=["impact"], + cell_format=lambda x: f"${round(x,2)}$", + sort="impact", +) + +# make plot 1D +hep.style.use(hep.style.ROOT) + +plt.clf() +fig = plt.figure() +fig.subplots_adjust(left=0.15, right=0.99, top=0.99, bottom=0.125) +ax = fig.add_subplot(111) + +# x axis range +lo, hi = 0.97, 1.115 + +# totals = [] +# stats = [] +norms = [] +_theory_labels_added = set() +for i, name in enumerate(names[::-1]): + df_g = df.loc[df["name"] == name] + + norm = df_g["value"].values[0] + total = df_g.loc[df_g["label"] == "Total"]["impact"].values[0] + stat = df_g.loc[df_g["label"] == "stat"]["impact"].values[0] + total_rel = total / norm + stat_rel = stat / norm + + prefit = df_g["prefit"].values[0] / norm + prefit_err = df_g["prefit_error"].values[0] / norm + + norms.append(norm) + # totals.append(total) + # stats.append(stat) + + x1 = ax.bar( + 1.0, height=1, bottom=i, width=2 * total_rel, color="silver" # , label="Total" + ) + x2 = ax.bar( + 1.0, height=1, bottom=i, width=2 * stat_rel, color="gold" # , label="Stat" + ) + + # ax.errorbar([prefit], [i+0.5], xerr=prefit_err, color="red", marker="o", label="Prefit" if i ==0 else None) + + nSlots = nPDFs + nTheory + nMeas + j = 0 + for j, pdf_name in enumerate(all_1d_pdf_names): + pdf_value = df_g[pdf_name].values[0] / norm + pdf_error = df_g[f"{pdf_name}_error"].values[0] / norm + ax.errorbar( + [pdf_value], + [i + 1 - (j + 1) / nSlots], + xerr=pdf_error, + color=pdf_colors[pdf_name], + marker="o", + label=pdf_name if i == 0 else None, + ) + if f"{pdf_name}_pdf" in df_g.columns: + pdf_error_pdf = df_g[f"{pdf_name}_pdf"].values[0] / norm + ax.errorbar( + [pdf_value], + [i + 1 - (j + 1) / nSlots], + xerr=pdf_error_pdf, + color=pdf_colors[pdf_name], + capsize=5, + capthick=2, + marker="o", + ) + + proc = _name_to_proc.get(name) + for k, (theo_label, preds) in enumerate(theory_predictions.items()): + if proc not in preds: + continue + central_pb, unc_up_pb, unc_dn_pb = preds[proc] + color = next( + (v for key, v in theory_colors.items() if theo_label.startswith(key)), + f"C{k + 5}", + ) + legend_label = theo_label if theo_label not in _theory_labels_added else None + if legend_label is not None: + _theory_labels_added.add(theo_label) + has_unc = unc_up_pb > 0 or unc_dn_pb > 0 + ax.errorbar( + [central_pb / norm], + [i + 1 - (nPDFs + k + 1) / nSlots], + xerr=[[unc_dn_pb / norm], [unc_up_pb / norm]] if has_unc else None, + color=color, + marker="^", + label=legend_label, + ) + + # cross sections from SMP-20-004 + if args.includeSMP20004: + ax.errorbar( + smp_20_004[name][0] / norm, + i + 
0.5, + xerr=smp_20_004[name][1] / norm, + color="black", + marker="x", + label="JHEP04(2025)162" if i == 0 else None, + ) + + # round to two significant digits in total uncertainty + sig_digi = 2 - int(math.floor(math.log10(abs(total)))) - 1 + + if sig_digi <= 0: + norm = int(norm) + total = int(total) + else: + norm = round(norm, sig_digi) + total = round(total, sig_digi) + + ax.text( + lo + 0.005, + i + 0.5, + name, + fontsize=20, + verticalalignment="bottom", + horizontalalignment="left", + ) + title = rf"${norm} \pm {total}" + if "/" in name: + title += "$" + else: + title += r"\,\mathrm{pb}$" + ax.text( + hi - 0.04, + i + 0.5, + title, + fontsize=20, + verticalalignment="bottom", + horizontalalignment="left", + ) + +# ax.text( +# hi - 0.04, +# len(names) + 0.5, +# r"$\mathrm{Measured} \pm {unc}$", +# fontsize=20, +# verticalalignment="bottom", +# horizontalalignment="left", +# ) + +x0 = ax.plot([1.0, 1.0], [0, len(norms)], color="black") +ax.plot([lo, hi], [len(norms), len(norms)], color="black") + +p = Polygon( + [[0, 0], [0, 0], [0, 0], [0, 0]], + facecolor="silver", + linestyle="solid", + edgecolor="black", + linewidth=2, + alpha=0.6, +) + +p.outer_color = "grey" +p.outer_alpha = 0.5 +p.inner_color = "gold" +p.inner_alpha = 1 + +extra_handles = [(p,)] + +extra_labels = ["Measurement"] + +leg = plot_tools.addLegend( + ax, + ncols=args.legCols, + text_size="small", + bbox_to_anchor=None, + loc="upper left", + reverse=False, + markerfirst=True, + labelcolor="black", + extra_handles=extra_handles, + extra_labels=extra_labels, + extra_entries_first=False, + custom_handlers=(["doubleband"]), + padding_loc="auto", +) + +# add a link to the legend entry +for text in leg.get_texts(): + if "JHEP04(2025)162" in text.get_text(): + text.set_url("https://doi.org/10.1007/JHEP04(2025)162") + # Optional: Make it look like a link + # text.set_color("blue") + +ax.set_xlim([lo, hi]) +ax.set_ylim([0, len(norms) + 2]) + +ax.set_xlabel("Prediction / Measurement") + +# Disable ticks on the top and right axes +ax.tick_params(top=False) + +# Disable y-axis labels and ticks +plt.gca().set_yticklabels([]) +plt.gca().set_yticks([]) + +plot_tools.add_cms_decor(ax, args.cmsDecor, data=True, lumi=lumi, loc=args.logoPos) + +outname = "summary" +if args.full: + outname = f"total_{outname}" +if args.postfix: + outname += f"_{args.postfix}" +plot_tools.save_pdf_and_png(outdir, outname) + +xsec_summary = {} +for name, mappings, channel, selection in xsec_keys: + df_g = df.loc[df["name"] == name] + value = df_g["value"].values[0] + total = df_g.loc[df_g["label"] == "Total"]["impact"].values[0] + stat = df_g.loc[df_g["label"] == "stat"]["impact"].values[0] + + entry = { + "value": value, + "stat": stat, + "total": total, + } + + for pdf_name in all_1d_pdf_names: + if pdf_name in df_g.columns: + pdf_entry = { + "value": df_g[pdf_name].values[0], + "total": df_g[f"{pdf_name}_error"].values[0], + } + if f"{pdf_name}_pdf" in df_g.columns: + pdf_entry["pdf"] = df_g[f"{pdf_name}_pdf"].values[0] + entry[pdf_name] = pdf_entry + + if args.includeSMP20004: + entry["JHEP04(2025)162"] = { + "value": smp_20_004[name][0], + "total": smp_20_004[name][1], + } + + xsec_summary[name] = entry + +output_tools.write_index_and_log( + outdir, + outname, + analysis_meta_info={ + "CombinetfOutput": meta["meta_info"], + "xsec_summary": xsec_summary, + }, + args=args, +) + + +# make plot 2D ellipses + + +def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): + """ + Plots an `nstd` sigma ellipse based on the covariance matrix (`cov`) + centered at 
position `pos`. + """ + # Eigenvalue decomposition + eigvals, eigvecs = np.linalg.eigh(cov) + + # Sort eigenvalues and eigenvectors + order = eigvals.argsort()[::-1] + eigvals, eigvecs = eigvals[order], eigvecs[:, order] + + # Compute angle in degrees + theta = np.degrees(np.arctan2(*eigvecs[:, 0][::-1])) + + prob = chi2.cdf(nstd**2, df=1) # Get the probability of the 1D n-sigma + scale = np.sqrt(chi2.ppf(prob, df=2)) # Find the 2D equivalent scale + + # Width and height are "2*nstd" standard deviations + width, height = 2 * scale * np.sqrt(eigvals) + + # Create ellipse + ellipse = Ellipse(xy=pos, width=width, height=height, angle=theta, **kwargs) + return ellipse + + +plot_2d_configs = { + False: ( # fiducial phase space + ("WpWm", xsec_keys[0], xsec_keys[1], "pb", (3065, 3200), (3975, 4200)), + ("WZ", xsec_keys[2], xsec_keys[3], "pb", (7050, 7375), (585, 615)), + ("R", xsec_keys[4], xsec_keys[5], None, (1.297, 1.304), (11.9, 12.2)), + ), + True: ( # total phase space + ("WpWm", xsec_keys[0], xsec_keys[1], "pb", (7600, 9600), (10200, 12600)), + ("WZ", xsec_keys[2], xsec_keys[3], "pb", (18000, 22000), (1720, 2100)), + ("R", xsec_keys[4], xsec_keys[5], None, (1.32, 1.41), (10.1, 10.85)), + ), +} + +for name, channel0, channel1, unit, xlim, ylim in plot_2d_configs[args.full]: + + plt.clf() + fig = plt.figure() + fig.subplots_adjust(left=0.15, right=0.99, top=0.99, bottom=0.125) + ax = fig.add_subplot(111) + + for pdf_name, result in comp_result.items(): + + if len(channel0[1]) == 2: + mappings0 = channel0[1] + mappings1 = channel1[1] + + if args.full and pdf_name != "TOTAL": + mapping0 = mappings0[1] + mapping1 = mappings1[1] + else: + mapping0 = mappings0[0] + mapping1 = mappings1[0] + else: + mapping0 = channel0[1] + mapping1 = channel1[1] + + ckey0 = mapping0 + " " + channel0[2] + ckey1 = mapping1 + " " + channel1[2] + + found_x = False + found_y = False + ibin = 0 + for k, r in result["channels"].items(): + fittype = "postfit" if f"hist_postfit_inclusive" in r.keys() else "prefit" + + hi = r[f"hist_{fittype}_inclusive"].get() + if getattr(hi, "axes", False) and "yield" in hi.axes.name: + hi = hi[{"yield": hist.sum}] + + if k == ckey0 or k == ckey0.replace(identifier, ""): + sel = channel0[-1] + found_x = True + if sel is not None: + x = hi[sel].value + ix = ibin + [i for i in sel.values()][0] + else: + x = hi.value + ix = ibin + + if k == ckey1 or k == ckey1.replace(identifier, ""): + sel = channel1[-1] + found_y = True + if sel is not None: + y = hi[sel].value + iy = ibin + [i for i in sel.values()][0] + else: + y = hi.value + iy = ibin + + ibin += hi.size if hasattr(hi, "size") else 1 + + if not found_x or not found_y: + raise RuntimeError(f"Not found x {found_x} or y {found_y}") + + cov = result[f"hist_{fittype}_inclusive_cov"].get().values() + cov = cov[np.ix_([ix, iy], [ix, iy])] + + # for pos, cov in zip(points, covs): + if fittype == "postfit": + ell = plot_cov_ellipse( + cov, + np.array([x, y]), + nstd=1, + edgecolor="none", + facecolor="grey", + label="Measurement", + ) + ax.add_patch(ell) + ax.plot(x, y, color="black", marker="P") + else: + icol = pdf_colors[pdf_name] + ell = plot_cov_ellipse( + cov, + np.array([x, y]), + nstd=1, + edgecolor=icol, + facecolor="none", + linewidth=2, + label=pdf_name, + ) + ax.add_patch(ell) + ax.plot(x, y, color=icol, marker="o", alpha=0) + + if args.includeSMP20004: + # cross sections from SMP-20-004 — correlations unknown, show as 2D error bars + x = smp_20_004[channel0[0]][0] + x_err = smp_20_004[channel0[0]][1] + y = 
smp_20_004[channel1[0]][0] + y_err = smp_20_004[channel1[0]][1] + + ax.errorbar( + x, + y, + xerr=x_err, + yerr=y_err, + fmt="x", + color="black", + ecolor="black", + capsize=4, + capthick=1.5, + label="JHEP04(2025)162", + ) + if xlim is not None: + ax.set_xlim(*xlim) + + if ylim is not None: + ax.set_ylim(*ylim) + else: + xlim = ax.get_xlim() + ylim = ax.get_ylim() + yrange = ylim[1] - ylim[0] + + ax.set_ylim(ylim[0], ylim[1] + yrange * 0.25) + + if unit is not None: + plt.xlabel(rf"$\sigma({channel0[0].replace('$', '')})$ [pb]") + plt.ylabel(rf"$\sigma({channel1[0].replace('$', '')})$ [pb]") + else: + plt.xlabel(channel0[0]) + plt.ylabel(channel1[0]) + # plt.title("2D Covariance Ellipses") + # plt.grid(True) + # plt.show() + + plot_tools.addLegend( + ax, + ncols=args.legCols, + text_size="small", + bbox_to_anchor=None, + loc="upper left", + reverse=False, + # markerfirst=True, + labelcolor="black", + # extra_handles=extra_handles, + # extra_labels=extra_labels, + # extra_entries_first=False, + # custom_handlers=( + # ["doubleband"] + # ), + padding_loc="auto", + ) + + plot_tools.add_cms_decor(ax, args.cmsDecor, data=True, loc=args.logoPos) + + outname = f"summary_2D_{name}" + if args.full: + outname = f"total_{outname}" + if args.postfix: + outname += f"_{args.postfix}" + plot_tools.save_pdf_and_png(outdir, outname) + + output_tools.write_index_and_log( + outdir, + outname, + analysis_meta_info={"CombinetfOutput": meta["meta_info"]}, + args=args, + ) + + +if output_tools.is_eosuser_path(args.outpath) and args.eoscp: + output_tools.copy_to_eos(args.outpath, args.outfolder) diff --git a/scripts/analysisTools/unfolding/limits.py b/scripts/analysisTools/unfolding/limits.py new file mode 100644 index 000000000..c16dff35c --- /dev/null +++ b/scripts/analysisTools/unfolding/limits.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +import argparse + +import mplhep as hep +import numpy as np + +from wums import logging, output_tools, plot_tools + +hep.style.use(hep.style.ROOT) + + +def parseArgs(): + parser = argparse.ArgumentParser() + + parser.add_argument("xsecfile", type=str, help="File with mass, xsec") + parser.add_argument("limitfile", type=str, help="File with mass, limits") + parser.add_argument( + "--model", + required=True, + type=str, + choices=["zprime", "hnl"], + help="Model for comprison", + ) + + parser.add_argument("-o", "--outpath", type=str, default="./plots") + parser.add_argument("--title", default="CMS", type=str) + parser.add_argument("--subtitle", default="", type=str) + parser.add_argument("--xlabel", default=r"$m_{Z'}$ [GeV]") + parser.add_argument("--ylabel", default=r"$g_{Z'}$") + parser.add_argument("--verbose", type=int, default=3) + parser.add_argument("--noColorLogger", action="store_true") + parser.add_argument( + "--legPos", type=str, default="upper right", help="Set legend position" + ) + parser.add_argument( + "--legSize", + type=str, + default="small", + help="Legend text size (small: axis ticks size, large: axis label size, number)", + ) + parser.add_argument( + "--legCols", type=int, default=2, help="Number of columns in legend" + ) + parser.add_argument( + "-p", "--postfix", type=str, help="Postfix for output file name" + ) + parser.add_argument( + "--ylim", + type=float, + nargs=2, + help="Min and max values for y axis (if not specified, range set automatically)", + ) + parser.add_argument( + "--upperLimit", + type=float, + nargs="+", + help="Upper limit of measurements", + ) + return parser.parse_args() + + +def main(): + args = parseArgs() + logger = 
logging.setup_logger(__file__, args.verbose, args.noColorLogger) + + outdir = output_tools.make_plot_dir(args.outpath) + + # --- load data ------------------------------------------------------------ + mass, xsec = np.loadtxt(args.xsecfile, unpack=True) + mass2, limits = np.loadtxt(args.limitfile, unpack=True) + + # expected limits from xsec + expected = np.array(args.upperLimit) / (xsec * 100) + if args.model == "zprime": + expected = np.sqrt(expected) + + # --- build figure via plot_tools ----------------------------------------- + fig, ax = plot_tools.figure( + None, + args.xlabel, + args.ylabel, + automatic_scale=False, + xlim=(0, max(mass.max(), mass2.max()) * 1.05), + ylim=args.ylim, + logy=True, + ) + + if args.model == "hnl": + first = 17 + ax.plot( + mass, + expected, + color="black", + marker="", + linestyle="-", + label="This analysis (expected)", + ) + ax.plot( + mass2[:first], + limits[:first], + color="brown", + marker="", + linestyle="-", + label="Prompt $3\ell = (e, \mu, \tau)$ \n arXiv:2403.00100", + ) + ax.plot( + mass2[first - 1 :], + limits[first - 1 :], + color="purple", + marker="", + linestyle="-", + label="Prompt 1 + 1 displaced & jet \n CMS-PAS-EXO-21-011", + ) + elif args.model == "zprime": + ax.plot( + mass, + expected, + color="black", + marker="", + linestyle="-", + label="Limit from $\sigma=100pb$", + ) + ax.plot( + mass2, + limits, + color="brown", + marker="", + linestyle="-", + label="Phys.Rev.D 110 (2024) 072008, 2024.", + ) + + plot_tools.add_decor( + ax, + args.title, + args.subtitle, + data=True, + lumi=None, + loc=2, + ) + + plot_tools.addLegend( + ax, + ncols=args.legCols, + loc=args.legPos, + text_size=args.legSize, + ) + + to_join = ["limits_comparison", args.postfix] + outfile = "_".join(filter(lambda x: x, to_join)) + if args.subtitle == "Preliminary": + outfile += "_preliminary" + + plot_tools.save_pdf_and_png(outdir, outfile) + + # write index + log + output_tools.write_index_and_log( + outdir, + outfile, + analysis_meta_info={ + "xsecfile": args.xsecfile, + "limitfile": args.limitfile, + }, + args=args, + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/plotting/unfolding_massbias.py b/scripts/analysisTools/unfolding/unfolding_massbias.py similarity index 100% rename from scripts/plotting/unfolding_massbias.py rename to scripts/analysisTools/unfolding/unfolding_massbias.py diff --git a/scripts/histmakers/mw_lowPU.py b/scripts/histmakers/mw_lowPU.py index 0ed17fa27..7f4863322 100644 --- a/scripts/histmakers/mw_lowPU.py +++ b/scripts/histmakers/mw_lowPU.py @@ -138,11 +138,13 @@ "transverseMass", ] ## was transverseMass -theory_helpers_procs = theory_corrections.make_theory_helpers( +helicity_smoothing_helpers_procs = theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr ) -axis_ptVgen = theory_helpers_procs["W"]["qcdScale"].hist.axes["ptVgen"] -axis_chargeVgen = theory_helpers_procs["W"]["qcdScale"].hist.axes["chargeVgen"] +axis_ptVgen = helicity_smoothing_helpers_procs["W"]["qcdScale"].hist.axes["ptVgen"] +axis_chargeVgen = helicity_smoothing_helpers_procs["W"]["qcdScale"].hist.axes[ + "chargeVgen" +] groups_to_aggregate = args.aggregateGroups @@ -190,9 +192,9 @@ def build_graph(df, dataset): results = [] isQCDMC = dataset.group == "QCD" - theory_helpers = None + helicity_smoothing_helpers = None if dataset.name in samples.vprocs: - theory_helpers = theory_helpers_procs[dataset.name[0]] + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] if dataset.is_data: df = 
df.DefinePerSample("weight", "1.0") @@ -246,7 +248,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in unfolding_axes[level] if a.name != "acceptance"], [c for c in unfolding_cols[level] if c != f"{level}_acceptance"], base_name=level, @@ -416,7 +418,11 @@ def build_graph(df, dataset): df = df.Define("exp_weight", "SFMC") df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) else: df = df.DefinePerSample("nominal_weight", "1.0") @@ -524,7 +530,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, a, c, base_name=n, diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index a2b85c2ac..0d8415083 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -312,13 +312,13 @@ ), name="mt", underflow=False, - overflow=True, + overflow=False, ) axis_isoCat = hist.axis.Variable( binning.get_binning_fakes_relIso(high_iso_bins=False), name="relIso", underflow=False, - overflow=True, + overflow=False, ) axes_abcd = [axis_mtCat, axis_isoCat] @@ -449,9 +449,14 @@ args.fitresult, channel="ch1_masked" ) -theory_helpers_procs = theory_corrections.make_theory_helpers( - args.pdfs, args.theoryCorr, procs=["Z", "W"] -) +if args.skipByHelicityCorrection: + helicity_smoothing_helpers_procs = {} +else: + helicity_smoothing_helpers_procs = ( + theory_corrections.make_helicity_smoothing_helpers( + args.pdfs, args.theoryCorr, procs=["Z", "W"] + ) + ) if args.theoryAgnostic: theoryAgnostic_axes, theoryAgnostic_cols = binning.get_theoryAgnostic_axes( @@ -722,10 +727,8 @@ def build_graph(df, dataset): isW = dataset.group in ["Wmunu", "Wtaunu"] isBSM = dataset.name.startswith("WtoNMu") isWmunu = isBSM or dataset.group in ["Wmunu"] - isZ = dataset.group in [ - "Zmumu", - "Ztautau", - ] + isZ = dataset.group in ["Zmumu", "Ztautau"] + isZmumu = dataset.group in ["Zmumu"] isZveto = isZ or dataset.group in ["DYlowMass"] isWorZ = isW or isZ isTop = dataset.group == "Top" @@ -735,9 +738,10 @@ def build_graph(df, dataset): hist.storage.Double() ) # turn off sum weight square for systematic histograms - theory_helpers = {} - if isWorZ: - theory_helpers = theory_helpers_procs[dataset.name[0]] + if isWorZ and dataset.name[0] in helicity_smoothing_helpers_procs.keys(): + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] + else: + helicity_smoothing_helpers = {} # disable auxiliary histograms when unfolding to reduce memory consumptions, or when doing the original theory agnostic without --poiAsNoi auxiliary_histograms = True @@ -833,7 +837,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [], [], base_name="gen", @@ -890,7 +894,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in unfolding_axes[level] if a.name != "acceptance"], [ c @@ -911,7 +915,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in unfolding_axes[level] if a.name != "acceptance"], [ c @@ -925,21 +929,19 @@ def build_graph(df, dataset): cols = [*nominal_cols, *unfolding_cols[level]] break - elif dataset.name == 
"Zmumu_2016PostVFP": - if args.unfolding and dataset.name == "Zmumu_2016PostVFP": - df = unfolder_z.add_gen_histograms( - args, df, results, dataset, corr_helpers, theory_helpers - ) - - if not unfolder_z.poi_as_noi: - axes = [ - *nominal_axes, - *unfolder_z.unfolding_axes[unfolder_z.unfolding_levels[-1]], - ] - cols = [ - *nominal_cols, - *unfolder_z.unfolding_cols[unfolder_z.unfolding_levels[-1]], - ] + elif isZmumu: + df = unfolder_z.add_gen_histograms( + args, df, results, dataset, corr_helpers, helicity_smoothing_helpers + ) + if not unfolder_z.poi_as_noi: + axes = [ + *nominal_axes, + *unfolder_z.unfolding_axes[unfolder_z.unfolding_levels[-1]], + ] + cols = [ + *nominal_cols, + *unfolder_z.unfolding_cols[unfolder_z.unfolding_levels[-1]], + ] if isWorZ: df = generator_level_definitions.define_prefsr_vars(df) @@ -1338,7 +1340,11 @@ def build_graph(df, dataset): logger.debug(f"Exp weight defined: {weight_expr}") df = df.Define("exp_weight", weight_expr) df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) if ( @@ -2349,7 +2355,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, axes, cols, for_wmass=True, diff --git a/scripts/histmakers/mz_dilepton.py b/scripts/histmakers/mz_dilepton.py index 906eab55e..215a74bf8 100644 --- a/scripts/histmakers/mz_dilepton.py +++ b/scripts/histmakers/mz_dilepton.py @@ -361,14 +361,20 @@ muon_prefiring_helper, muon_prefiring_helper_stat, muon_prefiring_helper_syst = ( muon_prefiring.make_muon_prefiring_helpers(era=era) ) -procs = [ - p - for p, grp in (("W", samples.wprocs), ("Z", samples.zprocs)) - if any(d.name in grp for d in datasets) -] -theory_helpers_procs = theory_corrections.make_theory_helpers( - args.pdfs, args.theoryCorr, procs=procs -) + +if args.skipByHelicityCorrection: + helicity_smoothing_helpers_procs = {} +else: + procs = [ + p + for p, grp in (("W", samples.wprocs), ("Z", samples.zprocs)) + if any(d.name in grp for d in datasets) + ] + helicity_smoothing_helpers_procs = ( + theory_corrections.make_helicity_smoothing_helpers( + args.pdfs, args.theoryCorr, procs=procs + ) + ) # extra axes which can be used to label tensor_axes if args.binnedScaleFactors: @@ -532,9 +538,10 @@ def build_graph(df, dataset): isZ = dataset.name in samples.zprocs isWorZ = isW or isZ - theory_helpers = {} - if isWorZ: - theory_helpers = theory_helpers_procs[dataset.name[0]] + if isWorZ and dataset.name[0] in helicity_smoothing_helpers_procs.keys(): + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] + else: + helicity_smoothing_helpers = {} cvh_helper = data_calibration_helper if dataset.is_data else mc_calibration_helper jpsi_helper = data_jpsi_crctn_helper if dataset.is_data else mc_jpsi_crctn_helper @@ -576,8 +583,14 @@ def build_graph(df, dataset): cols = [*cols, "run"] if args.unfolding and dataset.group == "Zmumu": + print(f"name = {dataset.name}; group = {dataset.group}") df = unfolder_z.add_gen_histograms( - args, df, results, dataset, corr_helpers, theory_helpers=theory_helpers + args, + df, + results, + dataset, + corr_helpers, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) if not unfolder_z.poi_as_noi: @@ -598,7 +611,11 @@ def build_graph(df, dataset): df_gen = df df_gen = df_gen.DefinePerSample("exp_weight", "1.0") df_gen = 
theory_corrections.define_theory_weights_and_corrs( - df_gen, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df_gen, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) for obs in auxiliary_gen_axes: @@ -613,7 +630,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [all_axes[obs]], [obs], base_name=f"gen_{obs}", @@ -921,7 +938,11 @@ def build_graph(df, dataset): logger.debug(f"Experimental weight defined: {weight_expr}") df = df.Define("exp_weight", weight_expr) df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) results.append( @@ -1059,7 +1080,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, obs_axes, obs, base_name=obs_name, @@ -1081,7 +1102,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [all_axes[obs]], [obs], base_name=f"nominal_{obs}", @@ -1282,7 +1303,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, axes, cols, for_wmass=False, diff --git a/scripts/histmakers/mz_lowPU.py b/scripts/histmakers/mz_lowPU.py index cafdf1243..6d11be073 100644 --- a/scripts/histmakers/mz_lowPU.py +++ b/scripts/histmakers/mz_lowPU.py @@ -103,11 +103,13 @@ axes_mt = [axis_mt] cols_mt = ["transverseMass"] -theory_helpers_procs = theory_corrections.make_theory_helpers( +helicity_smoothing_helpers_procs = theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr ) -axis_ptVgen = theory_helpers_procs["Z"]["qcdScale"].hist.axes["ptVgen"] -axis_chargeVgen = theory_helpers_procs["Z"]["qcdScale"].hist.axes["chargeVgen"] +axis_ptVgen = helicity_smoothing_helpers_procs["Z"]["qcdScale"].hist.axes["ptVgen"] +axis_chargeVgen = helicity_smoothing_helpers_procs["Z"]["qcdScale"].hist.axes[ + "chargeVgen" +] if args.unfolding: @@ -151,9 +153,9 @@ def build_graph(df, dataset): results = [] - theory_helpers = None + helicity_smoothing_helpers = None if dataset.name in samples.vprocs: - theory_helpers = theory_helpers_procs[dataset.name[0]] + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] if dataset.is_data: df = df.DefinePerSample("weight", "1.0") @@ -168,7 +170,7 @@ def build_graph(df, dataset): if args.unfolding and dataset.group == base_group: df = unfolder_z.add_gen_histograms( - args, df, results, dataset, corr_helpers, theory_helpers + args, df, results, dataset, corr_helpers, helicity_smoothing_helpers ) if not unfolder_z.poi_as_noi: @@ -356,7 +358,11 @@ def build_graph(df, dataset): df = df.Define("exp_weight", "SFMC") df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) else: df = df.DefinePerSample("nominal_weight", "1.0") @@ -539,7 +545,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, a, c, base_name=n, diff --git a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py index 77b3cb8e2..ef3043439 100644 --- a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py +++ 
b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py @@ -232,17 +232,14 @@ ) # for isoMt region validation and related tests -# use very high upper edge as a proxy for infinity (cannot exploit overflow bins in the fit) -# can probably use numpy infinity, but this is compatible with the root conversion -# FIXME: now we can probably use overflow bins in the fit axis_mtCat = hist.axis.Variable( - [0, int(args.mtCut / 2.0), args.mtCut, 1000], + [0, int(args.mtCut / 2.0), args.mtCut, np.inf], name="mt", underflow=False, overflow=False, ) axis_isoCat = hist.axis.Variable( - [0, 0.15, 0.3, 100], name="relIso", underflow=False, overflow=False + [0, 0.15, 0.3, np.inf], name="relIso", underflow=False, overflow=False ) if args.addIsoMtAxes: @@ -306,7 +303,7 @@ muon_prefiring.make_muon_prefiring_helpers(era=era) ) -theory_helpers_procs = theory_corrections.make_theory_helpers( +helicity_smoothing_helpers_procs = theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr, corrs=["qcdScale", "alphaS", "pdf"] ) @@ -470,9 +467,9 @@ def build_graph(df, dataset): isZ = dataset.name in samples.zprocs isWorZ = isW or isZ - theory_helpers = None + helicity_smoothing_helpers = None if isWorZ: - theory_helpers = theory_helpers_procs[dataset.name[0]] + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] if dataset.is_data: df = df.DefinePerSample("weight", "1.0") @@ -596,7 +593,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, [a for a in unfolding_axes[level] if a.name != "acceptance"], [c for c in unfolding_cols[level] if c != f"{level}_acceptance"], base_name=level, @@ -924,7 +921,11 @@ def build_graph(df, dataset): logger.debug(f"Exp weight defined: {weight_expr}") df = df.Define("exp_weight", weight_expr) df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers=theory_helpers + df, + dataset.name, + corr_helpers, + args, + helicity_smoothing_helpers=helicity_smoothing_helpers, ) results.append( @@ -1451,7 +1452,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, axes, cols, for_wmass=False, diff --git a/scripts/histmakers/w_z_gen_dists.py b/scripts/histmakers/w_z_gen_dists.py index ffec8ecb6..239b990e4 100644 --- a/scripts/histmakers/w_z_gen_dists.py +++ b/scripts/histmakers/w_z_gen_dists.py @@ -192,7 +192,7 @@ corrs.append("qcdScale") if args.centralBosonPDFWeight: corrs.append("pdf_central") -theory_helpers_procs = theory_corrections.make_theory_helpers( +helicity_smoothing_helpers_procs = theory_corrections.make_helicity_smoothing_helpers( args.pdfs, args.theoryCorr, procs=["Z", "W"], corrs=corrs ) @@ -215,9 +215,9 @@ def build_graph(df, dataset): ] # in samples.zprocs if isW or isZ: - theory_helpers = theory_helpers_procs[dataset.name[0]] + helicity_smoothing_helpers = helicity_smoothing_helpers_procs[dataset.name[0]] else: - theory_helpers = {} + helicity_smoothing_helpers = {} theoryAgnostic_axes, _ = binning.get_theoryAgnostic_axes( ptV_flow=True, absYV_flow=True, wlike="Z" in dataset.name @@ -316,7 +316,7 @@ def build_graph(df, dataset): df = df.Define("isEvenEvent", "event % 2 == 0") df = theory_corrections.define_theory_weights_and_corrs( - df, dataset.name, corr_helpers, args, theory_helpers + df, dataset.name, corr_helpers, args, helicity_smoothing_helpers ) if isZ or dataset.group in [ @@ -957,7 +957,7 @@ def build_graph(df, dataset): args, dataset.name, corr_helpers, - 
theory_helpers, + helicity_smoothing_helpers, nominal_axes, nominal_cols, base_name="nominal_gen", diff --git a/scripts/plotting/inclusive_xsec_summary.py b/scripts/plotting/inclusive_xsec_summary.py deleted file mode 100644 index 83c8cd289..000000000 --- a/scripts/plotting/inclusive_xsec_summary.py +++ /dev/null @@ -1,570 +0,0 @@ -# On results of fiducial inclusive cross sections and their ratios -# make a summary plot with different theory predictions -# make a latex summary table with the breakdown of uncertainties - -import math - -import hist -import matplotlib.pyplot as plt -import mplhep as hep -import numpy as np -import pandas as pd -from matplotlib.patches import Ellipse, Polygon - -import rabbit.io_tools -from wremnants.utilities import parsing -from wremnants.utilities.io_tools import tex_tools - -from wums import logging, output_tools, plot_tools # isort: skip - -parser = parsing.plot_parser() -parser.add_argument("infile", type=str, help="Rabbit fitresult file") -parser.add_argument( - "--pdfFiles", - type=str, - nargs="*", - default=[], - help="Rabbit fitresult files with alternative pdf predictions", -) -parser.add_argument( - "--config", - type=str, - default=None, - help="Path to config file for style formatting", -) -args = parser.parse_args() - -logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) - -config = plot_tools.load_config(args.config) - -grouping = getattr(config, "nuisance_grouping", {}).get("xsecs", None) -translate_label = getattr(config, "systematics_labels", {}) - -fitresult, meta = rabbit.io_tools.get_fitresult(args.infile, meta=True) -result = fitresult["mappings"] - -pdf_results = {} -comp_result = {} -for pdf_file in args.pdfFiles: - pdf_name = pdf_file.split("/")[-2].split("_")[-1] - pdf_name = pdf_name.upper().replace("AN3LO", "an3lo") - - logger.info(f"Now at PDF {pdf_name}") - - pdf_result, pdf_meta = rabbit.io_tools.get_fitresult(pdf_file, meta=True) - - pdf_mapping = pdf_result["mappings"] - - if "CompositeMapping" in pdf_mapping.keys(): - comp_result[pdf_name] = pdf_mapping["CompositeMapping"] - else: - pdf_results[pdf_name] = pdf_mapping - -nPDFs = len(args.pdfFiles) -pdf_colors = { - "CT18": "#2ca02c", - "CT18Z": "#E42536", - "PDF4LHC21": "#9467bd", - "MSHT20": "#7f7f7f", - "MSHT20an3lo": "#8c564b", - "NNPDF31": "#e377c2", - "NNPDF40": "#17becf", -} - -# mapping, channel, bin -xsec_keys = [ - (r"$\mathrm{W}^{-}$", "Project ch1_masked qGen", "ch1_masked", {"qGen": 0}), - (r"$\mathrm{W}^{+}$", "Project ch1_masked qGen", "ch1_masked", {"qGen": 1}), - (r"$\mathrm{W}$", "Project ch1_masked", "ch1_masked", None), - (r"$\mathrm{Z}$", "Project ch0_masked", "ch0_masked", None), - ( - r"$\mathrm{W}^{+}/\mathrm{W}^{-}$", - "Ratio ch1_masked ch1_masked qGen:1,ptGen:sum,absEtaGen:sum qGen:0,ptGen:sum,absEtaGen:sum", - "ch1_masked", - None, - ), - ( - r"$\mathrm{W/Z}$", - "Ratio ch1_masked ch0_masked qGen:sum,ptGen:sum,absEtaGen:sum ptVGen:sum,absYVGen:sum", - "ch1_masked_ch0_masked", - None, - ), -] - -lumi = meta["meta_info_input"]["channel_info"]["ch0"]["lumi"] - -custom_order = [ - "Total", - "stat", - "binByBinStat", - "luminosity", - "Fake", - "CMS_background", - "muon_eff_syst", - "muon_eff_stat", - "prefire", - "muonCalibration", - "recoil", - "pdfCT18Z", - "angularCoeffs", - "pTModeling", - "theory_ew", - "massAndWidths", -] - -dfs = [] -for name, mapping, channel, selection in xsec_keys: - hp = result[mapping]["channels"][channel]["hist_prefit_inclusive"].get() - h1 = 
result[mapping]["channels"][channel]["hist_postfit_inclusive"].get() - hi = result[mapping]["channels"][channel][ - "hist_postfit_inclusive_global_impacts_grouped" - ].get() - if selection is not None: - hp = hp[selection] - h1 = h1[selection] - hi = hi[selection] - if getattr(h1, "axes", False) and "yield" in h1.axes.name: - hp = hp[{"yield": hist.sum}] - h1 = h1[{"yield": hist.sum}] - hi = hi[{"yield": hist.sum}] - - prefit = hp.value - prefit_error = hp.variance**0.5 - - value = h1.value - error = h1.variance**0.5 - - impacts = hi.values() - - labels = np.array(hi.axes["impacts"]) - mask = np.isin(labels, grouping) - - labels = labels[mask] - impacts = impacts[mask] - - if np.sum(impacts**2) ** 0.5 / error - 1 > 10e-10: - raise RuntimeError( - f"Sources don't add up to total error, got a difference of {np.sum(impacts**2)**0.5/error - 1}" - ) - - labels = np.append(labels, "Total") - impacts = np.append(impacts, error) - - df = pd.DataFrame(np.array(impacts, dtype=np.float64).T, columns=["impact"]) - - df["label"] = labels - - for label, combine_labels in { - "binByBinStat": ["binByBinStatW", "binByBinStatZ", "binByBinStat"], - "theory_ew": ["theory_ew", "massShift", "sin2thetaZ", "widthW", "widthZ"], - }.items(): - - subset = df[df["label"].isin(combine_labels)] - subset = subset.fillna(0) - combined = np.sqrt((subset[["impact"]] ** 2).sum()) - - # df = df.drop(columns=["binByBinStatW", "binByBinStatZ", "massShift", "sin2thetaZ", "widthW", "widthZ"]) - - new_row = pd.DataFrame( - { - "label": [label], - "impact": [combined["impact"]], - } - ) - - # Remove old rows and append the new one - df = df[~df["label"].isin(combine_labels)] - df = pd.concat([df, new_row], ignore_index=True) - - df["name"] = name - df["value"] = value - - df["prefit"] = prefit - df["prefit_error"] = prefit_error - - for pdf_name, pdf_res in pdf_results.items(): - channel_mappings = pdf_res[mapping.replace("_masked", "")]["channels"][ - channel.replace("_masked", "") - ] - hr = channel_mappings["hist_prefit_inclusive"].get() - hr_impacts = channel_mappings[ - "hist_prefit_inclusive_global_impacts_grouped" - ].get() - - if selection is not None: - hr = hr[selection] - hr_impacts = hr_impacts[selection] - if getattr(hr, "axes", False) and "yield" in hr.axes.name: - hr = hr[{"yield": hist.sum}] - hr_impacts = hr_impacts[{"yield": hist.sum}] - - df[pdf_name] = hr.value - df[f"{pdf_name}_error"] = hr.variance**0.5 - df[f"{pdf_name}_pdf"] = hr_impacts[{"impacts": f"pdf{pdf_name}"}] - - # Convert 'labels' column to categorical with the custom order - df["label"] = pd.Categorical(df["label"], categories=custom_order, ordered=True) - - df["source"] = df["label"].apply(lambda l: translate_label.get(l, l)) - - df = df.sort_values("label", ascending=False) - - dfs.append(df) - -df = pd.concat(dfs) - -names = [k[0] for k in xsec_keys] - -outdir = output_tools.make_plot_dir(args.outpath, eoscp=args.eoscp) - -# make latex table -outname = "summary_table" -if args.postfix: - outname += f"_{args.postfix}" - -df_t = df.copy() -relative = True # compute relative uncertainty -percentage = True # numbers in percentage - -if relative: - df_t["impact"] /= df_t["value"] -if percentage: - df_t["impact"] *= 100 - -# sorting -cat_dtype = pd.CategoricalDtype(categories=names, ordered=True) -df_t["name"] = df_t["name"].astype(cat_dtype) - -tex_tools.make_latex_table( - df_t, - output_dir=outdir, - output_name=outname, - column_title=None, - caption="Uncertainties in percentage.", - label="", - sublabel="", - column_name="name", - 
row_name="source", - cell_columns=["impact"], - cell_format=lambda x: f"${round(x,2)}$", - sort="impact", -) - -# make plot 1D -hep.style.use(hep.style.ROOT) - -plt.clf() -fig = plt.figure() -fig.subplots_adjust(left=0.15, right=0.99, top=0.99, bottom=0.125) -ax = fig.add_subplot(111) - -# x axis range -lo, hi = 0.925, 1.085 - -# totals = [] -# stats = [] -norms = [] -for i, name in enumerate(names[::-1]): - df_g = df.loc[df["name"] == name] - - norm = df_g["value"].values[0] - total = df_g.loc[df_g["label"] == "Total"]["impact"].values[0] - stat = df_g.loc[df_g["label"] == "stat"]["impact"].values[0] - total_rel = total / norm - stat_rel = stat / norm - - prefit = df_g["prefit"].values[0] / norm - prefit_err = df_g["prefit_error"].values[0] / norm - - norms.append(norm) - # totals.append(total) - # stats.append(stat) - - x1 = ax.bar( - 1.0, height=1, bottom=i, width=2 * total_rel, color="silver" # , label="Total" - ) - x2 = ax.bar( - 1.0, height=1, bottom=i, width=2 * stat_rel, color="gold" # , label="Stat" - ) - - # ax.errorbar([prefit], [i+0.5], xerr=prefit_err, color="red", marker="o", label="Prefit" if i ==0 else None) - - for j, pdf_name in enumerate(pdf_results.keys()): - pdf_value = df_g[pdf_name].values[0] / norm - pdf_error = df_g[f"{pdf_name}_error"].values[0] / norm - ax.errorbar( - [pdf_value], - [i + 1 - (j + 1) / (nPDFs + 1)], - xerr=pdf_error, - color=pdf_colors[pdf_name], - marker="o", - label=pdf_name if i == 0 else None, - ) - # # only plot PDF uncertainties - # pdf_error_pdf = df_g[f"{pdf_name}_pdf"].values[0] / norm - # ax.errorbar( - # [pdf_value], - # [i + 1 - (j + 1) / (nPDFs + 1)], - # xerr=pdf_error_pdf, - # color=pdf_colors[pdf_name], - # capsize=5, - # capthick=2, - # marker="o", - # ) - - # round to two significant digits in total uncertainty - sig_digi = 2 - int(math.floor(math.log10(abs(total)))) - 1 - - if sig_digi <= 0: - norm = int(norm) - total = int(total) - else: - norm = round(norm, sig_digi) - total = round(total, sig_digi) - - ax.text( - lo + 0.005, - i + 0.5, - name, - fontsize=20, - verticalalignment="bottom", - horizontalalignment="left", - ) - title = rf"${norm} \pm {total}" - if "/" in name: - title += "$" - else: - title += r"\,\mathrm{pb}$" - ax.text( - hi - 0.04, - i + 0.5, - title, - fontsize=20, - verticalalignment="bottom", - horizontalalignment="left", - ) - -# ax.text( -# hi - 0.04, -# len(names) + 0.5, -# r"$\mathrm{Measured} \pm {unc}$", -# fontsize=20, -# verticalalignment="bottom", -# horizontalalignment="left", -# ) - -x0 = ax.plot([1.0, 1.0], [0, len(norms)], color="black") -ax.plot([lo, hi], [len(norms), len(norms)], color="black") - -p = Polygon( - [[0, 0], [0, 0], [0, 0], [0, 0]], - facecolor="silver", - linestyle="solid", - edgecolor="black", - linewidth=2, - alpha=0.6, -) - -p.outer_color = "grey" -p.outer_alpha = 0.5 -p.inner_color = "gold" -p.inner_alpha = 1 - -extra_handles = [(p,)] - -extra_labels = ["Measurement"] - -plot_tools.addLegend( - ax, - ncols=args.legCols, - text_size="small", - bbox_to_anchor=None, - loc="upper left", - reverse=False, - markerfirst=True, - labelcolor="black", - extra_handles=extra_handles, - extra_labels=extra_labels, - extra_entries_first=False, - custom_handlers=(["doubleband"]), - padding_loc="auto", -) - -ax.set_xlim([lo, hi]) -ax.set_ylim([0, len(norms) + 2]) - -ax.set_xlabel("Prediction / Measurement") - -# Disable ticks on the top and right axes -ax.tick_params(top=False) - -# Disable y-axis labels and ticks -plt.gca().set_yticklabels([]) -plt.gca().set_yticks([]) - 
-plot_tools.add_cms_decor(ax, args.cmsDecor, data=True, lumi=lumi, loc=args.logoPos) - -outname = "summary" -if args.postfix: - outname += f"_{args.postfix}" -plot_tools.save_pdf_and_png(outdir, outname) - -output_tools.write_index_and_log( - outdir, - outname, - analysis_meta_info={"CombinetfOutput": meta["meta_info"]}, - args=args, -) - - -# make plot 2D ellipses - - -def plot_cov_ellipse(cov, pos, nstd=1, **kwargs): - """ - Plots an `nstd` sigma ellipse based on the covariance matrix (`cov`) - centered at position `pos`. - """ - # Eigenvalue decomposition - eigvals, eigvecs = np.linalg.eigh(cov) - - # Sort eigenvalues and eigenvectors - order = eigvals.argsort()[::-1] - eigvals, eigvecs = eigvals[order], eigvecs[:, order] - - # Compute angle in degrees - theta = np.degrees(np.arctan2(*eigvecs[:, 0][::-1])) - - # Width and height are "2*nstd" standard deviations - width, height = 2 * nstd * np.sqrt(eigvals) - - # Create ellipse - ellipse = Ellipse(xy=pos, width=width, height=height, angle=theta, **kwargs) - return ellipse - - -for name, channel0, channel1, unit in ( - ("WpWm", xsec_keys[0], xsec_keys[1], "pb"), - ("WZ", xsec_keys[2], xsec_keys[3], "pb"), - ("R", xsec_keys[4], xsec_keys[5], None), -): - - plt.clf() - fig = plt.figure() - fig.subplots_adjust(left=0.15, right=0.99, top=0.99, bottom=0.125) - ax = fig.add_subplot(111) - - for pdf_name, result in comp_result.items(): - ckey0 = channel0[1] + " " + channel0[2] - ckey1 = channel1[1] + " " + channel1[2] - - ibin = 0 - for k, r in result["channels"].items(): - fittype = "postfit" if f"hist_postfit_inclusive" in r.keys() else "prefit" - - hi = r[f"hist_{fittype}_inclusive"].get() - if getattr(hi, "axes", False) and "yield" in hi.axes.name: - hi = hi[{"yield": hist.sum}] - - if k == ckey0 or k == ckey0.replace("_masked", ""): - sel = channel0[-1] - - if sel is not None: - x = hi[sel].value - ix = ibin + [i for i in sel.values()][0] - else: - x = hi.value - ix = ibin - - if k == ckey1 or k == ckey1.replace("_masked", ""): - sel = channel1[-1] - if sel is not None: - y = hi[sel].value - iy = ibin + [i for i in sel.values()][0] - else: - y = hi.value - iy = ibin - - ibin += hi.size if hasattr(hi, "size") else 1 - - cov = result[f"hist_{fittype}_inclusive_cov"].get().values() - cov = cov[np.ix_([ix, iy], [ix, iy])] - - # for pos, cov in zip(points, covs): - if fittype == "postfit": - ell = plot_cov_ellipse( - cov, - np.array([x, y]), - nstd=1, - edgecolor="none", - facecolor="grey", - label="Measurement", - ) - ax.add_patch(ell) - ax.plot(x, y, color="black", marker="P") - else: - icol = pdf_colors[pdf_name] - ell = plot_cov_ellipse( - cov, - np.array([x, y]), - nstd=1, - edgecolor=icol, - facecolor="none", - linewidth=2, - label=pdf_name, - ) - ax.add_patch(ell) - ax.plot(x, y, color=icol, marker="o", alpha=0) - - xlim = ax.get_xlim() - ylim = ax.get_ylim() - yrange = ylim[1] - ylim[0] - - ax.set_xlim(*xlim) - ax.set_ylim(ylim[0], ylim[1] + yrange * 0.25) - if unit is not None: - plt.xlabel(rf"$\sigma({channel0[0].replace("$","")})$ [pb]") - plt.ylabel(rf"$\sigma({channel1[0].replace("$","")})$ [pb]") - else: - plt.xlabel(channel0[0]) - plt.ylabel(channel1[0]) - # plt.title("2D Covariance Ellipses") - # plt.grid(True) - # plt.show() - - plot_tools.addLegend( - ax, - ncols=args.legCols, - text_size="small", - bbox_to_anchor=None, - loc="upper left", - reverse=False, - # markerfirst=True, - labelcolor="black", - # extra_handles=extra_handles, - # extra_labels=extra_labels, - # extra_entries_first=False, - # custom_handlers=( - # 
["doubleband"] - # ), - padding_loc="auto", - ) - - plot_tools.add_cms_decor(ax, args.cmsDecor, data=True, loc=args.logoPos) - - outname = f"summary_2D_{name}" - if args.postfix: - outname += f"_{args.postfix}" - plot_tools.save_pdf_and_png(outdir, outname) - - output_tools.write_index_and_log( - outdir, - outname, - analysis_meta_info={"CombinetfOutput": meta["meta_info"]}, - args=args, - ) - - -if output_tools.is_eosuser_path(args.outpath) and args.eoscp: - output_tools.copy_to_eos(args.outpath, args.outfolder) diff --git a/scripts/plotting/makeScetlibComparisons.py b/scripts/plotting/makeScetlibComparisons.py index 59c1d9108..1036e3e00 100644 --- a/scripts/plotting/makeScetlibComparisons.py +++ b/scripts/plotting/makeScetlibComparisons.py @@ -5,7 +5,7 @@ import lz4.frame from matplotlib import cm -from wremnants.postprocessing import theory_tools +from wremnants.production import helicity_utils from wremnants.utilities import parsing from wremnants.utilities.io_tools import input_tools from wums import boostHistHelpers as hh @@ -45,7 +45,7 @@ "sigma4_ptV": { "hist": "helicity_moments_scale", "axis": "ptVgen", - "action": lambda x: theory_tools.scale_angular_moments( + "action": lambda x: helicity_utils.helicity_xsec_to_angular_coeffs( x[ { "muRfact": 1.0j, @@ -58,7 +58,7 @@ "sigma4_absYV": { "hist": "helicity_moments_scale", "axis": "absYVgen", - "action": lambda x: theory_tools.scale_angular_moments( + "action": lambda x: helicity_utils.helicity_xsec_to_angular_coeffs( x[ { "muRfact": 1.0j, diff --git a/scripts/rabbit/regen_smoothing_params.py b/scripts/rabbit/regen_smoothing_params.py new file mode 100644 index 000000000..cb0ceeb43 --- /dev/null +++ b/scripts/rabbit/regen_smoothing_params.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +"""Dump per-region smoothing polynomial coefficients for SmoothExtendedABCD. + +Refit the linearized smoothing polynomial from a nominal fake histogram and +save the per-region polynomial coefficients in the SmoothExtendedABCD +power-series basis. The output can be loaded as initial parameter values in a +rabbit fit via ``params:PATH`` in the ``--paramModel SmoothExtendedABCDIsoMT`` +CLI token. 
+ +Typical use:: + + python scripts/rabbit/regen_smoothing_params.py \\ + -i mw_with_mu_eta_pt_scetlib_dyturbo.hdf5 \\ + -o /path/to/params.hdf5 +""" + +import os + +import h5py +import numpy as np + +from wremnants.postprocessing.datagroups.datagroups import Datagroups +from wremnants.postprocessing.histselections import FakeSelectorSimpleABCD +from wremnants.postprocessing.regression import Regressor +from wremnants.utilities import common, parsing +from wums import ioutils, logging, output_tools + +logger = logging.child_logger(__name__) + + +def make_parser(): + parser = parsing.base_parser() + parser.description = __doc__ + parser.add_argument( + "-i", + "--inputFile", + required=True, + type=str, + help="Input HDF5 histogram file (output of a histmaker).", + ) + parser.add_argument( + "-o", + "--outpath", + required=True, + type=str, + help="Output path for the params HDF5 file (.hdf5 appended if missing).", + ) + parser.add_argument( + "--inputBaseName", + default="nominal", + type=str, + help="Name of the nominal histogram inside the input file.", + ) + parser.add_argument( + "--fakerateAxes", + nargs="+", + default=["eta", "pt", "charge"], + help="Axes for the fakerate binning.", + ) + parser.add_argument( + "--fakeEstimation", + type=str, + default="extended1D", + choices=["simple", "extrapolate", "extended1D", "extended2D"], + help="Fake estimation mode (must match what will be used in setupRabbit).", + ) + parser.add_argument( + "--fakeSmoothingMode", + type=str, + default="full", + choices=FakeSelectorSimpleABCD.smoothing_modes, + help="Smoothing mode for fake estimate.", + ) + parser.add_argument( + "--fakeSmoothingOrder", + type=int, + default=3, + help="Polynomial order for the spectrum smoothing.", + ) + parser.add_argument( + "--fakeSmoothingPolynomial", + type=str, + default="chebyshev", + choices=Regressor.polynomials, + help="Polynomial type for the spectrum smoothing.", + ) + parser.add_argument( + "--excludeProcGroups", + type=str, + nargs="*", + default=["QCD"], + help="Process groups to exclude when building Datagroups.", + ) + parser.add_argument( + "--filterProcGroups", + type=str, + nargs="*", + default=None, + help="If set, keep only these process groups when building Datagroups.", + ) + return parser + + +def dump_smoothing_params( + outpath, fakeselector, datagroups, inputBaseName, meta_data_dict=None, postfix="" +): + """ + Re-fit the linearized smoothing polynomial for the nominal fake histogram + (without the region reduction step) and save the per-region polynomial + coefficients in the SmoothExtendedABCD power-series basis to *outpath*. + + The saved array has shape (5 * n_outer * (order+1),) with the layout + [A_params, B_params, C_params, Ax_params, Bx_params], matching the + internal layout expected by SmoothExtendedABCD as initial_params. + + Flat ABCD index ordering for the 5 regions with signal_region=False + (FakeSelector1DExtendedABCD, with flow=True, after y-axis flip so tight iso is last): + 0=Ax (low mt, fail iso), 1=Bx (low mt, pass iso), + 2=A (mid mt, fail iso), 3=B (mid mt, pass iso), + 4=C (signal mt, fail iso) ← application region, rabbit's free parameter + Note: signal_region=False drops flat index 5 = D (signal mt, pass iso = signal + region), which is rabbit's predicted region. 
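To make the index bookkeeping described above concrete, the following standalone sketch (toy arrays, not the analysis code) shows the reordering from the selector's flat ABCD order [Ax, Bx, A, B, C] to the model order [A, B, C, Ax, Bx] and the final flattening that dump_smoothing_params performs:

import numpy as np

# toy parameter block: (n_outer, 5 regions in flat order [Ax, Bx, A, B, C], order+1)
params_flat = np.arange(2 * 5 * 4).reshape(2, 5, 4).astype(float)

# reorder to the SmoothExtendedABCD model order [A, B, C, Ax, Bx]
model_order = [2, 3, 4, 0, 1]  # A<-flat 2, B<-flat 3, C<-flat 4, Ax<-flat 0, Bx<-flat 1
params_model = params_flat[:, model_order, :]

# flatten to the initial_params layout: [A_block, B_block, C_block, Ax_block, Bx_block],
# each block n_outer x (order+1) in C order, total length 5 * n_outer * (order+1)
initial_params = params_model.transpose(1, 0, 2).reshape(-1)
print(initial_params.shape)  # (40,)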
+ + Histselections ↔ SmoothExtendedABCD model name mapping: + histsel "application region" (signal mt + fail iso) = C_model (free) + histsel "signal region" (signal mt + pass iso) = D_model (predicted) + + Mapping to model order [A=0, B=1, C=2, Ax=3, Bx=4]: + model_A ← sideband flat 2 + model_B ← sideband flat 3 + model_C ← sideband flat 4 + model_Ax ← sideband flat 0 + model_Bx ← sideband flat 1 + """ + g = datagroups.fakeName + # Build the combined fake histogram (data - prompt MC) the same way the normal + # histogram loading does, but without applying the histselector. + datagroups.loadHistsForDatagroups( + inputBaseName, syst="", procsToRead=[g], label="_dump_tmp", applySelection=False + ) + h_fakes = datagroups.groups[g].hists["_dump_tmp"] + + # Single call with signal_region=False: returns 5 regions [Ax=0, Bx=1, A=2, B=3, C=4] + # The dropped 6th flat element (D = signal mt + pass iso) is the predicted region. + fakeselector.calculate_fullABCD_smoothed(h_fakes, signal_region=False) + if not hasattr(fakeselector, "_params_before_reduce"): + raise RuntimeError( + "dump_smoothing_params: _params_before_reduce not found on fakeselector. " + "Ensure fakeSmoothingMode='full' is used." + ) + # Shape: (*outer_dims, 5, order+1) — indices [Ax=0, Bx=1, A=2, B=3, C=4] + params_5d = fakeselector._params_before_reduce.copy() + + reg = fakeselector.spectrum_regressor + order = reg.order + + # Flatten outer dims → (n_outer_flat, n_abcd, order+1) + outer_shape = params_5d.shape[:-2] + n_outer_flat = int(np.prod(outer_shape)) + params_5d_flat = params_5d.reshape(n_outer_flat, 5, order + 1) + + # Assemble the 5 model regions in model order [A, B, C, Ax, Bx] + # A ← flat 2, B ← flat 3, C ← flat 4, Ax ← flat 0, Bx ← flat 1 + params_model = np.stack( + [ + params_5d_flat[:, 2, :], # A (mid mt, fail iso) + params_5d_flat[:, 3, :], # B (mid mt, pass iso) + params_5d_flat[:, 4, :], # C (signal mt, fail iso) = application region + params_5d_flat[:, 0, :], # Ax (low mt, fail iso) + params_5d_flat[:, 1, :], # Bx (low mt, pass iso) + ], + axis=1, + ) # (n_outer, 5, order+1) + + # The regressor was fit with polynomial=chebyshev, normalised x to [-1,1] + # via x_cheby = 2*(x - center)/(max-min). + # The SmoothExtendedABCD model uses a power-series basis normalised to [0,1] + # via x_norm = (x - x_min)/(x_max - x_min). + # Conversion: evaluate the Chebyshev polynomial at the model's x_norm points + # and refit with the power-series Vandermonde. + smooth_ax = h_fakes.axes[fakeselector.smoothing_axis_name] + x_centers = np.array(smooth_ax.centers) + bin_widths = np.array(smooth_ax.widths) + x_min_model = float(x_centers.min()) + x_max_model = float(x_centers.max()) + x_norm = ( + (x_centers - x_min_model) / (x_max_model - x_min_model) + if x_max_model > x_min_model + else np.zeros_like(x_centers) + ) + + # Transform x_centers to the regressor's Chebyshev domain [-1, 1] + x_cheby = reg.transform_x(x_centers) # shape (n_smooth,) + + # Evaluate Chebyshev polynomials T_k at x_cheby: shape (n_smooth, order+1) + n_smooth = len(x_cheby) + T = np.zeros((n_smooth, order + 1)) + T[:, 0] = 1.0 + if order >= 1: + T[:, 1] = x_cheby + for k in range(2, order + 1): + T[:, k] = 2.0 * x_cheby * T[:, k - 1] - T[:, k - 2] + + # Evaluate regressor log-rate at each pt bin: (n_outer, 5, n_smooth) + log_rate = np.einsum("oak,sk->oas", params_model, T) + + # The regressor fits log(data_yield / bin_width). 
+ # The model (with mc = ones) predicts yield = exp(-q), so: + # q(x) = -log(data_yield) = -(log_rate + log(bin_width)) + log_yield = log_rate + np.log(bin_widths) # broadcast over (n_outer, 5, n_smooth) + target = -log_yield # q values to represent as a power series + + # Build power-series Vandermonde for x_norm: shape (n_smooth, order+1) + V = np.column_stack([x_norm**k for k in range(order + 1)]) + + # Solve V @ q_coeffs = target for each (outer, abcd) in least-squares sense + target_mat = target.transpose(2, 0, 1).reshape(n_smooth, n_outer_flat * 5) + q_mat = np.linalg.lstsq(V, target_mat, rcond=None)[0] # (order+1, n_outer*5) + q_model = q_mat.T.reshape(n_outer_flat, 5, order + 1) # (n_outer, 5, order+1) + + # Flatten to model's layout [A_block, B_block, C_block, Ax_block, Bx_block] + # Each block: n_outer × (order+1) in C-order + params_out = q_model.transpose(1, 0, 2).reshape(-1) # (5 * n_outer * (order+1),) + if outpath and not os.path.isdir(outpath): + os.makedirs(outpath) + + outfile = f"{outpath}/params" + if postfix: + outfile += f"_{postfix}" + + outfile += ".hdf5" + + with h5py.File(outfile, mode="w") as f: + f.create_dataset("params", data=params_out) + f.create_dataset("order", data=np.array(order)) + f.create_dataset( + "smoothing_axis_name", + data=np.array(fakeselector.smoothing_axis_name, dtype=h5py.string_dtype()), + ) + f.create_dataset("n_outer", data=np.array(n_outer_flat)) + f.create_dataset("outer_shape", data=np.array(outer_shape, dtype="int64")) + if meta_data_dict is not None: + ioutils.pickle_dump_h5py("meta", meta_data_dict, f) + + logger.info( + f"Saved smoothing initial params to {outfile} " + f"(shape {params_out.shape}, n_outer={n_outer_flat}, n_abcd=5, order={order})" + ) + + +def main(): + parser = make_parser() + args = parser.parse_args() + logging.setup_logger(__file__, args.verbose, args.noColorLogger) + + dg = Datagroups( + args.inputFile, + excludeGroups=args.excludeProcGroups if args.excludeProcGroups else None, + filterGroups=args.filterProcGroups if args.filterProcGroups else None, + ) + + dg.fakerate_axes = args.fakerateAxes + dg.set_histselectors( + dg.getNames(), + args.inputBaseName, + mode=args.fakeEstimation, + smoothing_mode=args.fakeSmoothingMode, + smoothingOrderSpectrum=args.fakeSmoothingOrder, + smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, + mcCorr=None, + integrate_x=True, + forceGlobalScaleFakes=False, + abcdExplicitAxisEdges={}, + fakeTransferAxis="", + fakeTransferCorrFileName=None, + histAxesRemovedBeforeFakes=[], + ) + + fakeselector = dg.groups[dg.fakeName].histselector + logger.info(f"fakeselector type: {type(fakeselector).__name__}") + + meta_data_dict = { + "meta_info": output_tools.make_meta_info_dict( + args=args, + wd=common.base_dir, + ), + "meta_info_input": dg.getMetaInfo(), + } + + dump_smoothing_params( + args.outpath, + fakeselector, + dg, + args.inputBaseName, + meta_data_dict=meta_data_dict, + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/rabbit/setupRabbit.py b/scripts/rabbit/setupRabbit.py index 4132ab077..4c3705694 100644 --- a/scripts/rabbit/setupRabbit.py +++ b/scripts/rabbit/setupRabbit.py @@ -208,6 +208,11 @@ def make_subparsers(parser, argv=None): action="store_true", help="Include underflow/overflow in masked channels (for iterative unfolding)", ) + parser.add_argument( + "--unfoldingFullPhaseSpace", + action="store_true", + help="Include masked channel with extrapolation to the full phase space", + ) parser.add_argument( "--unfoldSimultaneousWandZ", action="store_true", 
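Returning to the basis change performed in dump_smoothing_params above: the regressor delivers Chebyshev coefficients on x mapped to [-1, 1], while the model expects a power series on x normalised to [0, 1], so the script evaluates the Chebyshev series at the bin centres and refits it with a power-series Vandermonde. A self-contained numerical sketch with toy inputs (it omits the sign flip and the log(bin_width) bookkeeping of the real code):

import numpy as np

order = 3
x_centers = np.linspace(30.0, 55.0, 26)          # toy smoothing-axis bin centres
cheb_coeffs = np.array([1.0, -0.3, 0.05, 0.01])  # toy Chebyshev coefficients for one region

# regressor domain: bin centres mapped to [-1, 1]
x_cheby = 2.0 * (x_centers - x_centers.min()) / (x_centers.max() - x_centers.min()) - 1.0
T = np.polynomial.chebyshev.chebvander(x_cheby, order)  # T_k(x_cheby), shape (26, order+1)
y = T @ cheb_coeffs                                     # smoothed log-rate at the bin centres

# model domain: power series on x normalised to [0, 1]
x_norm = (x_centers - x_centers.min()) / (x_centers.max() - x_centers.min())
V = np.column_stack([x_norm**k for k in range(order + 1)])
power_coeffs = np.linalg.lstsq(V, y, rcond=None)[0]     # same curve in the new basis

assert np.allclose(V @ power_coeffs, y)                 # exact up to numerical precision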
@@ -215,8 +220,10 @@ def make_subparsers(parser, argv=None): ) parser.add_argument( "--constrainNOIs", - action="store_true", - help="Constrain NOI variation", + type=str, + default=[], + nargs="+", + help="Constrain NOI variation for given groups", ) parser = parsing.set_parser_default(parser, "massVariation", 10) @@ -576,7 +583,15 @@ def make_parser(parser=None, argv=None): type=str, help="Set the mode for the fake estimation", default="extended1D", - choices=["mc", "closure", "simple", "extrapolate", "extended1D", "extended2D"], + choices=[ + "none", + "mc", + "closure", + "simple", + "extrapolate", + "extended1D", + "extended2D", + ], ) parser.add_argument( "--forceGlobalScaleFakes", @@ -709,15 +724,20 @@ def make_parser(parser=None, argv=None): help="Scale the PDF hessian uncertainties by this factor (by default take the value in the pdfInfo map)", ) parser.add_argument( - "--pdfUncFromCorr", + "--pdfUncFromUncorr", action="store_true", - help="Take PDF uncertainty from correction hist (requires having run that correction)", + help="Take PDF uncertainty from uncorrected hist (by default it reads it from the correction hist, but requires having run that correction)", ) parser.add_argument( "--asUncFromUncorr", action="store_true", help="Take alpha_S uncertainty from uncorrected hist (by default it reads it from the correction hist, but requires having run that correction)", ) + parser.add_argument( + "--minnloUncFromUncorr", + action="store_true", + help="Take minnlo muR-muF uncertainty from uncorrected hist (by default it reads it from the correction hist, but requires having run that correction)", + ) parser.add_argument( "--scaleMinnloScale", default=1.0, @@ -1069,6 +1089,7 @@ def setup( fitresult_data=None, unfolding_scalemap=None, base_group=None, + unfolding_with_flow=False, ): isUnfolding = args.analysisMode == "unfolding" isTheoryAgnostic = args.analysisMode in [ @@ -1304,7 +1325,7 @@ def setup( member_filter=lambda x: not x.name.endswith("OOA"), fitvar=fitvar, histToReadAxes=args.unfoldingLevel, - disable_flow_fit_axes=not (datagroups.xnorm and args.unfoldingWithFlow), + disable_flow_fit_axes=not (datagroups.xnorm and unfolding_with_flow), ) # out of acceptance contribution @@ -1321,38 +1342,49 @@ def setup( if args.qcdProcessName: datagroups.fakeName = args.qcdProcessName - abcdExplicitAxisEdges = {} - if len(args.ABCDedgesByAxis): - for item in args.ABCDedgesByAxis: - ax_name, ax_edges = item.split("=") - abcdExplicitAxisEdges[ax_name] = [float(x) for x in ax_edges.split(",")] - if wmass and not datagroups.xnorm: - datagroups.fakerate_axes = args.fakerateAxes - # datagroups.fakeTransferAxis = args.fakeTransferAxis if args.fakeTransferAxis in args.fakerateAxes else "" - # datagroups.fakeTransferCorrFileName = args.fakeTransferCorrFileName - histselector_kwargs = dict( - mode=args.fakeEstimation, - smoothing_mode=args.fakeSmoothingMode, - smoothingOrderSpectrum=args.fakeSmoothingOrder, - smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, - mcCorr=args.fakeMCCorr, - integrate_x="mt" not in fitvar, - forceGlobalScaleFakes=args.forceGlobalScaleFakes, - abcdExplicitAxisEdges=abcdExplicitAxisEdges, - fakeTransferAxis=( - args.fakeTransferAxis - if args.fakeTransferAxis in args.fakerateAxes - else "" - ), - fakeTransferCorrFileName=args.fakeTransferCorrFileName, - histAxesRemovedBeforeFakes=( - [str(x[0]) for x in args.presel] if args.presel else [] - ), - ) - datagroups.set_histselectors( - datagroups.getNames(), inputBaseName, **histselector_kwargs - ) + if 
args.fakeEstimation not in [None, "none"]: + abcdExplicitAxisEdges = {} + if len(args.ABCDedgesByAxis): + for item in args.ABCDedgesByAxis: + ax_name, ax_edges = item.split("=") + abcdExplicitAxisEdges[ax_name] = [ + float(x) for x in ax_edges.split(",") + ] + + datagroups.fakerate_axes = args.fakerateAxes + # datagroups.fakeTransferAxis = args.fakeTransferAxis if args.fakeTransferAxis in args.fakerateAxes else "" + # datagroups.fakeTransferCorrFileName = args.fakeTransferCorrFileName + histselector_kwargs = dict( + mode=args.fakeEstimation, + smoothing_mode=args.fakeSmoothingMode, + smoothingOrderSpectrum=args.fakeSmoothingOrder, + smoothingPolynomialSpectrum=args.fakeSmoothingPolynomial, + mcCorr=args.fakeMCCorr, + integrate_x="mt" not in fitvar, + forceGlobalScaleFakes=args.forceGlobalScaleFakes, + abcdExplicitAxisEdges=abcdExplicitAxisEdges, + fakeTransferAxis=( + args.fakeTransferAxis + if args.fakeTransferAxis in args.fakerateAxes + else "" + ), + fakeTransferCorrFileName=args.fakeTransferCorrFileName, + histAxesRemovedBeforeFakes=( + [str(x[0]) for x in args.presel] if args.presel else [] + ), + ) + datagroups.set_histselectors( + datagroups.getNames(), inputBaseName, **histselector_kwargs + ) + else: + from wremnants.postprocessing import histselections as sel + + g = datagroups.fakeName + members = datagroups.groups[g].members[:] + if len(members) == 0: + raise RuntimeError(f"No member found for group {g}") + datagroups.groups[g].histselector = sel.OnesSelector() logger.debug(f"Making datacards with these processes: {datagroups.getProcesses()}") @@ -1555,12 +1587,14 @@ def setup( args.pseudoDataProcsRegexp, ) + if datagroups.xnorm and isUnfolding and unfolding_with_flow: + masked_flow_axes = ["ptGen", "ptVGen"] + if "_full" in datagroups.channel: + masked_flow_axes.extend(["absEtaGen", "absYVGen"]) + else: + masked_flow_axes = [] + if args.correlateSignalMCstat and datagroups.xnorm: - masked_flow_axes = ( - ["ptGen", "ptVGen"] - if (datagroups.xnorm and isUnfolding and args.unfoldingWithFlow) - else [] - ) rabbit_helpers.add_nominal_with_correlated_BinByBinStat( datagroups, wmass, @@ -1569,20 +1603,21 @@ def setup( masked_flow_axes=masked_flow_axes, ) else: + if isUnfolding: + bin_by_bin_stat_scale = 1.0 + elif wmass: + bin_by_bin_stat_scale = args.binByBinStatScaleForMW + elif dilepton: + bin_by_bin_stat_scale = args.binByBinStatScaleForDilepton + else: + bin_by_bin_stat_scale = 1.0 + datagroups.addNominalHistograms( real_data=args.realData, - bin_by_bin_stat_scale=( - args.binByBinStatScaleForMW - if wmass - else args.binByBinStatScaleForDilepton if dilepton else 1.0 - ), + bin_by_bin_stat_scale=bin_by_bin_stat_scale, fitresult_data=fitresult_data, masked=datagroups.xnorm and fitresult_data is None, - masked_flow_axes=( - ["ptGen", "ptVGen"] - if (datagroups.xnorm and isUnfolding and args.unfoldingWithFlow) - else [] - ), + masked_flow_axes=masked_flow_axes, ) if stat_only and isUnfolding and not isPoiAsNoi: @@ -1621,6 +1656,8 @@ def setup( decorwidth=decorwidth, ) + add_theory_uncertainties = not stat_only and not args.noTheoryUnc + # this appears within doStatOnly because technically these nuisances should be part of it if isPoiAsNoi: if isTheoryAgnostic: @@ -1642,6 +1679,11 @@ def setup( theoryAgnostic_helper.add_theoryAgnostic_uncertainty() elif isUnfolding: + signal_groups = datagroups.expandProcesses("signal_samples") + if len(signal_groups) != 1: + raise NotImplementedError( + f"noi variations currently only works for 1 signal group but got {len(signal_groups)}" + ) 
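The --constrainNOIs option, redefined earlier in this diff from a boolean flag to a list of group names, is now consulted per signal group (see the constrained=... argument just below). A minimal argparse sketch of the new semantics; the group names here are made up for illustration:

import argparse

parser = argparse.ArgumentParser()
# mirrors the redefined option: a list of group names instead of a store_true flag
parser.add_argument("--constrainNOIs", type=str, default=[], nargs="+")
args = parser.parse_args(["--constrainNOIs", "Zmumu"])  # hypothetical group name

# each signal group is constrained only if it was listed on the command line
for group in ["Zmumu", "Wmunu"]:  # hypothetical groups
    print(group, "constrained" if group in args.constrainNOIs else "unconstrained")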
rabbit_helpers.add_noi_unfolding_variations( datagroups, label, @@ -1651,7 +1693,7 @@ def setup( scale_norm=args.scaleNormXsecHistYields, gen_level=args.unfoldingLevel, fitresult=unfolding_scalemap, - constrained=args.constrainNOIs, + constrained=signal_groups[0] in args.constrainNOIs, ) if args.muRmuFPolVar and not isTheoryAgnosticPolVar: @@ -1667,7 +1709,10 @@ def setup( if args.correlateSignalMCstat and datagroups.xnorm and args.fitresult is None: # use variations from reco histogram and apply them to xnorm - source = ("nominal", f"{inputBaseName}_yieldsUnfolding_theory_weight") + source = ( + "nominal", + f"{inputBaseName.replace('_full','')}_yieldsUnfolding_theory_weight", + ) # need to find the reco variables that correspond to the reco fit, reco fit must be done with variables in same order as gen bins gen2reco = { "qGen": "charge", @@ -1699,7 +1744,7 @@ def setup( ) if ("zwidth" in args.noi and not wmass) or ( - not datagroups.xnorm and not stat_only and not args.noTheoryUnc + not datagroups.xnorm and add_theory_uncertainties ): # Variation from EW fit (mostly driven by alphas unc.) datagroups.addSystematic( @@ -1719,7 +1764,7 @@ def setup( ) # TODO: move closer to W mass uncertainty? - if wmass and ("wwidth" in args.noi or (not stat_only and not args.noTheoryUnc)): + if wmass and ("wwidth" in args.noi or add_theory_uncertainties): rabbit_helpers.add_W_width_uncertainty( datagroups, signal_samples_forMass, @@ -1728,7 +1773,7 @@ def setup( label=label, ) - if "sin2thetaW" in args.noi or (not stat_only and not args.noTheoryUnc): + if "sin2thetaW" in args.noi or add_theory_uncertainties: datagroups.addSystematic( "sin2thetaWeightZ", name=f"Sin2thetaZ0p00003", @@ -1745,7 +1790,7 @@ def setup( passToFakes=passSystToFakes, ) - if "alphaS" in args.noi or (not stat_only and not args.noTheoryUnc): + if "alphaS" in args.noi or add_theory_uncertainties: theorySystSamples = ["signal_samples_inctau"] if wmass: if args.helicityFitTheoryUnc: @@ -1769,8 +1814,9 @@ def setup( np_model=args.npUnc, tnp_scale=args.scaleTNP, mirror_tnp=False, - pdf_from_corr=args.pdfUncFromCorr, + pdf_from_corr=not args.pdfUncFromUncorr, as_from_corr=not args.asUncFromUncorr, + minnlo_from_corr=not args.minnloUncFromUncorr, scale_pdf_unc=args.scalePdf, samples=theorySystSamples, minnlo_unc=args.minnloScaleUnc, @@ -1778,16 +1824,14 @@ def setup( from_hels=not args.noTheoryCorrsViaHelicities, theory_symmetrize=args.symmetrizeTheoryUnc, pdf_symmetrize=args.symmetrizePdfUnc, + helicity_fit_unc=args.helicityFitTheoryUnc, ) theory_helper.add_pdf_alphas_variation( noi="alphaS" in args.noi, ) - - if not stat_only and not args.noTheoryUnc: - theory_helper.add_all_theory_unc( - helicity_fit_unc=args.helicityFitTheoryUnc, - ) + if add_theory_uncertainties: + theory_helper.add_all_theory_unc() if stat_only: # print a card with only mass weights @@ -1796,7 +1840,7 @@ def setup( ) return datagroups - if not args.noTheoryUnc: + if add_theory_uncertainties: if wmass and not datagroups.xnorm: if args.massConstraintModeZ == "automatic": constrainMassZ = True @@ -3306,8 +3350,25 @@ def outputFolderName(outfolder, datagroups, doStatOnly, postfix): stat_only=args.doStatOnly or args.doStatOnlyMasked, channel=f"{channel}_masked", unfolding_scalemap=unfolding_scalemap, + unfolding_with_flow=args.unfoldingWithFlow, ) + if args.unfoldingFullPhaseSpace: + # add masked channel in full phase space + datagroups_xnorm = setup( + writer, + args, + ifile, + f"{args.unfoldingLevel}_full", + iLumiScale, + genvar, + genvar=genvar, + 
stat_only=args.doStatOnly or args.doStatOnlyMasked, + channel=f"{channel}_full_masked", + unfolding_scalemap=unfolding_scalemap, + unfolding_with_flow=True, + ) + if args.unfoldSimultaneousWandZ and datagroups.mode == "w_mass": # for simultaneous unfolding of W and Z we need to add the noi variations on the Z background in the single lepton channel diff --git a/wremnants/postprocessing/histselections.py b/wremnants/postprocessing/histselections.py index 117e9a3a5..2ccc1ef9a 100644 --- a/wremnants/postprocessing/histselections.py +++ b/wremnants/postprocessing/histselections.py @@ -152,9 +152,9 @@ def __init__( # or if abcdExplicitAxisEdges is provided from outside self.abcd_thresholds = { "pt": [26, 28, 30], - "mt": [0, 40] if self.ABCDmode == "simple" else [0, 20, 40], + "mt": [0, 40, np.inf] if self.ABCDmode == "simple" else [0, 20, 40, np.inf], "iso": [0, 4, 8, 12], - "relIso": [0, 0.15, 0.3, 0.45], + "relIso": [0, 0.15, np.inf], "relJetLeptonDiff": [0, 0.2, 0.35, 0.5], "dxy": [0, 0.01, 0.02, 0.03], } @@ -403,6 +403,23 @@ def get_hist(self, h, is_nominal=False): return self.get_hist_passX_passY(h) +class OnesSelector(object): + """ + Don't actually select anything, just set the values of the histogram to ones + This is e.g. to perform the ABCD method in the simultaneous fit of all regions to data + """ + + def __init__(self, *args, **kwargs): + pass + + # signal region selection + def get_hist(self, h, is_nominal=False): + h_new = h.copy() + h_new.values()[...] = np.ones_like(h.values()) + h_new.variances()[...] = np.zeros_like(h.values()) + return h_new + + class FakeSelectorSimpleABCD(HistselectorABCD): # supported smoothing mode choices smoothing_modes = ["binned", "fakerate", "hybrid", "full"] @@ -811,6 +828,11 @@ def smoothen( logger.debug("Reduce is " + str(reduce)) + # Always save params before (optional) reduction so the dump can access + # per-region polynomial coefficients for both signal_region=False and + # signal_region=True calls. 
+ self._params_before_reduce = regressor.params.copy() + if reduce: # add up parameters from smoothing of individual sideband regions if type(self) == FakeSelectorSimpleABCD: @@ -1552,13 +1574,15 @@ def __init__( # min and max (in application region) for transformation of bernstain polynomials into interval [0,1] axis_x_min = h[{self.name_x: self.sel_x}].axes[self.name_x].edges[0] if self.name_x == "mt": - # mt does not have an upper bound, cap at 100 + # mt may extend to infinity; use the last finite edge for the regressor edges = ( self.rebin_x if self.rebin_x is not None else h.axes[self.name_x].edges ) - axis_x_max = extend_edges(h.axes[self.name_x].traits, edges)[-1] + all_edges = extend_edges(h.axes[self.name_x].traits, edges) + finite_edges = all_edges[np.isfinite(all_edges)] + axis_x_max = finite_edges[-1] if len(finite_edges) > 0 else 100.0 elif self.name_x in ["iso", "relIso", "relJetLeptonDiff", "dxy"]: # iso and dxy have a finite lower and upper bound in the application region axis_x_max = self.abcd_thresholds[self.name_x][1] diff --git a/wremnants/postprocessing/rabbit_helpers.py b/wremnants/postprocessing/rabbit_helpers.py index a96a17e1a..ca96bb841 100644 --- a/wremnants/postprocessing/rabbit_helpers.py +++ b/wremnants/postprocessing/rabbit_helpers.py @@ -587,13 +587,16 @@ def add_explicit_BinByBinStat( } # integrate out gen axes for bin by bin uncertainties integration_var["acceptance"] = hist.sum + if "_full" in datagroups.channel: + sel = {"acceptance": hist.sum} + else: + sel = {"acceptance": True} + hnom = datagroups.groups[proc].hists[datagroups.nominalName] hnom = hnom.project(*datagroups.gen_axes_names) hvar = datagroups.groups[proc].hists["syst"] - hvar_acc = action_sel(hvar, {"acceptance": True}).project( - *recovar, *datagroups.gen_axes_names - ) + hvar_acc = action_sel(hvar, sel).project(*recovar, *datagroups.gen_axes_names) hvar_reco = action_sel(hvar, integration_var) rel_unc = np.sqrt(hvar_reco.variances(flow=True)) / hvar_reco.values(flow=True) @@ -602,10 +605,17 @@ def add_explicit_BinByBinStat( hvar_acc, rel_unc[..., *[np.newaxis] * len(datagroups.gen_axes_names)] ) + if hasattr(datagroups, "axes_disable_flow") and len(datagroups.axes_disable_flow): + hvar = hh.disableFlow(hvar, datagroups.axes_disable_flow) + # disable for for reco axes + hvar = hh.disableFlow( + hvar, [n for n in hvar.axes.name if n not in datagroups.gen_axes_names] + ) + datagroups.writer.add_beta_variations( hvar, proc, - source_channel=datagroups.channel.replace("_masked", ""), + source_channel=datagroups.channel.replace("_masked", "").replace("_full", ""), dest_channel=datagroups.channel, ) @@ -631,8 +641,13 @@ def add_nominal_with_correlated_BinByBinStat( sumFakesPartial=True, ) + if base_name.endswith("_full"): + sel = {"acceptance": hist.sum} + else: + sel = {"acceptance": True} + # load generator level nominal - gen_name = f"{base_name}_yieldsUnfolding_theory_weight" + gen_name = f"{base_name.replace('_full','')}_yieldsUnfolding_theory_weight" datagroups.loadHistsForDatagroups( baseName="nominal", syst=gen_name, @@ -644,12 +659,11 @@ def add_nominal_with_correlated_BinByBinStat( for proc in datagroups.predictedProcesses(): logger.info(f"Add process {proc} in channel {datagroups.channel}") - # nominal histograms of prediction norm_proc_hist_reco = datagroups.groups[proc].hists[gen_name] norm_proc_hist = datagroups.groups[proc].hists[datagroups.nominalName] - norm_proc_hist_reco = action_sel(norm_proc_hist_reco, {"acceptance": True}) + norm_proc_hist_reco = 
action_sel(norm_proc_hist_reco, sel) if norm_proc_hist_reco.axes.name != datagroups.fit_axes: norm_proc_hist_reco = norm_proc_hist_reco.project(*datagroups.fit_axes) @@ -877,9 +891,6 @@ def add_noi_unfolding_variations( poi_axes_syst = [f"_{n}" for n in poi_axes] if datagroups.xnorm else poi_axes[:] noi_args = dict( - histname=( - gen_level if datagroups.xnorm else f"nominal_{gen_level}_yieldsUnfolding" - ), name=f"nominal_{gen_level}_yieldsUnfolding", baseName=f"{label}_", group=f"normXsec{label}", @@ -913,21 +924,51 @@ def add_noi_unfolding_variations( if datagroups.xnorm: - def make_poi_xnorm_variations(h, poi_axes, poi_axes_syst, norm, h_scale=None): - h = hh.disableFlow( - h, - [ - "absYVGen", - "absEtaGen", - ], - ) + def make_poi_xnorm_variations( + hVar, hNom, poi_axes, poi_axes_syst, norm, h_scale=None + ): + if "_full" not in datagroups.channel: + # include flow in rapidity for total phase space measurement + hNom = hh.disableFlow( + hNom, + [ + "absYVGen", + "absEtaGen", + ], + ) + hVar = hh.disableFlow( + hVar, + [ + "absYVGen", + "absEtaGen", + ], + ) + hVar = hh.expand_hist_by_duplicate_axes( - h, poi_axes[::-1], poi_axes_syst[::-1] + hVar, poi_axes[::-1], poi_axes_syst[::-1] ) + if "_full" in datagroups.channel: + # Do not assign unconstrained parameters in these rapidity flow bins. i.e. disable flow of expanded axes + hVar = hh.disableFlow( + hVar, + [ + "_absYVGen", + "_absEtaGen", + ], + ) + # set the default scaling values for those to 1 + h_scale = hh.setFlow( + h_scale, + ["absYVGen", "absEtaGen"], + under=False, + over=True, + default_values=1.0, + ) + if h_scale is not None: hVar = hh.multiplyHists(hVar, h_scale) - return hh.addHists(h, hVar, scale2=norm) + return hh.addHists(hNom, hVar, scale2=norm) if scalemap is None: scalemap = get_scalemap( @@ -936,11 +977,14 @@ def make_poi_xnorm_variations(h, poi_axes, poi_axes_syst, norm, h_scale=None): gen_level, rename_axes={o: n for o, n in zip(poi_axes, poi_axes_syst)}, ) - + nominalName = datagroups.nominalName.replace("_full", "") datagroups.addSystematic( **noi_args, + histname=nominalName, + nominalName=nominalName, systAxesFlow=[f"_{n}" for n in poi_axes if n in poi_axes_flow], action=make_poi_xnorm_variations, + actionRequiresNomi=True, actionArgs=dict( poi_axes=poi_axes, poi_axes_syst=poi_axes_syst, @@ -971,6 +1015,7 @@ def make_poi_variations(h, poi_axes, norm, h_scale=None): datagroups.addSystematic( **noi_args, + histname=f"nominal_{gen_level}_yieldsUnfolding", systAxesFlow=[n for n in poi_axes if n in poi_axes_flow], preOpMap={ m.name: make_poi_variations diff --git a/wremnants/postprocessing/rabbit_theory_helper.py b/wremnants/postprocessing/rabbit_theory_helper.py index bbb579b81..292204661 100644 --- a/wremnants/postprocessing/rabbit_theory_helper.py +++ b/wremnants/postprocessing/rabbit_theory_helper.py @@ -73,7 +73,7 @@ def __init__(self, label, datagroups, args, hasNonsigSamples=False): self.datagroups = datagroups corr_hists = self.datagroups.args_from_metadata("theoryCorr") - self.corr_hist_name = (corr_hists[0] + "_Corr") if corr_hists else None + self.corr_hist_name = (corr_hists[0] + "_Corr") if corr_hists else "" self.syst_ax = "vars" self.corr_hist = None @@ -109,6 +109,7 @@ def configure( mirror_tnp=True, as_from_corr=True, pdf_from_corr=False, + minnlo_from_corr=True, pdf_operation=None, samples=[], scale_pdf_unc=-1.0, @@ -117,6 +118,7 @@ def configure( from_hels=False, theory_symmetrize="quadratic", pdf_symmetrize="quadratic", + helicity_fit_unc=False, ): self.set_resum_unc_type(resumUnc) @@ 
-128,11 +130,12 @@ def configure( self.tnp_scale = tnp_scale self.mirror_tnp = mirror_tnp self.pdf_from_corr = pdf_from_corr - self.as_from_corr = pdf_from_corr or as_from_corr + self.as_from_corr = as_from_corr + self.minnlo_from_corr = minnlo_from_corr self.pdf_operation = pdf_operation self.scale_pdf_unc = scale_pdf_unc self.samples = samples - self.helicity_fit_unc = False + self.helicity_fit_unc = helicity_fit_unc self.minnlo_scale = minnlo_scale self.from_hels = from_hels @@ -140,10 +143,10 @@ def configure( self.theory_symmetrize = convert_none(theory_symmetrize) self.pdf_symmetrize = convert_none(pdf_symmetrize) - def add_all_theory_unc(self, helicity_fit_unc=False): - self.helicity_fit_unc = helicity_fit_unc + def add_all_theory_unc(self): self.add_nonpert_unc(model=self.np_model) self.add_resum_unc(scale=self.tnp_scale) + self.add_fixed_order_unc() if "nnlojet" in self.corr_hist_name: self.add_stat_unc() # additional uncertainty for effect of shower and intrinsic kt on angular coeffs @@ -254,6 +257,7 @@ def add_resum_unc(self, scale=1): transition=self.transitionUnc, ) + def add_fixed_order_unc(self): if self.minnlo_unc and self.minnlo_unc not in ["none", None]: # sigma_-1 uncertainty is covered by scetlib-dyturbo uncertainties if they are used helicities_to_exclude = None if self.resumUnc == "minnlo" else [-1] @@ -266,7 +270,7 @@ def add_resum_unc(self, scale=1): # orthogonality of chebychev polynomials means that there is no # double counting with fully correlated uncertainty scale_inclusive = 1.0 - else: + elif "Pt" in self.minnlo_unc: fine_pt_binning = binning.ptV_binning[::2] nptfine = len(fine_pt_binning) - 1 scale_inclusive = np.sqrt((nptfine - 1) / nptfine) @@ -279,6 +283,8 @@ def add_resum_unc(self, scale=1): scale=self.minnlo_scale, symmetrize=self.theory_symmetrize, ) + else: + scale_inclusive = 1.0 self.add_minnlo_scale_uncertainty( sample_group, @@ -293,7 +299,6 @@ def add_minnlo_scale_uncertainty( self, sample_group, extra_name="", - use_hel_hist=True, rebin_pt=None, helicities_to_exclude=None, pt_min=None, @@ -306,40 +311,38 @@ def add_minnlo_scale_uncertainty( ) return - helicity = "Helicity" in self.minnlo_unc - pt_binned = "Pt" in self.minnlo_unc - scale_hist = ( - "qcdScale" if not (helicity or use_hel_hist) else "qcdScaleByHelicity" - ) - if "helicity" in scale_hist.lower(): - use_hel_hist = True + group_name = f"QCDscale{self.sample_label(sample_group)}" + base_name = f"{group_name}{extra_name}" - # All possible syst_axes + pt_binned = "Pt" in self.minnlo_unc # TODO: Move the axes to common and refer to axis_chargeVgen etc by their name attribute, not just # assuming the name is unchanged obs = self.datagroups.fit_axes pt_ax = "ptVgen" if "ptVgen" not in obs else "ptVgenAlt" - syst_axes = [pt_ax, "vars"] - syst_ax_labels = ["PtV", "var"] - format_with_values = ["edges", "center"] + if self.minnlo_from_corr: + scale_hist = "qcdScaleByHelicity" - group_name = f"QCDscale{self.sample_label(sample_group)}" - base_name = f"{group_name}{extra_name}" + syst_axes = ["vars"] + syst_ax_labels = ["var"] - skip_entries = [] - preop_map = {} - - # skip nominal - skip_entries.append({"vars": "nominal"}) + skip_entries = [] + # skip nominal + skip_entries.append({"vars": "nominal"}) + # skip pythia shower and kt variations since they are handled elsewhere + skip_entries.append({"vars": "pythia_shower_kt"}) - # skip pythia shower and kt variations since they are handled elsewhere - skip_entries.append({"vars": "pythia_shower_kt"}) + # All possible syst_axes + syst_axes = 
[pt_ax, *syst_axes] + syst_ax_labels = ["PtV", *syst_ax_labels] + format_with_values = ["edges", "center"] - if helicities_to_exclude: - for helicity in helicities_to_exclude: - skip_entries.append({"vars": f"helicity_{helicity}_Down"}) - skip_entries.append({"vars": f"helicity_{helicity}_Up"}) + if helicities_to_exclude: + for helicity in helicities_to_exclude: + skip_entries.append({"vars": f"helicity_{helicity}_Down"}) + skip_entries.append({"vars": f"helicity_{helicity}_Up"}) + else: + scale_hist = "qcdScale" # NOTE: The map needs to be keyed on the base procs not the group names, which is # admittedly a bit nasty @@ -347,9 +350,6 @@ def add_minnlo_scale_uncertainty( logger.debug(f"using {scale_hist} histogram for QCD scale systematics") logger.debug(f"expanded_samples: {expanded_samples}") - preop_map = {} - preop_args = {} - if pt_binned: signal_samples = self.datagroups.procGroups["signal_samples"] binning = np.array(rebin_pt) if rebin_pt else None @@ -382,29 +382,34 @@ def add_minnlo_scale_uncertainty( pt_idx = np.argmax(binning >= pt_min) skip_entries.extend([{pt_ax: complex(0, x)} for x in binning[:pt_idx]]) - func = ( + preop = ( syst_tools.gen_hist_to_variations if pt_ax == "ptVgenAlt" else syst_tools.hist_to_variations ) - preop_map = {proc: func for proc in expanded_samples} - preop_args["gen_axes"] = [pt_ax] - preop_args["rebin_axes"] = [pt_ax] - preop_args["rebin_edges"] = [binning] + + preop_args = { + "gen_axes": [pt_ax], + "rebin_axes": [pt_ax], + "rebin_edges": [binning], + } if pt_ax == "ptVgenAlt": preop_args["gen_obs"] = ["ptVgen"] + else: + preop = None + preop_args = {} # Skip MiNNLO unc. - if self.resumUnc and not (pt_binned or helicity): + if self.resumUnc and not (pt_binned or "Helicity" in self.minnlo_unc): logger.warning( "Without pT or helicity splitting, only the SCETlib uncertainty will be applied!" 
) - else: + elif self.minnlo_from_corr: # FIXME Maybe put W and Z nuisances in the same group group_name += f"MiNNLO" self.datagroups.addSystematic( scale_hist, - preOpMap=preop_map, + preOp=preop, preOpArgs=preop_args, symmetrize=symmetrize, processes=[sample_group], @@ -425,6 +430,40 @@ def add_minnlo_scale_uncertainty( scale=scale, name=base_name, # Needed to allow it to be called multiple times ) + else: + # vary muR or muF one at a time + for var, other in [["muRfact", "muFfact"], ["muFfact", "muRfact"]]: + group_name += f"MiNNLO" + self.datagroups.addSystematic( + scale_hist, + preOp=lambda h, *args, **kwargs: ( + preop(h[{other: 1}], *args, **kwargs) + if preop + else h[{other: 1}] + ), + preOpArgs=preop_args, + symmetrize=symmetrize, + processes=[sample_group], + groups=[ + group_name, + "QCDscale", + "angularCoeffs", + "theory", + "theory_qcd", + ], + splitGroup={}, + systAxes=[var], + systNameReplace=[ + [f"{var}0p5", f"{var}Down"], + [f"{var}2", f"{var}Up"], + ], + skipEntries=[{var: 1}], + baseName=base_name + "_", + formatWithValue=["center"], + passToFakes=self.propagate_to_fakes, + scale=scale, + name=base_name, # Needed to allow it to be called multiple times + ) def add_helicity_shower_kt_uncertainty(self): # select the proper variation and project over gen pt unless it is one of the fit variables diff --git a/wremnants/postprocessing/syst_tools.py b/wremnants/postprocessing/syst_tools.py index 13ad732a0..a76336869 100644 --- a/wremnants/postprocessing/syst_tools.py +++ b/wremnants/postprocessing/syst_tools.py @@ -902,6 +902,9 @@ def hist_to_variations( # all the axes have already been projected out, nothing else to do return hist_in + if "vars" not in hist_in.axes.name: + return hist_in + nom_hist = hist_in[{"vars": 0}] nom_hist_sum = nom_hist[gen_sum_expr] diff --git a/wremnants/postprocessing/theory_fit_writer.py b/wremnants/postprocessing/theory_fit_writer.py index fb32781dc..c34b41f01 100644 --- a/wremnants/postprocessing/theory_fit_writer.py +++ b/wremnants/postprocessing/theory_fit_writer.py @@ -37,16 +37,18 @@ def _keep_systematic(self, name): return True def set_reference(self, channel, h, lumi=1.0, scale=1.0, postOp=None): + ptV_name = self.get_ptV_axis_name(h) + absYV_name = self.get_absYV_axis_name(h) self.ref[channel] = { "h": h, "lumi": lumi, "scale": scale, "postOp": postOp, - "ptV_name": self.get_ptV_axis_name(h), - "absYV_name": self.get_absYV_axis_name(h), + "ptV_name": ptV_name, + "absYV_name": absYV_name, "chargeV_name": self.get_charge_axis_name(h), - "ptV_bins": h.axes[self.get_ptV_axis_name(h)].edges, - "absYV_bins": h.axes[self.get_absYV_axis_name(h)].edges, + "ptV_bins": h.axes[ptV_name].edges, + "absYV_bins": h.axes[absYV_name].edges, } self.logger.debug("Initialized channel %s with parameters", channel) self.logger.debug(pprint.pformat(self.ref[channel])) @@ -264,13 +266,13 @@ def get_ptV_axis_name(self, h): for name in ["ptVgen", "ptVGen", "qT"]: if name in h.axes.name: return name - self.logger.debug("Did not find pT axis. Available axes: %s", h.axes.name) + raise ValueError(f"Did not find pT axis. Available axes: {list(h.axes.name)}") def get_absYV_axis_name(self, h): for name in ["absYVgen", "absYVGen", "absY"]: if name in h.axes.name: return name - self.logger.debug("Did not find absY axis. Available axes: %s", h.axes.name) + raise ValueError(f"Did not find absY axis. 
Available axes: {list(h.axes.name)}") def get_charge_axis_name(self, h): for name in ["chargeVgen", "charge", "qGen"]: diff --git a/wremnants/production/generator_level_definitions.py b/wremnants/production/generator_level_definitions.py index 22101262b..4676c2658 100644 --- a/wremnants/production/generator_level_definitions.py +++ b/wremnants/production/generator_level_definitions.py @@ -402,10 +402,10 @@ def define_postfsr_vars(df, mode=None): else: df = df.Alias("postfsrMET_pt", "MET_fiducialGenPt") df = df.Alias("postfsrMET_phi", "MET_fiducialGenPhi") - df = df.Define( - "postfsrPTV", - "wrem::pt_2(postfsrLep_pt, postfsrLep_phi, postfsrMET_pt, postfsrMET_phi)", - ) + df = df.Define( + "postfsrPTV", + "wrem::pt_2(postfsrLep_pt, postfsrLep_phi, postfsrMET_pt, postfsrMET_phi)", + ) df = df.Define( "postfsrMT", diff --git a/wremnants/production/systematics.py b/wremnants/production/systematics.py index 28b3d1bce..e16d1a251 100644 --- a/wremnants/production/systematics.py +++ b/wremnants/production/systematics.py @@ -31,7 +31,7 @@ def add_syst_hist( axes, cols, tensor_name=None, - tensor_axes=[], + tensor_axes=None, addhelicity=False, propagateToHelicity=False, nhelicity=6, @@ -53,7 +53,9 @@ def add_syst_hist( 2) templates corresponding to each helicity cross section differential in relevant quantities that are sensitive to polarization (e.g. cos(theta*), phi*, lepton eta) -> Reweight event by event via "helWeight_tensor" """ - if not isinstance(tensor_axes, (list, tuple)): + if tensor_axes is None: + tensor_axes = [] + elif not isinstance(tensor_axes, (list, tuple)): tensor_axes = [tensor_axes] if addhelicity: if len(tensor_axes) == 0: @@ -1347,7 +1349,7 @@ def add_theory_hists( args, dataset_name, corr_helpers, - theory_helpers, + helicity_smoothing_helpers, axes, cols, base_name="nominal", @@ -1418,21 +1420,21 @@ def add_theory_hists( if for_wmass or isZ: - if theory_helpers.get("qcdScale") is not None: + if helicity_smoothing_helpers.get("qcdScale") is not None: logger.debug(f"Make QCD scale histograms for {dataset_name}") # there is no W backgrounds for the Wlike, make QCD scale histograms only for Z # should probably remove the charge here, because the Z only has a single charge and the pt distribution does not depend on which charged lepton is selected add_qcdScaleByHelicityUnc_hist( results, df, - theory_helpers.get("qcdScale"), + helicity_smoothing_helpers.get("qcdScale"), scale_axes, scale_cols, **info, ) - if theory_helpers.get("pdf") is not None: - pdf_helpers = theory_helpers.get("pdf") + if helicity_smoothing_helpers.get("pdf") is not None: + pdf_helpers = helicity_smoothing_helpers.get("pdf") for pdf in args.pdfs: pdf_name = theory_utils.pdfMap[pdf]["name"] if pdf_name not in pdf_helpers.keys(): @@ -1453,7 +1455,7 @@ def add_theory_hists( cols, **info, ) - for pdf_name, pdf_from_corr_helper in theory_helpers.get( + for pdf_name, pdf_from_corr_helper in helicity_smoothing_helpers.get( "pdf_from_corr", {} ).items(): logger.debug( @@ -1470,7 +1472,7 @@ def add_theory_hists( **info, ) - for k, v in theory_helpers.get("alphaS", {}).items(): + for k, v in helicity_smoothing_helpers.get("alphaS", {}).items(): logger.debug( f"Make alphaS uncertainty by helicity histogram for {dataset_name} and alphaS from correction {k}" ) diff --git a/wremnants/production/theory_corrections.py b/wremnants/production/theory_corrections.py index 139515dd6..eafc03985 100644 --- a/wremnants/production/theory_corrections.py +++ b/wremnants/production/theory_corrections.py @@ -380,7 +380,9 @@ def 
define_pdf_columns(df, dataset_name, pdfs, noAltUnc): return df -def define_central_pdf_weight_from_helicities(df, dataset_name, pdf, theory_helpers): +def define_central_pdf_weight_from_helicities( + df, dataset_name, pdf, helicity_smoothing_helpers +): logger.info("Using PDF weights from helicities for the central PDF weight") pdf_name = theory_utils.pdfMap[pdf]["name"] @@ -390,7 +392,7 @@ def define_central_pdf_weight_from_helicities(df, dataset_name, pdf, theory_help df = df.DefinePerSample("unity", "1.") df = df.Define( tensorName, - theory_helpers["pdf_central"], + helicity_smoothing_helpers["pdf_central"], [ "massVgen", "absYVgen", @@ -430,7 +432,9 @@ def define_central_pdf_weight(df, dataset_name, pdf): ) -def define_theory_weights_and_corrs(df, dataset_name, helpers, args, theory_helpers={}): +def define_theory_weights_and_corrs( + df, dataset_name, helpers, args, helicity_smoothing_helpers={} +): if "LHEPart_status" in df.GetColumnNames(): df = generator_level_definitions.define_lhe_vars(df) @@ -455,15 +459,15 @@ def define_theory_weights_and_corrs(df, dataset_name, helpers, args, theory_help df = df.DefinePerSample("theory_weight_truncate", "10.") if ( - theory_helpers - and "pdf_central" in theory_helpers.keys() - and theory_helpers["pdf_central"] is not None + helicity_smoothing_helpers + and "pdf_central" in helicity_smoothing_helpers.keys() + and helicity_smoothing_helpers["pdf_central"] is not None ): df = define_central_pdf_weight_from_helicities( df, dataset_name, args.pdfs[0] if len(args.pdfs) >= 1 else None, - theory_helpers, + helicity_smoothing_helpers, ) else: # if no boson-parametrized weights are available df = define_central_pdf_weight( @@ -1033,19 +1037,19 @@ def make_corr_by_helicity( return corr_coeffs -def make_theory_helpers( +def make_helicity_smoothing_helpers( pdfs, theory_corr=[], procs=["Z", "W"], corrs=["qcdScale", "pdf", "pdf_from_corr", "alphaS", "pdf_central"], ): - theory_helpers_procs = {p: {} for p in procs} + helicity_smoothing_helpers_procs = {p: {} for p in procs} - for proc in theory_helpers_procs.keys(): + for proc in helicity_smoothing_helpers_procs.keys(): if "qcdScale" in corrs: - theory_helpers_procs[proc]["qcdScale"] = ( + helicity_smoothing_helpers_procs[proc]["qcdScale"] = ( make_qcd_uncertainty_helper_by_helicity( is_z=proc == "Z", rebin_ptVgen=False, @@ -1054,7 +1058,7 @@ def make_theory_helpers( ) if "pdf" in corrs: - theory_helpers_procs[proc]["pdf"] = ( + helicity_smoothing_helpers_procs[proc]["pdf"] = ( make_pdfs_uncertainties_helper_by_helicity( proc=proc, pdfs=pdfs, @@ -1062,7 +1066,7 @@ def make_theory_helpers( ) if "pdf_from_corr" in corrs: pdf_from_corrs = [x + "_Corr" for x in theory_corr if "pdfvar" in x] - theory_helpers_procs[proc]["pdf_from_corr"] = ( + helicity_smoothing_helpers_procs[proc]["pdf_from_corr"] = ( make_pdfs_from_corrs_uncertainties_helper_by_helicity( proc=proc, pdfs_from_corrs=pdf_from_corrs, @@ -1070,14 +1074,14 @@ def make_theory_helpers( ) if "alphaS" in corrs: as_vars = [x + "_Corr" for x in theory_corr if "pdfas" in x] - theory_helpers_procs[proc]["alphaS"] = ( + helicity_smoothing_helpers_procs[proc]["alphaS"] = ( make_alphaS_uncertainties_helper_by_helicity( proc=proc, as_vars=as_vars, ) ) if "pdf_central" in corrs: - theory_helpers_procs[proc]["pdf_central"] = ( + helicity_smoothing_helpers_procs[proc]["pdf_central"] = ( make_uncertainty_helper_by_helicity( proc=proc, nom=theory_utils.pdfMap[pdfs[0]]["name"], @@ -1088,7 +1092,7 @@ def make_theory_helpers( ) ) - return theory_helpers_procs + 


def make_qcd_uncertainty_helper_by_helicity(
diff --git a/wremnants/production/unfolding_tools.py b/wremnants/production/unfolding_tools.py
index b6b9b1ca9..1aaa0a5ad 100644
--- a/wremnants/production/unfolding_tools.py
+++ b/wremnants/production/unfolding_tools.py
@@ -41,9 +41,9 @@ def define_gen_level(df, dataset_name, gen_levels=["prefsr", "postfsr"], mode="w
    singlelep = mode[0] == "w" or "wlike" in mode

-    if "prefsr" in gen_levels:
-        df = generator_level_definitions.define_prefsr_vars(df)
+    df = generator_level_definitions.define_prefsr_vars(df)
+
+    if "prefsr" in gen_levels:
        # needed for fiducial phase space definition
        df = df.Alias("prefsrV_mass", "massVgen")
        df = df.Alias("prefsrV_pt", "ptVgen")
@@ -182,7 +182,7 @@ def add_xnorm_histograms(
    args,
    dataset_name,
    corr_helpers,
-    theory_helpers,
+    helicity_smoothing_helpers,
    unfolding_axes,
    unfolding_cols,
    base_name="xnorm",
@@ -193,7 +193,11 @@ def add_xnorm_histograms(
    df_xnorm = df_xnorm.DefinePerSample("exp_weight", "1.0")

    df_xnorm = theory_corrections.define_theory_weights_and_corrs(
-        df_xnorm, dataset_name, corr_helpers, args, theory_helpers=theory_helpers
+        df_xnorm,
+        dataset_name,
+        corr_helpers,
+        args,
+        helicity_smoothing_helpers=helicity_smoothing_helpers,
    )

    df_xnorm = df_xnorm.Define("xnorm", "0.5")
@@ -231,7 +235,7 @@ def add_xnorm_histograms(
        args,
        dataset_name,
        corr_helpers,
-        theory_helpers,
+        helicity_smoothing_helpers,
        xnorm_axes,
        xnorm_cols,
        base_name=base_name,
@@ -341,6 +345,7 @@ def rebin_pt(edges):
    if len(new_edges) % 2:
        # in case it's an odd number of edges, last two bins are overflow
        edges = edges[:-1]
+        new_edges = np.array([*edges[:2], *edges[3::2]])

    return new_edges
@@ -436,7 +441,7 @@ def __init__(
        )

    def add_gen_histograms(
-        self, args, df, results, dataset, corr_helpers, theory_helpers={}
+        self, args, df, results, dataset, corr_helpers, helicity_smoothing_helpers={}
    ):
        df = define_gen_level(
            df, dataset.name, self.unfolding_levels, mode=self.analysis_label
@@ -454,47 +459,53 @@
            )
        else:
            for level in self.unfolding_levels:
-                # add full phase space histograms for inclusive cross section
-                df_full = df.Filter(f"{level}V_mass > 60")
-                df_full = df_full.Filter(f"{level}V_mass < 120")
+                df = select_fiducial_space(
+                    df,
+                    level,
+                    mode=self.analysis_label,
+                    selections=self.unfolding_selections[level],
+                    select=not self.poi_as_noi,
+                    accept=True,
+                    **self.cutsmap,
+                )
+
+            if self.unfolding_corr_helper:
+                logger.debug("Apply reweighting based on unfolded result")
+                df = df.Define(
+                    "unfoldingWeight_tensor",
+                    self.unfolding_corr_helper,
+                    [*self.unfolding_corr_helper.hist.axes.name[:-1], "unity"],
+                )
+                # fitresult reweighting only supported for a single unfolding level
+                # (enforced in __init__); use index 0 explicitly rather than relying
+                # on loop-variable persistence from the select_fiducial_space loop above
+                df = df.Define(
+                    "central_weight",
+                    f"{self.unfolding_levels[0]}_acceptance ? unfoldingWeight_tensor(0) : unity",
+                )
+
+            for level in self.unfolding_levels:
+                # add full phase space histograms for inclusive cross section,
+                # the mass cut has to be applied at preFSR level to compare to SMP-20-004
+                df_full = df.Filter("massVgen > 60")
+                df_full = df_full.Filter("massVgen < 120")
                add_xnorm_histograms(
                    results,
                    df_full,
                    args,
                    dataset.name,
                    corr_helpers,
-                    theory_helpers,
+                    helicity_smoothing_helpers,
                    [a for a in self.unfolding_axes[level] if a.name != "acceptance"],
                    [
                        c
                        for c in self.unfolding_cols[level]
                        if c != f"{level}_acceptance"
                    ],
+                    add_helicity_axis=self.add_helicity_axis,
                    base_name=f"{level}_full",
                )

-                df = select_fiducial_space(
-                    df,
-                    level,
-                    mode=self.analysis_label,
-                    selections=self.unfolding_selections[level],
-                    select=not self.poi_as_noi,
-                    accept=True,
-                    **self.cutsmap,
-                )
-
-                if self.unfolding_corr_helper:
-                    logger.debug("Apply reweighting based on unfolded result")
-                    df = df.Define(
-                        "unfoldingWeight_tensor",
-                        self.unfolding_corr_helper,
-                        [*self.unfolding_corr_helper.hist.axes.name[:-1], "unity"],
-                    )
-                    df = df.Define(
-                        "central_weight",
-                        f"{level}_acceptance ? unfoldingWeight_tensor(0) : unity",
-                    )
-
                if self.poi_as_noi:
                    df_xnorm = df.Filter(f"{level}_acceptance")
                else:
@@ -506,7 +517,7 @@
                    args,
                    dataset.name,
                    corr_helpers,
-                    theory_helpers,
+                    helicity_smoothing_helpers,
                    [a for a in self.unfolding_axes[level] if a.name != "acceptance"],
                    [
                        c
diff --git a/wremnants/utilities/binning.py b/wremnants/utilities/binning.py
index 3c027ab61..cde0eb72a 100644
--- a/wremnants/utilities/binning.py
+++ b/wremnants/utilities/binning.py
@@ -448,6 +448,8 @@ def get_unfolding_dilepton_axes(
                2, -2.0, 2.0, underflow=False, overflow=False, name="qVGen"
            )
        )
+    elif v in ["massVGen"]:
+        axes.append(hist.axis.Regular(1, 60.0, 120.0, name="massVGen"))
    else:
        raise NotImplementedError(f"Unfolding dilepton axis {v} is not supported.")

@@ -536,7 +538,7 @@ def make_bw_binning(
# axes with only a few bins for beyond simple ABCD methods
axis_isoCat = hist.axis.Variable([0, 4, 8], name="iso", underflow=False, overflow=True)
axis_relIsoCat = hist.axis.Variable(
-    [0, 0.15, 0.3], name="relIso", underflow=False, overflow=True
+    [0, 0.15, 0.3, np.inf], name="relIso", underflow=False, overflow=False
)


@@ -578,6 +580,8 @@ def get_binning_fakes_mt(mt_cut=40, high_mt_bins=False, fine_mt_binning=False):
        edges = np.append(
            edges, np.linspace(mt_cut + step, end, int((end - mt_cut) / step))
        )
+    # last bin captures everything above the highest finite edge (no overflow needed)
+    edges = np.append(edges, np.inf)
    return edges


@@ -586,6 +590,8 @@ def get_binning_fakes_relIso(high_iso_bins=False):
    if high_iso_bins:
        # needed for extended 2D method
        edges.append(0.3)
+    # last bin captures everything above the highest finite edge (no overflow needed)
+    edges.append(np.inf)
    return edges


diff --git a/wremnants/utilities/parsing.py b/wremnants/utilities/parsing.py
index 0e75abe50..381e0996b 100644
--- a/wremnants/utilities/parsing.py
+++ b/wremnants/utilities/parsing.py
@@ -203,10 +203,15 @@ def __call__(self, parser, namespace, values, option_string=None):
        ],
        help="Add EW theory corrections without modifying the default theoryCorr list. Will be appended to args.theoryCorr",
    )
+    parser.add_argument(
+        "--skipByHelicityCorrection",
+        action="store_true",
+        help="Apply the QCD corrections and uncertainties directly from the MiNNLO event weights instead of the default by-helicity reweighting",
+    )
    parser.add_argument(
        "--skipHelicity",
        action="store_true",
-        help="Skip the qcdScaleByHelicity histogram (it can be huge)",
+        help="Skip the qcdScaleByHelicity histogram production (it can be huge)",
    )
    parser.add_argument(
        "--noRecoil", action="store_true", help="Don't apply recoil correction"
    )
@@ -704,6 +709,7 @@ def __call__(self, parser, namespace, values, option_string=None):
            "qVGen",
            "ptVGen",
            "absYVGen",
+            "massVGen",
            "helicitySig",
        ],
        help="Generator level variable",
diff --git a/wums b/wums
index c7e8a11a1..23d5b2034 160000
--- a/wums
+++ b/wums
@@ -1 +1 @@
-Subproject commit c7e8a11a136b4b9d0df1f6fcb21d6127a4feea72
+Subproject commit 23d5b20343c022d31c30b480e6b40e2d830dfe1f
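
Note (reviewer sketch, not part of the diff): the tensor_axes=[] -> tensor_axes=None change in add_syst_hist avoids Python's shared-mutable-default pitfall. The dict defaults that remain in the diff (helicity_smoothing_helpers={} in define_theory_weights_and_corrs and add_gen_histograms) are presumably only read, so they are likely harmless, but the same sentinel idiom would make that explicit. Minimal illustration; the function names below are made up for this sketch:

def buggy(values, extra=[]):
    # the default list is created once at definition time and shared by every call
    extra.append(len(values))
    return extra

def fixed(values, extra=None):
    # None sentinel: a fresh list per call, scalars normalized to a one-element list,
    # mirroring the tensor_axes handling added in add_syst_hist
    if extra is None:
        extra = []
    elif not isinstance(extra, (list, tuple)):
        extra = [extra]
    extra = list(extra)
    extra.append(len(values))
    return extra

assert buggy([1]) == [1]
assert buggy([2, 3]) == [1, 2]   # surprise: state from the first call leaks into the second
assert fixed([1]) == [1]
assert fixed([2, 3]) == [2]      # independent calls stay independent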
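
Note (reviewer sketch, not part of the diff): the np.inf upper edges added in binning.py trade an explicit overflow bin for an ordinary, open-ended last bin, so entries above the last finite edge are still counted in a regular bin (this is what the "no overflow needed" comments refer to). A small standalone example with the hist package; the fill values are invented for illustration:

import numpy as np
import hist

# same construction as the relIso category axis in the diff
axis_relIso = hist.axis.Variable(
    [0, 0.15, 0.3, np.inf], name="relIso", underflow=False, overflow=False
)
h = hist.Hist(axis_relIso)

# 0.31 and 12.0 both land in the open-ended [0.3, inf) bin instead of an overflow bin
h.fill(relIso=[0.05, 0.2, 0.31, 12.0])
print(h.values())  # expected: [1. 1. 2.]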
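
Note (reviewer sketch, not part of the diff): the line added to rebin_pt, new_edges = np.array([*edges[:2], *edges[3::2]]), keeps the first fine bin and merges the remaining bins pairwise. With hypothetical edge values:

import numpy as np

edges = np.array([26.0, 27.0, 28.0, 29.0, 30.0, 31.0])  # made-up fine binning, even number of edges
new_edges = np.array([*edges[:2], *edges[3::2]])         # keep first bin, then every second edge
print(new_edges)  # expected: [26. 27. 29. 31.] -> bins 27-28/28-29 and 29-30/30-31 are merged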