From b8dd970eb7fd48ffbd742d4202a177491852ca5f Mon Sep 17 00:00:00 2001 From: Luca Lavezzo Date: Mon, 13 Apr 2026 09:23:29 -0400 Subject: [PATCH 1/4] udpates --- scripts/corrections/make_theory_corr_ew.py | 6 ++ scripts/rabbit/feedRabbitSigmaUL.py | 71 +++++++++++++++++-- .../postprocessing/theory_variation_labels.py | 5 ++ wremnants/utilities/styles/styles.py | 2 + 4 files changed, 78 insertions(+), 6 deletions(-) diff --git a/scripts/corrections/make_theory_corr_ew.py b/scripts/corrections/make_theory_corr_ew.py index 28af0c87f..ad8093f91 100644 --- a/scripts/corrections/make_theory_corr_ew.py +++ b/scripts/corrections/make_theory_corr_ew.py @@ -45,6 +45,12 @@ type=str, help="axes to project to", ) +parser.add_argument( + "--proc", + required=True, + type=str, + help="String to append to output file name to distinguish different corrections, e.g. 'horace' or 'winhac'", +) parser.add_argument( "--outname", type=str, default=None, help="Output name for correction file" ) diff --git a/scripts/rabbit/feedRabbitSigmaUL.py b/scripts/rabbit/feedRabbitSigmaUL.py index 9503c05a8..2d326a2ad 100644 --- a/scripts/rabbit/feedRabbitSigmaUL.py +++ b/scripts/rabbit/feedRabbitSigmaUL.py @@ -272,15 +272,33 @@ def add_resummation_and_np_variations(args, writer): ) -def add_pdf_variations(args, writer, pdf_name): +def add_pdf_variations(args, writer, pdf_name, pdf_scale: float | None = None): pdf_var_key = _pdfvars_generator_name(args.predGenerator) + keys_to_load = [pdf_var_key] + + pdfInfo = theory_utils.pdf_info_map("Zmumu_2016PostVFP", args.pdfs[0]) + pdfName = pdfInfo["name"] + + if pdf_scale is not None: + scale = pdf_scale + else: + scale = pdfInfo.get("inflation_factor_alphaS", 1) + scale = pdfInfo.get("scale", 1) * scale + + pdf_var_key_ext = None + if pdfName == "pdfHERAPDF20": + pdf_var_key_ext = pdf_var_key.replace("HERAPDF20", "HERAPDF20EXT") + keys_to_load.append(pdf_var_key_ext) + corr_helpers = theory_corrections.load_corr_helpers( ["Z"], - [pdf_var_key], + keys_to_load, make_tensor=False, minnlo_ratio=False, ) + pdf_groups = [pdf_name, f"{pdf_name}NoAlphaS", "theory", "theory_qcd"] + h = corr_helpers["Z"][pdf_var_key] for ivar in range(1, len(h.axes[-1]), 2): writer.add_systematic( @@ -289,10 +307,45 @@ def add_pdf_variations(args, writer, pdf_name): PROCESS_NAME, SIGMAUL_CHANNEL, symmetrize="quadratic", - kfactor=1 / 1.645, - groups=[pdf_name, f"{pdf_name}NoAlphaS", "theory", "theory_qcd"], + kfactor=scale, + groups=pdf_groups, ) + if pdf_var_key_ext is not None: + h_ext = corr_helpers["Z"][pdf_var_key_ext] + extInfo = theory_utils.pdfMap["herapdf20ext"] + n_entries = extInfo["entries"] + n_sym = 3 + n_asym_entries = n_entries - n_sym + + ext_suffix = "HERAPDF20EXT" + + # Asymmetric hessian variations + for ivar in range(1, n_asym_entries, 2): + writer.add_systematic( + [h_ext[{"vars": ivar + 1}], h_ext[{"vars": ivar}]], + f"pdf{int((ivar + 1) / 2)}{ext_suffix}", + PROCESS_NAME, + SIGMAUL_CHANNEL, + symmetrize="quadratic", + kfactor=scale, + groups=pdf_groups, + ) + + # Symmetric hessian variations (mirrored) + n_asym_pairs = (n_asym_entries - 1) // 2 + for j, ivar in enumerate(range(n_asym_entries, n_entries)): + writer.add_systematic( + h_ext[{"vars": ivar}], + f"pdf{n_asym_pairs + j + 1}{ext_suffix}", + PROCESS_NAME, + SIGMAUL_CHANNEL, + symmetrize="quadratic", + kfactor=scale, + mirror=True, + groups=pdf_groups, + ) + def add_ew_isr_variation(args, writer): ew_isr_name = "pythiaew_ISR" @@ -398,6 +451,12 @@ def make_parser(): default="normal", help="Probability density for systematic variations.", ) + parser.add_argument( + "--pdfScale", + type=float, + default=None, + help="Manually set the scale factor for PDF variations (overrides any scale specified in the theory corrections metadata).", + ) return parser @@ -439,11 +498,11 @@ def main(): logger.info("Adding direct-theory sigmaUL systematics from %s", args.predGenerator) add_resummation_and_np_variations(args, writer) - logger.info("Adding lattice-corrected b/c quark-mass variations") + logger.info("Adding b/c quark-mass variations") add_bc_quark_mass_variations(writer) logger.info("Adding PDF variations") - add_pdf_variations(args, writer, pdf_name) + add_pdf_variations(args, writer, pdf_name, args.pdfScale) logger.info("Adding EW ISR variation") add_ew_isr_variation(args, writer) diff --git a/wremnants/postprocessing/theory_variation_labels.py b/wremnants/postprocessing/theory_variation_labels.py index 33353753f..63eed1c1e 100644 --- a/wremnants/postprocessing/theory_variation_labels.py +++ b/wremnants/postprocessing/theory_variation_labels.py @@ -16,6 +16,11 @@ "delta_lambda2-0.02", "chargeVgenNP0scetlibNPDelta_Lambda2", ], + # [ + # "delta_lambda20.105", + # "delta_lambda20.145", + # "chargeVgenNP0scetlibNPDelta_Lambda2", + # ], ] STANDARD_GAMMA_NP_UNCERTAINTIES = [ diff --git a/wremnants/utilities/styles/styles.py b/wremnants/utilities/styles/styles.py index 3a06d0781..756dfa8ca 100644 --- a/wremnants/utilities/styles/styles.py +++ b/wremnants/utilities/styles/styles.py @@ -438,6 +438,7 @@ def translate_html_to_latex(n): "alphaS": common_groups + [ "pdfCT18ZNoAlphaS", + "pdfHERAPDF20NoAlphaS", "resumTNP", "resumNonpert", "resumTransition", @@ -627,6 +628,7 @@ def translate_html_to_latex(n): "pdfMSHT20AlphaS": "αS PDF", "pdfCT18ZAlphaS": "αS PDF", "pdfCT18ZNoAlphaS": "PDF", + "pdfHERAPDF20NoAlphaS": "PDF", "pTModeling": "pTV modelling", "resum": "Resummation", "resumTNP": "TNP", From 316d6d185c34efee211c37d1e2b5bc58a52384f6 Mon Sep 17 00:00:00 2001 From: Luca Lavezzo Date: Mon, 13 Apr 2026 22:29:24 -0400 Subject: [PATCH 2/4] add support for other PDFs in the direct theory fit --- scripts/rabbit/feedRabbitSigmaUL.py | 64 +++++++++++++++---- wremnants/postprocessing/theory_fit_writer.py | 4 +- 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/scripts/rabbit/feedRabbitSigmaUL.py b/scripts/rabbit/feedRabbitSigmaUL.py index 2d326a2ad..b4e4063f5 100644 --- a/scripts/rabbit/feedRabbitSigmaUL.py +++ b/scripts/rabbit/feedRabbitSigmaUL.py @@ -181,7 +181,14 @@ def add_alphas_variation(args, writer, pdf_name): ) -def add_bc_quark_mass_variations(writer): +def add_pdf_bc_quark_mass_variations(args, writer, logger, pdf_name): + + if pdf_name == theory_utils.pdfMap["herapdf20"]["name"] and not args.noHERAPDF20EXT: + logger.info( + "Skipping PDF b/c quark-mass variations since using HERAPDF20EXT already includes them." + ) + return + corr_helpers = theory_corrections.load_corr_helpers( ["Z"], [helper_name for helper_name, *_ in BC_QUARK_MASS_VARIATIONS], @@ -191,9 +198,7 @@ def add_bc_quark_mass_variations(writer): for helper_name, nuisance_name, down_var, up_var in BC_QUARK_MASS_VARIATIONS: h = corr_helpers["Z"][helper_name] - # Use the same multiplicative construction as add_scale_systematic: - # nominal * (quark_mass_var / quark_mass_nominal). - writer.add_scale_systematic( + writer.add_shape_systematic( [ h[{"vars": up_var}], h[{"vars": down_var}], @@ -272,21 +277,22 @@ def add_resummation_and_np_variations(args, writer): ) -def add_pdf_variations(args, writer, pdf_name, pdf_scale: float | None = None): +def add_pdf_variations(args, writer, logger, pdf_name, scale_pdf: float | None = None): pdf_var_key = _pdfvars_generator_name(args.predGenerator) keys_to_load = [pdf_var_key] pdfInfo = theory_utils.pdf_info_map("Zmumu_2016PostVFP", args.pdfs[0]) pdfName = pdfInfo["name"] - if pdf_scale is not None: - scale = pdf_scale + if scale_pdf is not None: + scale = scale_pdf else: scale = pdfInfo.get("inflation_factor_alphaS", 1) scale = pdfInfo.get("scale", 1) * scale + logger.debug(f"Using scale {scale}.") pdf_var_key_ext = None - if pdfName == "pdfHERAPDF20": + if pdfName == "pdfHERAPDF20" and not args.noHERAPDF20EXT: pdf_var_key_ext = pdf_var_key.replace("HERAPDF20", "HERAPDF20EXT") keys_to_load.append(pdf_var_key_ext) @@ -359,7 +365,7 @@ def add_ew_isr_variation(args, writer): "Z", f"{ew_isr_name}_den", ) - writer.add_scale_systematic( + writer.add_shape_systematic( [corrh_num, corrh_den], f"{ew_isr_name}_Corr", PROCESS_NAME, @@ -371,11 +377,33 @@ def add_ew_isr_variation(args, writer): ) +def add_mb_fo_variations(args, writer): + mb_fo_name = "MiNNLO_Zbb" + numh = theory_corrections.load_corr_hist( + f"{common.data_dir}/TheoryCorrections/{mb_fo_name}_CorrZ.pkl.lz4", + "Z", + f"{mb_fo_name}_hist", + ) + denh = theory_corrections.load_corr_hist( + f"{common.data_dir}/TheoryCorrections/{mb_fo_name}_CorrZ.pkl.lz4", + "Z", + f"minnlo_ref_hist", + ) + writer.add_shape_systematic( + [numh[{"vars": "mb_up"}], denh[{"vars": "mb_up"}]], + "mb_fo", + PROCESS_NAME, + SIGMAUL_CHANNEL, + mirror=True, + groups=["bcQuarkMass", "theory"], + ) + + def output_name(args): name = args.outname name += f"_{args.predGenerator}" name += f"_{'_'.join(args.nois)}" - if args.postfix: + if args.postfix and len(args.postfix) > 0: name += f"_{args.postfix}" return name @@ -452,11 +480,16 @@ def make_parser(): help="Probability density for systematic variations.", ) parser.add_argument( - "--pdfScale", + "--scalePdf", type=float, default=None, help="Manually set the scale factor for PDF variations (overrides any scale specified in the theory corrections metadata).", ) + parser.add_argument( + "--noHERAPDF20EXT", + action="store_true", + help="Exclude the HERAPDF20EXT variations (only applicable if using a HERAPDF20-based PDF). Useful for comparing to simultaneous PDF and alphaS fit, where this parametrization isn't available.", + ) return parser @@ -498,11 +531,14 @@ def main(): logger.info("Adding direct-theory sigmaUL systematics from %s", args.predGenerator) add_resummation_and_np_variations(args, writer) - logger.info("Adding b/c quark-mass variations") - add_bc_quark_mass_variations(writer) + logger.info("Adding PDF b/c quark-mass variations") + add_pdf_bc_quark_mass_variations(args, writer, logger, pdf_name) + + logger.info("Adding mb FO variations") + add_mb_fo_variations(args, writer) logger.info("Adding PDF variations") - add_pdf_variations(args, writer, pdf_name, args.pdfScale) + add_pdf_variations(args, writer, logger, pdf_name, args.scalePdf) logger.info("Adding EW ISR variation") add_ew_isr_variation(args, writer) diff --git a/wremnants/postprocessing/theory_fit_writer.py b/wremnants/postprocessing/theory_fit_writer.py index fb32781dc..cd3924d03 100644 --- a/wremnants/postprocessing/theory_fit_writer.py +++ b/wremnants/postprocessing/theory_fit_writer.py @@ -106,7 +106,7 @@ def add_systematic( super().add_systematic(h, name, process, channel, **kwargs) - def add_scale_systematic( + def add_shape_systematic( self, h, name, @@ -114,7 +114,7 @@ def add_scale_systematic( channel, rebin_pt=True, rebin_y=True, - normalize=True, + normalize=False, apply_postOp=True, format=True, **kwargs, From 51da3c4eed8b7402490fdc04cba3ec2e66947e44 Mon Sep 17 00:00:00 2001 From: Luca Lavezzo Date: Mon, 13 Apr 2026 22:34:00 -0400 Subject: [PATCH 3/4] bump wremnants-data --- wremnants-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wremnants-data b/wremnants-data index 9d970c372..aa0e1369a 160000 --- a/wremnants-data +++ b/wremnants-data @@ -1 +1 @@ -Subproject commit 9d970c372abd8b1787af0a4567aa8d0cda805cbf +Subproject commit aa0e1369a4e1ac89850549f88b87b543bfec3ae1 From 0d30683bf1ccde31c48df75f94092c2e1e96fa7e Mon Sep 17 00:00:00 2001 From: Luca Lavezzo Date: Tue, 14 Apr 2026 16:09:56 -0400 Subject: [PATCH 4/4] david's comments --- scripts/corrections/make_theory_corr_ew.py | 4 +- scripts/rabbit/feedRabbitSigmaUL.py | 459 ++---------------- wremnants/postprocessing/theory_fit_writer.py | 376 ++++++++++++++ .../postprocessing/theory_variation_labels.py | 5 - 4 files changed, 424 insertions(+), 420 deletions(-) diff --git a/scripts/corrections/make_theory_corr_ew.py b/scripts/corrections/make_theory_corr_ew.py index ad8093f91..8fcad372f 100644 --- a/scripts/corrections/make_theory_corr_ew.py +++ b/scripts/corrections/make_theory_corr_ew.py @@ -46,7 +46,7 @@ help="axes to project to", ) parser.add_argument( - "--proc", + "--postfix", required=True, type=str, help="String to append to output file name to distinguish different corrections, e.g. 'horace' or 'winhac'", @@ -253,7 +253,7 @@ def make_correction(h1, h2, name): outname = name.replace("-", "") if args.postfix: outname += f"_{args.postfix}" - outfile = f"{args.outpath}/{outname}Corr{args.proc}.pkl.lz4" + outfile = f"{args.outpath}/{outname}Corr{args.postfix}.pkl.lz4" if "Zmumu" in corr_dict: output_tools.write_lz4_pkl_output( diff --git a/scripts/rabbit/feedRabbitSigmaUL.py b/scripts/rabbit/feedRabbitSigmaUL.py index b4e4063f5..629ca9963 100644 --- a/scripts/rabbit/feedRabbitSigmaUL.py +++ b/scripts/rabbit/feedRabbitSigmaUL.py @@ -12,36 +12,10 @@ import os -import rabbit.io_tools from wremnants.postprocessing.theory_fit_writer import SigmaULTheoryFitWriter -from wremnants.postprocessing.theory_variation_labels import ( - BC_QUARK_MASS_VARIATIONS, - LATTICE_CORRELATED_NP_UNCERTAINTIES, - LATTICE_GAMMA_NP_UNCERTAINTIES, - STANDARD_CORRELATED_NP_UNCERTAINTIES, - STANDARD_GAMMA_NP_UNCERTAINTIES, - TNP_UNCERTAINTIES, - TRANSITION_FO_UNCERTAINTIES, -) -from wremnants.production import theory_corrections -from wremnants.utilities import common, parsing, theory_utils +from wremnants.utilities import common, parsing from wums import logging, output_tools -SIGMAUL_CHANNEL = "chSigmaUL" -PROCESS_NAME = "Zmumu" - - -def _pdfas_generator_name(pred_generator): - if pred_generator.endswith("_pdfas"): - return pred_generator - return f"{pred_generator}_pdfas" - - -def _pdfvars_generator_name(pred_generator): - if pred_generator.endswith("_pdfvars"): - return pred_generator - return f"{pred_generator}_pdfvars" - def _join_cli_tokens(value): if isinstance(value, (list, tuple)): @@ -49,362 +23,12 @@ def _join_cli_tokens(value): return value -def _select_baseline_variation(h, axis_name="vars", nominal_entry="pdf0"): - if axis_name not in h.axes.name: - raise KeyError( - f"Expected axis '{axis_name}' in theory histogram, found axes {h.axes.name}" - ) - - try: - return h[{axis_name: nominal_entry}] - except Exception as exc: - available_entries = list(h.axes[axis_name]) - raise KeyError( - f"Expected nominal entry '{nominal_entry}' in axis '{axis_name}', " - f"found entries {available_entries}" - ) from exc - - -def _resolve_sigmaul_channel(fitresult, mapping_name, requested_channel): - channels = fitresult["mappings"][mapping_name]["channels"] - if requested_channel in channels: - return requested_channel - - if " " in requested_channel: - fallback = requested_channel.rsplit(" ", 1)[-1] - if fallback in channels: - return fallback - - matches = [name for name in channels if name.endswith(requested_channel)] - if len(matches) == 1: - return matches[0] - - raise KeyError( - f"Unable to resolve sigmaUL channel '{requested_channel}' in mapping '{mapping_name}'. " - f"Available channels: {list(channels.keys())}" - ) - - -def load_sigmaul_data(args, writer, logger): - if args.pseudodataGenerator: - infile = f"{common.data_dir}/TheoryCorrections/{args.pseudodataGenerator}_CorrZ.pkl.lz4" - logger.info("Loading sigmaUL pseudodata from %s", infile) - h_data = theory_corrections.load_corr_hist( - infile, - "Z", - f"{args.pseudodataGenerator}_hist", - ) - h_data = _select_baseline_variation(h_data) - h_data = h_data.project("qT", "absY") - writer.add_channel(h_data.axes, SIGMAUL_CHANNEL) - writer.add_data(h_data, SIGMAUL_CHANNEL) - writer.set_reference(SIGMAUL_CHANNEL, h_data) - return None - - fitresult, meta = rabbit.io_tools.get_fitresult( - args.infile, result="asimov", meta=True - ) - logger.debug( - "Available models in fit result: %s", list(fitresult["mappings"].keys()) - ) - - h_data_cov = fitresult["mappings"][args.fitresultMapping][ - "hist_postfit_inclusive_cov" - ].get() - writer.add_data_covariance(h_data_cov) - - channel_name = _resolve_sigmaul_channel( - fitresult, args.fitresultMapping, args.channelSigmaUL - ) - logger.debug( - "Using sigmaUL channel '%s' in mapping '%s'", - channel_name, - args.fitresultMapping, - ) - h_data = fitresult["mappings"][args.fitresultMapping]["channels"][channel_name][ - "hist_postfit_inclusive" - ].get() - writer.add_channel(h_data.axes, SIGMAUL_CHANNEL) - writer.add_data(h_data, SIGMAUL_CHANNEL) - writer.set_reference(SIGMAUL_CHANNEL, h_data) - return meta - - -def build_output_metadata(args, input_meta): - meta = { - "meta_info": output_tools.make_meta_info_dict( - args=args, - wd=common.base_dir, - ), - } - if input_meta is not None: - meta["meta_info_input"] = input_meta - return meta - - -def add_sigmaul_process(args, writer): - h_sig_sigmaul = theory_corrections.load_corr_hist( - f"{common.data_dir}/TheoryCorrections/{args.predGenerator}_CorrZ.pkl.lz4", - "Z", - f"{args.predGenerator}_hist", - ) - h_sig_sigmaul = _select_baseline_variation(h_sig_sigmaul) - writer.add_process(h_sig_sigmaul, PROCESS_NAME, SIGMAUL_CHANNEL, signal=False) - - -def add_alphas_variation(args, writer, pdf_name): - symmetrize = "average" if "alphaS" in args.nois else "quadratic" - alphas_var_name = _pdfas_generator_name(args.predGenerator) - alphas_vars = theory_corrections.load_corr_helpers( - ["Z"], - [alphas_var_name], - make_tensor=False, - minnlo_ratio=False, - ) - writer.add_systematic( - [ - alphas_vars["Z"][alphas_var_name][{"vars": 2}], - alphas_vars["Z"][alphas_var_name][{"vars": 1}], - ], - "pdfAlphaS", - PROCESS_NAME, - SIGMAUL_CHANNEL, - noi=("alphaS" in args.nois), - constrained=not ("alphaS" in args.nois), - symmetrize=symmetrize, - kfactor=1.0, - groups=( - [pdf_name, f"{pdf_name}AlphaS", "theory", "theory_qcd"] - if "alphaS" not in args.nois - else [pdf_name] - ), - ) - - -def add_pdf_bc_quark_mass_variations(args, writer, logger, pdf_name): - - if pdf_name == theory_utils.pdfMap["herapdf20"]["name"] and not args.noHERAPDF20EXT: - logger.info( - "Skipping PDF b/c quark-mass variations since using HERAPDF20EXT already includes them." - ) - return - - corr_helpers = theory_corrections.load_corr_helpers( - ["Z"], - [helper_name for helper_name, *_ in BC_QUARK_MASS_VARIATIONS], - make_tensor=False, - minnlo_ratio=False, - ) - - for helper_name, nuisance_name, down_var, up_var in BC_QUARK_MASS_VARIATIONS: - h = corr_helpers["Z"][helper_name] - writer.add_shape_systematic( - [ - h[{"vars": up_var}], - h[{"vars": down_var}], - _select_baseline_variation(h), - ], - nuisance_name, - PROCESS_NAME, - SIGMAUL_CHANNEL, - symmetrize="quadratic", - groups=["bcQuarkMass", "pTModeling", "theory", "theory_qcd"], - ) - - -def add_resummation_and_np_variations(args, writer): - generator_vars = theory_corrections.load_corr_helpers( - ["Z"], - [ - args.predGenerator, - ], - make_tensor=False, - minnlo_ratio=False, - ) - nominal = generator_vars["Z"][args.predGenerator] - - if "lattice" in args.predGenerator.lower(): - corr_np_uncs = LATTICE_CORRELATED_NP_UNCERTAINTIES - gamma_np_uncs = LATTICE_GAMMA_NP_UNCERTAINTIES - else: - corr_np_uncs = STANDARD_CORRELATED_NP_UNCERTAINTIES - gamma_np_uncs = STANDARD_GAMMA_NP_UNCERTAINTIES - - for up_var, down_var, nuisance_name in corr_np_uncs: - writer.add_systematic( - [nominal[{"vars": up_var}], nominal[{"vars": down_var}]], - nuisance_name, - PROCESS_NAME, - SIGMAUL_CHANNEL, - symmetrize="average", - groups=["resumNonpert", "resum", "pTModeling", "theory", "theory_qcd"], - ) - - for up_var, down_var, nuisance_name in gamma_np_uncs: - writer.add_systematic( - [nominal[{"vars": up_var}], nominal[{"vars": down_var}]], - nuisance_name, - PROCESS_NAME, - SIGMAUL_CHANNEL, - symmetrize="average", - groups=["resumTNP", "resum", "pTModeling", "theory", "theory_qcd"], - ) - - for up_var, down_var in TNP_UNCERTAINTIES: - writer.add_systematic( - [nominal[{"vars": up_var}], nominal[{"vars": down_var}]], - f"resumTNP_{down_var.split('-')[0]}", - PROCESS_NAME, - SIGMAUL_CHANNEL, - symmetrize="average", - groups=["resumTNP", "resum", "pTModeling", "theory", "theory_qcd"], - ) - - for up_var, down_var, nuisance_name in TRANSITION_FO_UNCERTAINTIES: - writer.add_systematic( - [nominal[{"vars": up_var}], nominal[{"vars": down_var}]], - nuisance_name, - PROCESS_NAME, - SIGMAUL_CHANNEL, - symmetrize="quadratic", - groups=[ - "resumTransitionFOScale", - "resum", - "pTModeling", - "theory", - "theory_qcd", - ], - ) - - -def add_pdf_variations(args, writer, logger, pdf_name, scale_pdf: float | None = None): - pdf_var_key = _pdfvars_generator_name(args.predGenerator) - keys_to_load = [pdf_var_key] - - pdfInfo = theory_utils.pdf_info_map("Zmumu_2016PostVFP", args.pdfs[0]) - pdfName = pdfInfo["name"] - - if scale_pdf is not None: - scale = scale_pdf - else: - scale = pdfInfo.get("inflation_factor_alphaS", 1) - scale = pdfInfo.get("scale", 1) * scale - logger.debug(f"Using scale {scale}.") - - pdf_var_key_ext = None - if pdfName == "pdfHERAPDF20" and not args.noHERAPDF20EXT: - pdf_var_key_ext = pdf_var_key.replace("HERAPDF20", "HERAPDF20EXT") - keys_to_load.append(pdf_var_key_ext) - - corr_helpers = theory_corrections.load_corr_helpers( - ["Z"], - keys_to_load, - make_tensor=False, - minnlo_ratio=False, - ) - - pdf_groups = [pdf_name, f"{pdf_name}NoAlphaS", "theory", "theory_qcd"] - - h = corr_helpers["Z"][pdf_var_key] - for ivar in range(1, len(h.axes[-1]), 2): - writer.add_systematic( - [h[{"vars": ivar + 1}], h[{"vars": ivar}]], - f"pdf{int((ivar + 1) / 2)}{args.pdfs[0].upper()}", - PROCESS_NAME, - SIGMAUL_CHANNEL, - symmetrize="quadratic", - kfactor=scale, - groups=pdf_groups, - ) - - if pdf_var_key_ext is not None: - h_ext = corr_helpers["Z"][pdf_var_key_ext] - extInfo = theory_utils.pdfMap["herapdf20ext"] - n_entries = extInfo["entries"] - n_sym = 3 - n_asym_entries = n_entries - n_sym - - ext_suffix = "HERAPDF20EXT" - - # Asymmetric hessian variations - for ivar in range(1, n_asym_entries, 2): - writer.add_systematic( - [h_ext[{"vars": ivar + 1}], h_ext[{"vars": ivar}]], - f"pdf{int((ivar + 1) / 2)}{ext_suffix}", - PROCESS_NAME, - SIGMAUL_CHANNEL, - symmetrize="quadratic", - kfactor=scale, - groups=pdf_groups, - ) - - # Symmetric hessian variations (mirrored) - n_asym_pairs = (n_asym_entries - 1) // 2 - for j, ivar in enumerate(range(n_asym_entries, n_entries)): - writer.add_systematic( - h_ext[{"vars": ivar}], - f"pdf{n_asym_pairs + j + 1}{ext_suffix}", - PROCESS_NAME, - SIGMAUL_CHANNEL, - symmetrize="quadratic", - kfactor=scale, - mirror=True, - groups=pdf_groups, - ) - - -def add_ew_isr_variation(args, writer): - ew_isr_name = "pythiaew_ISR" - corrh_num = theory_corrections.load_corr_hist( - f"{common.data_dir}/TheoryCorrections/{ew_isr_name}_CorrZ.pkl.lz4", - "Z", - f"{ew_isr_name}_num", - ) - corrh_den = theory_corrections.load_corr_hist( - f"{common.data_dir}/TheoryCorrections/{ew_isr_name}_CorrZ.pkl.lz4", - "Z", - f"{ew_isr_name}_den", - ) - writer.add_shape_systematic( - [corrh_num, corrh_den], - f"{ew_isr_name}_Corr", - PROCESS_NAME, - SIGMAUL_CHANNEL, - kfactor=2, - mirror=True, - symmetrize="average", - groups=["theory_ew", "theory"], - ) - - -def add_mb_fo_variations(args, writer): - mb_fo_name = "MiNNLO_Zbb" - numh = theory_corrections.load_corr_hist( - f"{common.data_dir}/TheoryCorrections/{mb_fo_name}_CorrZ.pkl.lz4", - "Z", - f"{mb_fo_name}_hist", - ) - denh = theory_corrections.load_corr_hist( - f"{common.data_dir}/TheoryCorrections/{mb_fo_name}_CorrZ.pkl.lz4", - "Z", - f"minnlo_ref_hist", - ) - writer.add_shape_systematic( - [numh[{"vars": "mb_up"}], denh[{"vars": "mb_up"}]], - "mb_fo", - PROCESS_NAME, - SIGMAUL_CHANNEL, - mirror=True, - groups=["bcQuarkMass", "theory"], - ) - - -def output_name(args): - name = args.outname - name += f"_{args.predGenerator}" - name += f"_{'_'.join(args.nois)}" - if args.postfix and len(args.postfix) > 0: - name += f"_{args.postfix}" +def output_name(outname, predGenerator, nois, postfix): + name = outname + name += f"_{predGenerator}" + name += f"_{'_'.join(nois)}" + if postfix and len(postfix) > 0: + name += f"_{postfix}" return name @@ -493,12 +117,12 @@ def make_parser(): return parser -def _validate_args(args): +def _validate_args(pdf, predGenerator): """ - Make sure the the first PDF (the only one used) matches the args.predGenerator. + Make sure the the first PDF (the only one used) matches the predGenerator. TODO: at some point, we should have a dataclass for each theory correction that specifies which PDF it belongs to, so we don't have to rely on string parsing of the generator name. """ - if args.pdfs[0].lower() not in args.predGenerator.lower(): + if pdf.lower() not in predGenerator.lower(): raise ValueError( f"Make sure the that the PDF you pass (--pdfs) matches the --predGenerator name." ) @@ -509,48 +133,57 @@ def main(): args = parser.parse_args() args.fitresultMapping = _join_cli_tokens(args.fitresultMapping) args.channelSigmaUL = _join_cli_tokens(args.channelSigmaUL) - logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) + logging.setup_logger(__file__, args.verbose, args.noColorLogger) + + _validate_args(args.pdfs[0], args.predGenerator) - _validate_args(args) + exclude_nuisances = args.excludeNuisances + if args.noHERAPDF20EXT: + exclude_nuisances = ( + f"{exclude_nuisances}|HERAPDF20EXT" if exclude_nuisances else "HERAPDF20EXT" + ) writer = SigmaULTheoryFitWriter( + predGenerator=args.predGenerator, + nois=args.nois, + pdf=args.pdfs[0], sparse=args.sparse, systematic_type=args.systematicType, allow_negative_expectation=False, - exclude_nuisances=args.excludeNuisances, + exclude_nuisances=exclude_nuisances, keep_nuisances=args.keepNuisances, ) - input_meta = load_sigmaul_data(args, writer, logger) - add_sigmaul_process(args, writer) - - pdf_name = theory_utils.pdfMap[args.pdfs[0]]["name"] - logger.info("Adding alphaS variation") - add_alphas_variation(args, writer, pdf_name) - - logger.info("Adding direct-theory sigmaUL systematics from %s", args.predGenerator) - add_resummation_and_np_variations(args, writer) - - logger.info("Adding PDF b/c quark-mass variations") - add_pdf_bc_quark_mass_variations(args, writer, logger, pdf_name) - - logger.info("Adding mb FO variations") - add_mb_fo_variations(args, writer) - - logger.info("Adding PDF variations") - add_pdf_variations(args, writer, logger, pdf_name, args.scalePdf) - - logger.info("Adding EW ISR variation") - add_ew_isr_variation(args, writer) + input_meta = writer.load_sigmaul_data( + args.pseudodataGenerator, + args.infile, + args.fitresultMapping, + args.channelSigmaUL, + ) + writer.add_sigmaul_process() + writer.add_alphas_variation() + writer.add_resummation_and_np_variations() + writer.add_pdf_bc_quark_mass_variations() + writer.add_mb_fo_variations() + writer.add_pdf_variations(args.scalePdf) + writer.add_ew_isr_variation() outfolder = args.outfolder or "./" - meta = build_output_metadata(args, input_meta) + meta = { + "meta_info": output_tools.make_meta_info_dict( + args=args, + wd=common.base_dir, + ), + } + if input_meta is not None: + meta["meta_info_input"] = input_meta + outname = output_name(args.outname, args.predGenerator, args.nois, args.postfix) writer.write( outfolder=outfolder, - outfilename=output_name(args), + outfilename=outname, meta_data_dict=meta, ) - logger.info("Written to %s.hdf5", os.path.join(outfolder, output_name(args))) + writer.logger.info("Written to %s.hdf5", os.path.join(outfolder, outname)) if __name__ == "__main__": diff --git a/wremnants/postprocessing/theory_fit_writer.py b/wremnants/postprocessing/theory_fit_writer.py index cd3924d03..5fc352520 100644 --- a/wremnants/postprocessing/theory_fit_writer.py +++ b/wremnants/postprocessing/theory_fit_writer.py @@ -3,15 +3,57 @@ import re from rabbit.tensorwriter import TensorWriter +from wremnants.postprocessing.theory_variation_labels import ( + BC_QUARK_MASS_VARIATIONS, + LATTICE_CORRELATED_NP_UNCERTAINTIES, + LATTICE_GAMMA_NP_UNCERTAINTIES, + STANDARD_CORRELATED_NP_UNCERTAINTIES, + STANDARD_GAMMA_NP_UNCERTAINTIES, + TNP_UNCERTAINTIES, + TRANSITION_FO_UNCERTAINTIES, +) +from wremnants.production import theory_corrections +from wremnants.utilities import common, theory_utils from wums import boostHistHelpers as hh from wums import logging +def _pdfas_generator_name(pred_generator): + if pred_generator.endswith("_pdfas"): + return pred_generator + return f"{pred_generator}_pdfas" + + +def _pdfvars_generator_name(pred_generator): + if pred_generator.endswith("_pdfvars"): + return pred_generator + return f"{pred_generator}_pdfvars" + + +def _select_baseline_variation(h, axis_name="vars", nominal_entry="pdf0"): + if axis_name not in h.axes.name: + raise KeyError( + f"Expected axis '{axis_name}' in theory histogram, found axes {h.axes.name}" + ) + + try: + return h[{axis_name: nominal_entry}] + except Exception as exc: + available_entries = list(h.axes[axis_name]) + raise KeyError( + f"Expected nominal entry '{nominal_entry}' in axis '{axis_name}', " + f"found entries {available_entries}" + ) from exc + + class SigmaULTheoryFitWriter(TensorWriter): """Tensor writer for the direct-theory sigmaUL fit.""" def __init__( self, + predGenerator, + nois, + pdf, exclude_nuisances="", keep_nuisances="", process_name="Zmumu", @@ -24,6 +66,10 @@ def __init__( self.ref = {} self.process_name = process_name self.sigmaul_channel = sigmaul_channel + self.predGenerator = predGenerator + self.nois = nois + self.pdf = pdf + self.pdf_name = theory_utils.pdfMap[pdf]["name"] self._exclude_nuisances = ( re.compile(exclude_nuisances) if exclude_nuisances else None ) @@ -302,3 +348,333 @@ def apply_selections(self, h, process, channel): h = h[{"helicity": -1.0j}] return h + + def load_sigmaul_data( + self, pseudodataGenerator, infile, fitresultMapping, channelSigmaUL + ): + if pseudodataGenerator: + corrfile = f"{common.data_dir}/TheoryCorrections/{pseudodataGenerator}_CorrZ.pkl.lz4" + self.logger.info("Loading sigmaUL pseudodata from %s", corrfile) + h_data = theory_corrections.load_corr_hist( + corrfile, + "Z", + f"{pseudodataGenerator}_hist", + ) + h_data = _select_baseline_variation(h_data) + h_data = h_data.project("qT", "absY") + self.add_channel(h_data.axes, self.sigmaul_channel) + self.add_data(h_data, self.sigmaul_channel) + self.set_reference(self.sigmaul_channel, h_data) + return None + + import rabbit.io_tools + + fitresult, meta = rabbit.io_tools.get_fitresult( + infile, result="asimov", meta=True + ) + self.logger.debug( + "Available models in fit result: %s", list(fitresult["mappings"].keys()) + ) + + h_data_cov = fitresult["mappings"][fitresultMapping][ + "hist_postfit_inclusive_cov" + ].get() + self.add_data_covariance(h_data_cov) + + channel_name = _resolve_sigmaul_channel( + fitresult, fitresultMapping, channelSigmaUL + ) + self.logger.debug( + "Using sigmaUL channel '%s' in mapping '%s'", + channel_name, + fitresultMapping, + ) + h_data = fitresult["mappings"][fitresultMapping]["channels"][channel_name][ + "hist_postfit_inclusive" + ].get() + self.add_channel(h_data.axes, self.sigmaul_channel) + self.add_data(h_data, self.sigmaul_channel) + self.set_reference(self.sigmaul_channel, h_data) + return meta + + def add_sigmaul_process(self): + h_sig_sigmaul = theory_corrections.load_corr_hist( + f"{common.data_dir}/TheoryCorrections/{self.predGenerator}_CorrZ.pkl.lz4", + "Z", + f"{self.predGenerator}_hist", + ) + h_sig_sigmaul = _select_baseline_variation(h_sig_sigmaul) + self.add_process( + h_sig_sigmaul, self.process_name, self.sigmaul_channel, signal=False + ) + + def add_alphas_variation(self): + self.logger.info("Adding alphaS variation") + symmetrize = "average" if "alphaS" in self.nois else "quadratic" + alphas_var_name = _pdfas_generator_name(self.predGenerator) + alphas_vars = theory_corrections.load_corr_helpers( + ["Z"], + [alphas_var_name], + make_tensor=False, + minnlo_ratio=False, + ) + self.add_systematic( + [ + alphas_vars["Z"][alphas_var_name][{"vars": 2}], + alphas_vars["Z"][alphas_var_name][{"vars": 1}], + ], + "pdfAlphaS", + self.process_name, + self.sigmaul_channel, + noi=("alphaS" in self.nois), + constrained=not ("alphaS" in self.nois), + symmetrize=symmetrize, + kfactor=1.0, + groups=( + [self.pdf_name, f"{self.pdf_name}AlphaS", "theory", "theory_qcd"] + if "alphaS" not in self.nois + else [self.pdf_name] + ), + ) + + def add_pdf_bc_quark_mass_variations(self): + self.logger.info("Adding PDF b/c quark-mass variations") + + if self.pdf_name == theory_utils.pdfMap["herapdf20"][ + "name" + ] and self._keep_systematic("HERAPDF20EXT"): + self.logger.info( + "Skipping PDF b/c quark-mass variations since using HERAPDF20EXT already includes them." + ) + return + + corr_helpers = theory_corrections.load_corr_helpers( + ["Z"], + [helper_name for helper_name, *_ in BC_QUARK_MASS_VARIATIONS], + make_tensor=False, + minnlo_ratio=False, + ) + + for helper_name, nuisance_name, down_var, up_var in BC_QUARK_MASS_VARIATIONS: + h = corr_helpers["Z"][helper_name] + self.add_shape_systematic( + [ + h[{"vars": up_var}], + h[{"vars": down_var}], + _select_baseline_variation(h), + ], + nuisance_name, + self.process_name, + self.sigmaul_channel, + symmetrize="quadratic", + groups=["bcQuarkMass", "pTModeling", "theory", "theory_qcd"], + ) + + def add_resummation_and_np_variations(self): + self.logger.info( + "Adding direct-theory sigmaUL systematics from %s", self.predGenerator + ) + generator_vars = theory_corrections.load_corr_helpers( + ["Z"], + [ + self.predGenerator, + ], + make_tensor=False, + minnlo_ratio=False, + ) + nominal = generator_vars["Z"][self.predGenerator] + + if "lattice" in self.predGenerator.lower(): + corr_np_uncs = LATTICE_CORRELATED_NP_UNCERTAINTIES + gamma_np_uncs = LATTICE_GAMMA_NP_UNCERTAINTIES + else: + corr_np_uncs = STANDARD_CORRELATED_NP_UNCERTAINTIES + gamma_np_uncs = STANDARD_GAMMA_NP_UNCERTAINTIES + + for up_var, down_var, nuisance_name in corr_np_uncs: + self.add_systematic( + [nominal[{"vars": up_var}], nominal[{"vars": down_var}]], + nuisance_name, + self.process_name, + self.sigmaul_channel, + symmetrize="average", + groups=["resumNonpert", "resum", "pTModeling", "theory", "theory_qcd"], + ) + + for up_var, down_var, nuisance_name in gamma_np_uncs: + self.add_systematic( + [nominal[{"vars": up_var}], nominal[{"vars": down_var}]], + nuisance_name, + self.process_name, + self.sigmaul_channel, + symmetrize="average", + groups=["resumTNP", "resum", "pTModeling", "theory", "theory_qcd"], + ) + + for up_var, down_var in TNP_UNCERTAINTIES: + self.add_systematic( + [nominal[{"vars": up_var}], nominal[{"vars": down_var}]], + f"resumTNP_{down_var.split('-')[0]}", + self.process_name, + self.sigmaul_channel, + symmetrize="average", + groups=["resumTNP", "resum", "pTModeling", "theory", "theory_qcd"], + ) + + for up_var, down_var, nuisance_name in TRANSITION_FO_UNCERTAINTIES: + self.add_systematic( + [nominal[{"vars": up_var}], nominal[{"vars": down_var}]], + nuisance_name, + self.process_name, + self.sigmaul_channel, + symmetrize="quadratic", + groups=[ + "resumTransitionFOScale", + "resum", + "pTModeling", + "theory", + "theory_qcd", + ], + ) + + def add_pdf_variations(self, scale_pdf: float | None = None): + self.logger.info("Adding PDF variations") + pdf_var_key = _pdfvars_generator_name(self.predGenerator) + keys_to_load = [pdf_var_key] + + pdfInfo = theory_utils.pdf_info_map("Zmumu_2016PostVFP", self.pdf) + pdfName = pdfInfo["name"] + + if scale_pdf is not None: + scale = scale_pdf + else: + scale = pdfInfo.get("inflation_factor_alphaS", 1) + scale = pdfInfo.get("scale", 1) * scale + self.logger.debug(f"Using scale {scale}.") + + pdf_var_key_ext = None + if pdfName == "pdfHERAPDF20" and self._keep_systematic("HERAPDF20EXT"): + pdf_var_key_ext = pdf_var_key.replace("HERAPDF20", "HERAPDF20EXT") + keys_to_load.append(pdf_var_key_ext) + + corr_helpers = theory_corrections.load_corr_helpers( + ["Z"], + keys_to_load, + make_tensor=False, + minnlo_ratio=False, + ) + + pdf_groups = [self.pdf_name, f"{self.pdf_name}NoAlphaS", "theory", "theory_qcd"] + + h = corr_helpers["Z"][pdf_var_key] + for ivar in range(1, len(h.axes[-1]), 2): + self.add_systematic( + [h[{"vars": ivar + 1}], h[{"vars": ivar}]], + f"pdf{int((ivar + 1) / 2)}{self.pdf.upper()}", + self.process_name, + self.sigmaul_channel, + symmetrize="quadratic", + kfactor=scale, + groups=pdf_groups, + ) + + if pdf_var_key_ext is not None: + h_ext = corr_helpers["Z"][pdf_var_key_ext] + extInfo = theory_utils.pdfMap["herapdf20ext"] + n_entries = extInfo["entries"] + n_sym = 3 + n_asym_entries = n_entries - n_sym + + ext_suffix = "HERAPDF20EXT" + + # Asymmetric hessian variations + for ivar in range(1, n_asym_entries, 2): + self.add_systematic( + [h_ext[{"vars": ivar + 1}], h_ext[{"vars": ivar}]], + f"pdf{int((ivar + 1) / 2)}{ext_suffix}", + self.process_name, + self.sigmaul_channel, + symmetrize="quadratic", + kfactor=scale, + groups=pdf_groups, + ) + + # Symmetric hessian variations (mirrored) + n_asym_pairs = (n_asym_entries - 1) // 2 + for j, ivar in enumerate(range(n_asym_entries, n_entries)): + self.add_systematic( + h_ext[{"vars": ivar}], + f"pdf{n_asym_pairs + j + 1}{ext_suffix}", + self.process_name, + self.sigmaul_channel, + symmetrize="quadratic", + kfactor=scale, + mirror=True, + groups=pdf_groups, + ) + + def add_ew_isr_variation(self): + self.logger.info("Adding EW ISR variation") + ew_isr_name = "pythiaew_ISR" + corrh_num = theory_corrections.load_corr_hist( + f"{common.data_dir}/TheoryCorrections/{ew_isr_name}_CorrZ.pkl.lz4", + "Z", + f"{ew_isr_name}_num", + ) + corrh_den = theory_corrections.load_corr_hist( + f"{common.data_dir}/TheoryCorrections/{ew_isr_name}_CorrZ.pkl.lz4", + "Z", + f"{ew_isr_name}_den", + ) + self.add_shape_systematic( + [corrh_num, corrh_den], + f"{ew_isr_name}_Corr", + self.process_name, + self.sigmaul_channel, + kfactor=2, + mirror=True, + symmetrize="average", + groups=["theory_ew", "theory"], + ) + + def add_mb_fo_variations(self): + self.logger.info("Adding mb FO variations") + mb_fo_name = "MiNNLO_Zbb" + numh = theory_corrections.load_corr_hist( + f"{common.data_dir}/TheoryCorrections/{mb_fo_name}_CorrZ.pkl.lz4", + "Z", + f"{mb_fo_name}_hist", + ) + denh = theory_corrections.load_corr_hist( + f"{common.data_dir}/TheoryCorrections/{mb_fo_name}_CorrZ.pkl.lz4", + "Z", + f"minnlo_ref_hist", + ) + self.add_shape_systematic( + [numh[{"vars": "mb_up"}], denh[{"vars": "mb_up"}]], + "mb_fo", + self.process_name, + self.sigmaul_channel, + mirror=True, + groups=["bcQuarkMass", "theory"], + ) + + +def _resolve_sigmaul_channel(fitresult, mapping_name, requested_channel): + channels = fitresult["mappings"][mapping_name]["channels"] + if requested_channel in channels: + return requested_channel + + if " " in requested_channel: + fallback = requested_channel.rsplit(" ", 1)[-1] + if fallback in channels: + return fallback + + matches = [name for name in channels if name.endswith(requested_channel)] + if len(matches) == 1: + return matches[0] + + raise KeyError( + f"Unable to resolve sigmaUL channel '{requested_channel}' in mapping '{mapping_name}'. " + f"Available channels: {list(channels.keys())}" + ) diff --git a/wremnants/postprocessing/theory_variation_labels.py b/wremnants/postprocessing/theory_variation_labels.py index 63eed1c1e..33353753f 100644 --- a/wremnants/postprocessing/theory_variation_labels.py +++ b/wremnants/postprocessing/theory_variation_labels.py @@ -16,11 +16,6 @@ "delta_lambda2-0.02", "chargeVgenNP0scetlibNPDelta_Lambda2", ], - # [ - # "delta_lambda20.105", - # "delta_lambda20.145", - # "chargeVgenNP0scetlibNPDelta_Lambda2", - # ], ] STANDARD_GAMMA_NP_UNCERTAINTIES = [