From e522379d0a479e3fc17c4f475dc5a1c6505216f4 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 21 Nov 2025 11:47:10 -0800 Subject: [PATCH 01/32] plot p.e. spectrum and get DCR --- src/mintanalysis/ana/__init__.py | 6 + src/mintanalysis/ana/plot_pe_spectrum.py | 140 +++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 src/mintanalysis/ana/__init__.py create mode 100644 src/mintanalysis/ana/plot_pe_spectrum.py diff --git a/src/mintanalysis/ana/__init__.py b/src/mintanalysis/ana/__init__.py new file mode 100644 index 0000000..7ee3e1c --- /dev/null +++ b/src/mintanalysis/ana/__init__.py @@ -0,0 +1,6 @@ +""" +Routines for the final analysis of PMT data +""" + + +__all__ = [] \ No newline at end of file diff --git a/src/mintanalysis/ana/plot_pe_spectrum.py b/src/mintanalysis/ana/plot_pe_spectrum.py new file mode 100644 index 0000000..2692038 --- /dev/null +++ b/src/mintanalysis/ana/plot_pe_spectrum.py @@ -0,0 +1,140 @@ +# Imports +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +import awkward as ak +from iminuit import cost, Minuit +from numba_stats import truncnorm, truncexpon, uniform + + +# LEGEND specific imports +from lgdo import lh5 +from dspeed import build_dsp + +from scipy.signal import find_peaks + +def valley_index_strict(y): + """ + Return the index of the valley between the first strict peak + and the next rise. Returns None if not found. + """ + n = len(y) + if n < 3: + return None + + # 1. Find first strict peak + peak = None + for i in range(1, n-1): + if y[i] > y[i-1] and y[i] > y[i+1]: + peak = i + break + if peak is None: + return None + + # 2. 
Walk downward and track the minimum + valley = peak + i = peak + 1 + + while i < n: + if y[i] > y[i-1]: # rising again → done + break + if y[i] < y[valley]: + valley = i + i += 1 + + if i == n: # never rose again + return None + + return peak, valley + +def gaussian(x, amp, mu, sigma): + return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + + +if __name__ == "__main__": + + f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020_2025_10_22_23_43_47.lh5" + f_dsp = f_raw.replace("raw","dsp") + + bin_size=20 + bins = np.arange(-100,10000,bin_size) + lim =20 + A4_LANDSCAPE = (11.69, 8.27) + result_dic ={} + + with PdfPages("pe_spectra.pdf") as pdf: + for ch in lh5.ls(f_dsp): + result_dic[ch] = {} + fig, ax = plt.subplots() + fig.set_figwidth(A4_LANDSCAPE[0]) + fig.set_figheight(A4_LANDSCAPE[1]) + + + d = lh5.read_as(f"{ch}/dsp",f_dsp,"ak") + vals =ak.to_numpy(d.nnls_solution.values,allow_missing=False) + pe_vals = np.nansum(np.where(vals>lim,vals,np.nan),axis=1) + + n, bins, patches = ax.hist(pe_vals,bins=bins,histtype="step",label=f"channel {ch} ON ($V_N$)") + + noise_peak, valley_idx = valley_index_strict(n) + pe_peak, _ = valley_index_strict(n[noise_peak:]) + ax.axvline(bins[valley_idx],color='red',ls='--') + ax.axvline(bins[noise_peak:][pe_peak],color='green',ls='--') + + bin_centers = 0.5*(bins[1:]+ bins[:-1]) + # Restrict range + x_min, x_max = bins[valley_idx], 2*bins[noise_peak:][pe_peak] #- bins[valley_idx] + mask = (bin_centers >= x_min) & (bin_centers <= x_max) + + bin_centers_fit = bin_centers[mask] + n_fit = n[mask] + + def nll(amp, mu, sigma): + """Poisson NLL for binned data""" + expected = gaussian(bin_centers_fit, amp, mu, sigma) + expected[expected <= 0] = 1e-10 # avoid log(0) + return np.sum(expected - n_fit * np.log(expected)) + + # Initial guesses + amp0 =n[noise_peak:][pe_peak] + mu0 = bins[noise_peak:][pe_peak] + sigma0 = 100.0 + + m = Minuit(nll, amp=amp0, mu=mu0, sigma=sigma0) + m.errordef = 
Minuit.LIKELIHOOD # important for Poisson NLL + m.migrad(iterate=10) # perform minimization + + fit_vals = m.values.to_dict() + fit_errs = m.errors.to_dict() + + result_dic[ch]["pe_peak"] = {"vals":fit_vals,"errs":fit_errs} + + + y_fit = gaussian(bin_centers, + amp=m.values["amp"], + mu=m.values["mu"], + sigma=m.values["sigma"]) + + ax.plot(bin_centers, y_fit, 'r-', label='NLL fit (Minuit)') + + dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) + + # timestamps in rawfile are currently broken + # ts = lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") + dt = 25.021 #from log file + + result_dic[ch]["dcr"] = {"dcr":{"dcr":dcts/dt,"counts":dcts,"runtime_s":dt}} + + ax.set_xlim(-10,2.5e3) + ax.set_ylim(0.5,None) + ax.set_yscale('log') + ax.set_ylabel(f"Counts/{bin_size} NNLS units") + ax.legend() + ax.set_xlabel("NNLS units") + ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") + + pdf.savefig() + plt.close() + + print(result_dic) From 742f0dc24a36884d4af42c7c94ac83b218743361 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 22:53:00 +0000 Subject: [PATCH 02/32] style: pre-commit fixes --- src/mintanalysis/ana/__init__.py | 3 +- src/mintanalysis/ana/plot_pe_spectrum.py | 83 +++++++++++------------- 2 files changed, 40 insertions(+), 46 deletions(-) diff --git a/src/mintanalysis/ana/__init__.py b/src/mintanalysis/ana/__init__.py index 7ee3e1c..714298a 100644 --- a/src/mintanalysis/ana/__init__.py +++ b/src/mintanalysis/ana/__init__.py @@ -2,5 +2,4 @@ Routines for the final analysis of PMT data """ - -__all__ = [] \ No newline at end of file +__all__ = [] diff --git a/src/mintanalysis/ana/plot_pe_spectrum.py b/src/mintanalysis/ana/plot_pe_spectrum.py index 2692038..0a39d4a 100644 --- a/src/mintanalysis/ana/plot_pe_spectrum.py +++ b/src/mintanalysis/ana/plot_pe_spectrum.py @@ -1,17 +1,13 @@ # Imports -import numpy as np -import matplotlib.pyplot as plt -from 
matplotlib.backends.backend_pdf import PdfPages import awkward as ak -from iminuit import cost, Minuit -from numba_stats import truncnorm, truncexpon, uniform - +import matplotlib.pyplot as plt +import numpy as np +from iminuit import Minuit # LEGEND specific imports from lgdo import lh5 -from dspeed import build_dsp +from matplotlib.backends.backend_pdf import PdfPages -from scipy.signal import find_peaks def valley_index_strict(y): """ @@ -24,8 +20,8 @@ def valley_index_strict(y): # 1. Find first strict peak peak = None - for i in range(1, n-1): - if y[i] > y[i-1] and y[i] > y[i+1]: + for i in range(1, n - 1): + if y[i] > y[i - 1] and y[i] > y[i + 1]: peak = i break if peak is None: @@ -36,32 +32,32 @@ def valley_index_strict(y): i = peak + 1 while i < n: - if y[i] > y[i-1]: # rising again → done + if y[i] > y[i - 1]: # rising again → done break if y[i] < y[valley]: valley = i i += 1 - if i == n: # never rose again + if i == n: # never rose again return None return peak, valley + def gaussian(x, amp, mu, sigma): return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - if __name__ == "__main__": f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020_2025_10_22_23_43_47.lh5" - f_dsp = f_raw.replace("raw","dsp") + f_dsp = f_raw.replace("raw", "dsp") - bin_size=20 - bins = np.arange(-100,10000,bin_size) - lim =20 + bin_size = 20 + bins = np.arange(-100, 10000, bin_size) + lim = 20 A4_LANDSCAPE = (11.69, 8.27) - result_dic ={} + result_dic = {} with PdfPages("pe_spectra.pdf") as pdf: for ch in lh5.ls(f_dsp): @@ -69,22 +65,23 @@ def gaussian(x, amp, mu, sigma): fig, ax = plt.subplots() fig.set_figwidth(A4_LANDSCAPE[0]) fig.set_figheight(A4_LANDSCAPE[1]) - - - d = lh5.read_as(f"{ch}/dsp",f_dsp,"ak") - vals =ak.to_numpy(d.nnls_solution.values,allow_missing=False) - pe_vals = np.nansum(np.where(vals>lim,vals,np.nan),axis=1) - - n, bins, patches = ax.hist(pe_vals,bins=bins,histtype="step",label=f"channel {ch} ON ($V_N$)") + + d = 
lh5.read_as(f"{ch}/dsp", f_dsp, "ak") + vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) + pe_vals = np.nansum(np.where(vals > lim, vals, np.nan), axis=1) + + n, bins, patches = ax.hist( + pe_vals, bins=bins, histtype="step", label=f"channel {ch} ON ($V_N$)" + ) noise_peak, valley_idx = valley_index_strict(n) pe_peak, _ = valley_index_strict(n[noise_peak:]) - ax.axvline(bins[valley_idx],color='red',ls='--') - ax.axvline(bins[noise_peak:][pe_peak],color='green',ls='--') + ax.axvline(bins[valley_idx], color="red", ls="--") + ax.axvline(bins[noise_peak:][pe_peak], color="green", ls="--") - bin_centers = 0.5*(bins[1:]+ bins[:-1]) + bin_centers = 0.5 * (bins[1:] + bins[:-1]) # Restrict range - x_min, x_max = bins[valley_idx], 2*bins[noise_peak:][pe_peak] #- bins[valley_idx] + x_min, x_max = bins[valley_idx], 2 * bins[noise_peak:][pe_peak] # - bins[valley_idx] mask = (bin_centers >= x_min) & (bin_centers <= x_max) bin_centers_fit = bin_centers[mask] @@ -95,9 +92,9 @@ def nll(amp, mu, sigma): expected = gaussian(bin_centers_fit, amp, mu, sigma) expected[expected <= 0] = 1e-10 # avoid log(0) return np.sum(expected - n_fit * np.log(expected)) - + # Initial guesses - amp0 =n[noise_peak:][pe_peak] + amp0 = n[noise_peak:][pe_peak] mu0 = bins[noise_peak:][pe_peak] sigma0 = 100.0 @@ -108,27 +105,25 @@ def nll(amp, mu, sigma): fit_vals = m.values.to_dict() fit_errs = m.errors.to_dict() - result_dic[ch]["pe_peak"] = {"vals":fit_vals,"errs":fit_errs} - + result_dic[ch]["pe_peak"] = {"vals": fit_vals, "errs": fit_errs} + + y_fit = gaussian( + bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] + ) - y_fit = gaussian(bin_centers, - amp=m.values["amp"], - mu=m.values["mu"], - sigma=m.values["sigma"]) + ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") - ax.plot(bin_centers, y_fit, 'r-', label='NLL fit (Minuit)') - dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) # timestamps in rawfile are currently broken # ts = 
lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") - dt = 25.021 #from log file + dt = 25.021 # from log file - result_dic[ch]["dcr"] = {"dcr":{"dcr":dcts/dt,"counts":dcts,"runtime_s":dt}} + result_dic[ch]["dcr"] = {"dcr": {"dcr": dcts / dt, "counts": dcts, "runtime_s": dt}} - ax.set_xlim(-10,2.5e3) - ax.set_ylim(0.5,None) - ax.set_yscale('log') + ax.set_xlim(-10, 2.5e3) + ax.set_ylim(0.5, None) + ax.set_yscale("log") ax.set_ylabel(f"Counts/{bin_size} NNLS units") ax.legend() ax.set_xlabel("NNLS units") From b46e608ce14e8f61f32a0159268eff698e3f804f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 23:01:54 +0000 Subject: [PATCH 03/32] style: pre-commit fixes --- src/mintanalysis/pmt/dsp/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mintanalysis/pmt/dsp/__init__.py b/src/mintanalysis/pmt/dsp/__init__.py index 76c15b0..810ab4c 100644 --- a/src/mintanalysis/pmt/dsp/__init__.py +++ b/src/mintanalysis/pmt/dsp/__init__.py @@ -3,7 +3,6 @@ """ from .build_nnls_matrix import build_nnls_matrix - from .lib_pulse_reco import pulse_analysis from .template_wf import build_template_waveform From c72c2a2ce1edb7469e0dadc9b944c29621bab093 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 24 Nov 2025 15:12:20 -0800 Subject: [PATCH 04/32] update paths --- src/mintanalysis/{ => pmt}/ana/__init__.py | 0 src/mintanalysis/{ => pmt}/ana/plot_pe_spectrum.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/mintanalysis/{ => pmt}/ana/__init__.py (100%) rename src/mintanalysis/{ => pmt}/ana/plot_pe_spectrum.py (100%) diff --git a/src/mintanalysis/ana/__init__.py b/src/mintanalysis/pmt/ana/__init__.py similarity index 100% rename from src/mintanalysis/ana/__init__.py rename to src/mintanalysis/pmt/ana/__init__.py diff --git a/src/mintanalysis/ana/plot_pe_spectrum.py b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py similarity index 100% rename from 
src/mintanalysis/ana/plot_pe_spectrum.py rename to src/mintanalysis/pmt/ana/plot_pe_spectrum.py From 1ce1685b5d40626d3c57977856df4092056cae76 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 24 Nov 2025 16:17:16 -0800 Subject: [PATCH 05/32] add temp dt from raw --- src/mintanalysis/pmt/ana/plot_pe_spectrum.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py index 0a39d4a..62cee3c 100644 --- a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py +++ b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py @@ -51,6 +51,7 @@ def gaussian(x, amp, mu, sigma): if __name__ == "__main__": f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020_2025_10_22_23_43_47.lh5" + # f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r018/r018_2025_09_26_18_44_17.lh5" f_dsp = f_raw.replace("raw", "dsp") bin_size = 20 @@ -115,9 +116,9 @@ def nll(amp, mu, sigma): dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) - # timestamps in rawfile are currently broken - # ts = lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") - dt = 25.021 # from log file + # TODO timestamps in rawfile are currently broken (they are sample numbers currently) + ts = lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") + dt = (ts.max() -ts.min())*4.8e-9 result_dic[ch]["dcr"] = {"dcr": {"dcr": dcts / dt, "counts": dcts, "runtime_s": dt}} From 51977b3b6f754acdc27a194af407555700f6f945 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Nov 2025 00:17:52 +0000 Subject: [PATCH 06/32] style: pre-commit fixes --- src/mintanalysis/pmt/ana/plot_pe_spectrum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py index 62cee3c..d315b35 100644 --- 
a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py +++ b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py @@ -117,8 +117,8 @@ def nll(amp, mu, sigma): dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) # TODO timestamps in rawfile are currently broken (they are sample numbers currently) - ts = lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") - dt = (ts.max() -ts.min())*4.8e-9 + ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") + dt = (ts.max() - ts.min()) * 4.8e-9 result_dic[ch]["dcr"] = {"dcr": {"dcr": dcts / dt, "counts": dcts, "runtime_s": dt}} From 7cef207b8e408135901541412089c772c64ff672 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 25 Nov 2025 14:50:21 -0800 Subject: [PATCH 07/32] add gain plot generation --- src/mintanalysis/pmt/ana/do_pmt_analysis.py | 247 +++++++++++++++++++ src/mintanalysis/pmt/ana/plot_pe_spectrum.py | 136 ---------- 2 files changed, 247 insertions(+), 136 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/do_pmt_analysis.py delete mode 100644 src/mintanalysis/pmt/ana/plot_pe_spectrum.py diff --git a/src/mintanalysis/pmt/ana/do_pmt_analysis.py b/src/mintanalysis/pmt/ana/do_pmt_analysis.py new file mode 100644 index 0000000..5f22da5 --- /dev/null +++ b/src/mintanalysis/pmt/ana/do_pmt_analysis.py @@ -0,0 +1,247 @@ +import awkward as ak +import matplotlib.pyplot as plt +import numpy as np +import yaml +from iminuit import Minuit +from lgdo import lh5 +from matplotlib.backends.backend_pdf import PdfPages +from scipy.optimize import curve_fit + + +def valley_index_strict(y): + """ + Return the index of the valley between the first strict peak + and the next rise. Returns None if not found. + """ + n = len(y) + if n < 3: + return None + + # 1. Find first strict peak + peak = None + for i in range(1, n - 1): + if y[i] > y[i - 1] and y[i] > y[i + 1]: + peak = i + break + if peak is None: + return None + + # 2. 
Walk downward and track the minimum + valley = peak + i = peak + 1 + + while i < n: + if y[i] > y[i - 1]: # rising again → done + break + if y[i] < y[valley]: + valley = i + i += 1 + + if i == n: # never rose again + return None + + return peak, valley + + +def nll(amp, mu, sigma, bin_centers_fit, n_fit): + """Poisson NLL for binned data""" + expected = gaussian(bin_centers_fit, amp, mu, sigma) + expected[expected <= 0] = 1e-10 # avoid log(0) + return np.sum(expected - n_fit * np.log(expected)) + + +def gaussian(x, amp, mu, sigma): + return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + +def linear_func(x, a, b): + return a * x + b + + +if __name__ == "__main__": + + with open( + "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml", + ) as file: + aux_dict = yaml.safe_load(file) + + raw_path = ( + "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020" + ) + out_folder = "/home/pkrause/software/mint-analysis/debug_out/" + + f_result = "result.yaml" + bin_size = 20 + bins = np.arange(-100, 10000, bin_size) + lim = 20 + A4_LANDSCAPE = (11.69, 8.27) + result_dic = {} + + for run in aux_dict: + result_dic[run] = {} + f_raw = raw_path + aux_dict[run]["daq"].split("/")[-1].replace("daq", "").replace( + "data", "lh5" + ) + f_dsp = f_raw.replace("raw", "dsp") + + with PdfPages(out_folder + f"pe_spectra_{run}.pdf") as pdf: + for ch in lh5.ls(f_dsp): + pmt = int(ch[2:]) + 1 + result_dic[run][pmt] = {} + result_dic[run][pmt]["voltages_in_V"] = aux_dict[run]["voltages_in_V"][int(ch[2:])] + fig, ax = plt.subplots() + fig.set_figwidth(A4_LANDSCAPE[0]) + fig.set_figheight(A4_LANDSCAPE[1]) + + d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") + vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) + pe_vals = np.nansum(np.where(vals > lim, vals, np.nan), axis=1) + + n, bins, patches = ax.hist( + pe_vals, + bins=bins, + histtype="step", + label=f"channel {ch} ON 
({result_dic[run][pmt]['voltages_in_V']:.2f} V)", + ) + + if aux_dict[run]["voltages_in_V"][int(ch[2:])] == 0: + ax.set_xlim(-10, 2.5e3) + ax.set_ylim(0.5, None) + ax.set_yscale("log") + ax.set_ylabel(f"Counts/{bin_size} NNLS units") + ax.legend() + ax.set_xlabel("NNLS units") + ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") + + pdf.savefig() + plt.close() + continue + + ###################### + # Fit p.e. peak # + ###################### + + noise_peak, valley_idx = valley_index_strict(n) + pe_peak, _ = valley_index_strict(n[noise_peak:]) + ax.axvline(bins[valley_idx], color="red", ls="--") + ax.axvline(bins[noise_peak:][pe_peak], color="green", ls="--") + + bin_centers = 0.5 * (bins[1:] + bins[:-1]) + # Restrict range + x_min, x_max = ( + bins[valley_idx], + 2 * bins[noise_peak:][pe_peak], + ) # - bins[valley_idx] + mask = (bin_centers >= x_min) & (bin_centers <= x_max) + + bin_centers_fit = bin_centers[mask] + n_fit = n[mask] + + # Initial guesses + amp0 = n[noise_peak:][pe_peak] + mu0 = bins[noise_peak:][pe_peak] + sigma0 = 100.0 + + m = Minuit( + lambda amp, mu, sigma: nll( + amp, mu, sigma, bin_centers_fit=bin_centers_fit, n_fit=n_fit + ), + amp=amp0, + mu=mu0, + sigma=sigma0, + ) + m.errordef = Minuit.LIKELIHOOD # important for Poisson NLL + m.migrad(iterate=10) # perform minimization + + fit_vals = m.values.to_dict() + fit_errs = m.errors.to_dict() + + result_dic[run][pmt]["pe_peak"] = {"vals": fit_vals, "errs": fit_errs} + + y_fit = gaussian( + bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] + ) + + ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") + + ax.set_xlim(-10, 2.5e3) + ax.set_ylim(0.5, None) + ax.set_yscale("log") + ax.set_ylabel(f"Counts/{bin_size} NNLS units") + ax.legend() + ax.set_xlabel("NNLS units") + ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") + + pdf.savefig() + plt.close() + + ###################### + # DCR calculation # + ###################### + + 
dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) + + # TODO fix timestamps in raw (they are sample numbers currently) + ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") + dt = (ts.max() - ts.min()) * 4.8e-9 + + result_dic[run][pmt]["dcr"] = { + "dcr": {"dcr": float(dcts / dt), "counts": float(dcts), "runtime_s": float(dt)} + } + + ###################### + # Gain calculation # + ###################### + tmp_dic = {} + for _, run in result_dic.items(): + for pmt in run: + if pmt not in tmp_dic: + tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} + v = run[pmt]["voltages_in_V"] + if v == 0: + continue + tmp_dic[pmt]["voltage"].append(v) + tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak"]["vals"]["mu"]) + tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak"]["errs"]["mu"]) + + with PdfPages(out_folder + "gain_plots.pdf") as pdf: + for key, pmt in tmp_dic.items(): + fig, ax = plt.subplots() + fig.set_figwidth(A4_LANDSCAPE[0]) + fig.set_figheight(A4_LANDSCAPE[1]) + ax.errorbar( + pmt["voltage"], + pmt["vals"], + pmt["errs"], + label=f"PMT {key}", + fmt="o", + ) + ax.set_ylabel("PMT position (NNLS units)") + ax.set_xlabel("Voltage (V)") + + params, covariance = curve_fit( + linear_func, + pmt["voltage"], + pmt["vals"], + sigma=pmt["errs"], + absolute_sigma=True, + ) + a_opt, b_opt = params + perr = np.sqrt(np.diag(covariance)) + x = np.linspace(-1 * b_opt / a_opt, 110, 1000) + ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") + + tmp_dic[key]["fit"] = { + "vals": {"a": float(a_opt), "b": float(b_opt)}, + "errs": {"a": float(perr[0]), "b": float(perr[1])}, + "func": "gain_in_NNLS_units = a*voltage_in_V+b", + } + + ax.legend() + pdf.savefig() + plt.close() + + result_dic["gain"] = tmp_dic + +with open(out_folder + "results.yaml", "w") as file: + aux_dict = yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py deleted file mode 
100644 index d315b35..0000000 --- a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py +++ /dev/null @@ -1,136 +0,0 @@ -# Imports -import awkward as ak -import matplotlib.pyplot as plt -import numpy as np -from iminuit import Minuit - -# LEGEND specific imports -from lgdo import lh5 -from matplotlib.backends.backend_pdf import PdfPages - - -def valley_index_strict(y): - """ - Return the index of the valley between the first strict peak - and the next rise. Returns None if not found. - """ - n = len(y) - if n < 3: - return None - - # 1. Find first strict peak - peak = None - for i in range(1, n - 1): - if y[i] > y[i - 1] and y[i] > y[i + 1]: - peak = i - break - if peak is None: - return None - - # 2. Walk downward and track the minimum - valley = peak - i = peak + 1 - - while i < n: - if y[i] > y[i - 1]: # rising again → done - break - if y[i] < y[valley]: - valley = i - i += 1 - - if i == n: # never rose again - return None - - return peak, valley - - -def gaussian(x, amp, mu, sigma): - return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - - -if __name__ == "__main__": - - f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020_2025_10_22_23_43_47.lh5" - # f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r018/r018_2025_09_26_18_44_17.lh5" - f_dsp = f_raw.replace("raw", "dsp") - - bin_size = 20 - bins = np.arange(-100, 10000, bin_size) - lim = 20 - A4_LANDSCAPE = (11.69, 8.27) - result_dic = {} - - with PdfPages("pe_spectra.pdf") as pdf: - for ch in lh5.ls(f_dsp): - result_dic[ch] = {} - fig, ax = plt.subplots() - fig.set_figwidth(A4_LANDSCAPE[0]) - fig.set_figheight(A4_LANDSCAPE[1]) - - d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") - vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) - pe_vals = np.nansum(np.where(vals > lim, vals, np.nan), axis=1) - - n, bins, patches = ax.hist( - pe_vals, bins=bins, histtype="step", label=f"channel {ch} ON ($V_N$)" - ) - - noise_peak, valley_idx = 
valley_index_strict(n) - pe_peak, _ = valley_index_strict(n[noise_peak:]) - ax.axvline(bins[valley_idx], color="red", ls="--") - ax.axvline(bins[noise_peak:][pe_peak], color="green", ls="--") - - bin_centers = 0.5 * (bins[1:] + bins[:-1]) - # Restrict range - x_min, x_max = bins[valley_idx], 2 * bins[noise_peak:][pe_peak] # - bins[valley_idx] - mask = (bin_centers >= x_min) & (bin_centers <= x_max) - - bin_centers_fit = bin_centers[mask] - n_fit = n[mask] - - def nll(amp, mu, sigma): - """Poisson NLL for binned data""" - expected = gaussian(bin_centers_fit, amp, mu, sigma) - expected[expected <= 0] = 1e-10 # avoid log(0) - return np.sum(expected - n_fit * np.log(expected)) - - # Initial guesses - amp0 = n[noise_peak:][pe_peak] - mu0 = bins[noise_peak:][pe_peak] - sigma0 = 100.0 - - m = Minuit(nll, amp=amp0, mu=mu0, sigma=sigma0) - m.errordef = Minuit.LIKELIHOOD # important for Poisson NLL - m.migrad(iterate=10) # perform minimization - - fit_vals = m.values.to_dict() - fit_errs = m.errors.to_dict() - - result_dic[ch]["pe_peak"] = {"vals": fit_vals, "errs": fit_errs} - - y_fit = gaussian( - bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] - ) - - ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") - - dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) - - # TODO timestamps in rawfile are currently broken (they are sample numbers currently) - ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") - dt = (ts.max() - ts.min()) * 4.8e-9 - - result_dic[ch]["dcr"] = {"dcr": {"dcr": dcts / dt, "counts": dcts, "runtime_s": dt}} - - ax.set_xlim(-10, 2.5e3) - ax.set_ylim(0.5, None) - ax.set_yscale("log") - ax.set_ylabel(f"Counts/{bin_size} NNLS units") - ax.legend() - ax.set_xlabel("NNLS units") - ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") - - pdf.savefig() - plt.close() - - print(result_dic) From 376806878e30ecd79a16c595e87a28d37e0fe0c8 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 26 Nov 2025 
19:29:34 -0800 Subject: [PATCH 08/32] split analysis into multiple scripts --- src/mintanalysis/pmt/ana/dcr_analysis.py | 50 ++++++ .../pmt/ana/linear_gain_analysis.py | 123 +++++++++++++++ ...mt_analysis.py => pe_spectrum_analysis.py} | 145 +++++++++--------- 3 files changed, 243 insertions(+), 75 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/dcr_analysis.py create mode 100644 src/mintanalysis/pmt/ana/linear_gain_analysis.py rename src/mintanalysis/pmt/ana/{do_pmt_analysis.py => pe_spectrum_analysis.py} (59%) diff --git a/src/mintanalysis/pmt/ana/dcr_analysis.py b/src/mintanalysis/pmt/ana/dcr_analysis.py new file mode 100644 index 0000000..f982a23 --- /dev/null +++ b/src/mintanalysis/pmt/ana/dcr_analysis.py @@ -0,0 +1,50 @@ +import os + +import yaml + + +def linear_func(x, a, b): + return a * x + b + + +if __name__ == "__main__": + + f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" + override_results = True + time_mode = "aux" + + if os.path.exists(f_result): + with open(f_result) as file: + result_dic = yaml.safe_load(file) + else: + raise RuntimeError("File does not exist") + + if "pe_spectrum" not in result_dic.keys(): + raise RuntimeError("pe_spectrum info not present!") + + if "dcr" in result_dic and not override_results: + raise RuntimeError("results already exist and override flag not set") + result_dic["dcr"] = {} + for rk, run in result_dic["pe_spectrum"].items(): + result_dic["dcr"][rk] = {} + for key, pmt in run.items(): + if pmt["voltage"]["val"] == 0: + continue + if "dcr" in pmt and not override_results: + print( + f"DCR key exists but override flag is not set! 
--> skipping {pmt} in run {run}" + ) + continue + dcts = ( + pmt["statistics"]["1st_pe_fit_integral_below_valley"]["val"] + + pmt["statistics"]["cts_above_valley"]["val"] + ) + result_dic["dcr"][rk][key] = { + "val": dcts / pmt["runtime"][time_mode], + "err": (dcts**0.5) / pmt["runtime"][time_mode], + "unit": "Hz", + } + + +with open(f_result, "w") as file: + yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/linear_gain_analysis.py b/src/mintanalysis/pmt/ana/linear_gain_analysis.py new file mode 100644 index 0000000..39a101a --- /dev/null +++ b/src/mintanalysis/pmt/ana/linear_gain_analysis.py @@ -0,0 +1,123 @@ +import os + +import matplotlib.pyplot as plt +import numpy as np +import yaml +from matplotlib.backends.backend_pdf import PdfPages +from scipy.optimize import curve_fit + + +def linear_func(x, a, b): + return a * x + b + + +if __name__ == "__main__": + + f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" + with open(f_result) as file: + result_dic = yaml.safe_load(file) + + out_folder = "/home/pkrause/software/mint-analysis/debug_out/gain/" + + bin_size = 20 + bins = np.arange(-100, 10000, bin_size) + lim = 20 + A4_LANDSCAPE = (11.69, 8.27) + + override_results = True + + if os.path.exists(f_result): + with open(f_result) as file: + result_dic = yaml.safe_load(file) + else: + raise RuntimeError("File does not exist") + + if "pe_spectrum" not in result_dic: + raise RuntimeError("pe_spectrum info not present!") + + if "linear_gain" in result_dic and not override_results: + raise RuntimeError("results already exist and override flag not set") + + ###################### + # Gain calculation # + ###################### + tmp_dic = {"used_keys": []} + y_unit = None + x_unit = None + for key, run in result_dic["pe_spectrum"].items(): + tmp_dic["used_keys"].append(key) + for pmt in run: + if pmt not in tmp_dic: + tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} + v = 
run[pmt]["voltage"]["val"] + xu = run[pmt]["voltage"]["unit"] + if x_unit is None: + x_unit = xu + else: + assert x_unit == xu + if v == 0: + continue + + tmp_dic[pmt]["voltage"].append(v) + tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]["val"]) + tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak_fit"]["mean"]["err"]) + + yu = run[pmt]["pe_peak_fit"]["mean"]["unit"] + if y_unit is None: + y_unit = yu + else: + assert y_unit == yu + + with PdfPages(out_folder + "gain_plots.pdf") as pdf: + for key, pmt in tmp_dic.items(): + if key == "used_keys": + continue + fig, ax = plt.subplots() + fig.set_figwidth(A4_LANDSCAPE[0]) + fig.set_figheight(A4_LANDSCAPE[1]) + ax.errorbar( + pmt["voltage"], + pmt["vals"], + pmt["errs"], + label=f"PMT {key}", + fmt="o", + ) + ax.set_ylabel(f"PMT position ({y_unit})") + ax.set_xlabel(f"Voltage ({x_unit})") + + params, covariance = curve_fit( + linear_func, + pmt["voltage"], + pmt["vals"], + sigma=pmt["errs"], + absolute_sigma=True, + ) + a_opt, b_opt = params + perr = np.sqrt(np.diag(covariance)) + x = np.linspace(-1 * b_opt / a_opt, 110, 1000) + ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") + + tmp_dic[key]["a"] = { + "val": float(a_opt), + "err": float(perr[0]), + "unit": f"{y_unit}/{x_unit}", + } + tmp_dic[key]["b"] = { + "val": float(b_opt), + "err": float(perr[1]), + "unit": f"{y_unit}/{x_unit}", + } + tmp_dic[key]["func"] = "G = a*voltage+b" + + pmt.pop("errs") + pmt.pop("vals") + pmt.pop("voltage") + ax.legend() + pdf.savefig() + plt.close() + + result_dic["linear_gain"] = tmp_dic + + +with open(f_result, "w") as file: + yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/do_pmt_analysis.py b/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py similarity index 59% rename from src/mintanalysis/pmt/ana/do_pmt_analysis.py rename to src/mintanalysis/pmt/ana/pe_spectrum_analysis.py index 5f22da5..3a179b2 100644 --- 
a/src/mintanalysis/pmt/ana/do_pmt_analysis.py +++ b/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py @@ -1,3 +1,5 @@ +import os + import awkward as ak import matplotlib.pyplot as plt import numpy as np @@ -5,7 +7,6 @@ from iminuit import Minuit from lgdo import lh5 from matplotlib.backends.backend_pdf import PdfPages -from scipy.optimize import curve_fit def valley_index_strict(y): @@ -60,17 +61,18 @@ def linear_func(x, a, b): if __name__ == "__main__": - with open( - "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml", - ) as file: + f_aux = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml" + f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" + with open(f_aux) as file: aux_dict = yaml.safe_load(file) raw_path = ( "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020" ) - out_folder = "/home/pkrause/software/mint-analysis/debug_out/" + plot_folder = "/home/pkrause/software/mint-analysis/debug_out/pe_spectra/" + + override_results = True - f_result = "result.yaml" bin_size = 20 bins = np.arange(-100, 10000, bin_size) lim = 20 @@ -84,11 +86,14 @@ def linear_func(x, a, b): ) f_dsp = f_raw.replace("raw", "dsp") - with PdfPages(out_folder + f"pe_spectra_{run}.pdf") as pdf: + with PdfPages(plot_folder + f"pe_spectra_{run}.pdf") as pdf: for ch in lh5.ls(f_dsp): pmt = int(ch[2:]) + 1 result_dic[run][pmt] = {} - result_dic[run][pmt]["voltages_in_V"] = aux_dict[run]["voltages_in_V"][int(ch[2:])] + result_dic[run][pmt]["voltage"] = { + "val": aux_dict[run]["voltages_in_V"][int(ch[2:])], + "unit": "V", + } fig, ax = plt.subplots() fig.set_figwidth(A4_LANDSCAPE[0]) fig.set_figheight(A4_LANDSCAPE[1]) @@ -101,7 +106,7 @@ def linear_func(x, a, b): pe_vals, bins=bins, histtype="step", - label=f"channel {ch} ON ({result_dic[run][pmt]['voltages_in_V']:.2f} V)", + label=f"channel {ch} ON 
({result_dic[run][pmt]['voltage']['val']:.2f} {result_dic[run][pmt]['voltage']['unit']})", ) if aux_dict[run]["voltages_in_V"][int(ch[2:])] == 0: @@ -156,7 +161,11 @@ def linear_func(x, a, b): fit_vals = m.values.to_dict() fit_errs = m.errors.to_dict() - result_dic[run][pmt]["pe_peak"] = {"vals": fit_vals, "errs": fit_errs} + result_dic[run][pmt]["pe_peak_fit"] = { + "mean": {"val": fit_vals["mu"], "err": fit_errs["mu"], "unit": "NNLS"}, + "sigma": {"val": fit_vals["sigma"], "err": fit_errs["sigma"], "unit": "NNLS"}, + "amp": {"val": fit_vals["amp"], "err": fit_errs["amp"], "unit": ""}, + } y_fit = gaussian( bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] @@ -176,72 +185,58 @@ def linear_func(x, a, b): plt.close() ###################### - # DCR calculation # + # spectrum values # ###################### - dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) - - # TODO fix timestamps in raw (they are sample numbers currently) - ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") - dt = (ts.max() - ts.min()) * 4.8e-9 - - result_dic[run][pmt]["dcr"] = { - "dcr": {"dcr": float(dcts / dt), "counts": float(dcts), "runtime_s": float(dt)} + result_dic[run][pmt]["statistics"] = { + "1st_pe_fit_integral_below_valley": { + "val": float(np.sum(y_fit[:valley_idx])), + "unit": "", + }, + "cts_above_valley": {"val": int(np.sum(n[:valley_idx])), "unit": ""}, + "cts_below_valley": {"val": int(np.sum(n[valley_idx:])), "unit": ""}, + "1st_pe_fit_integral": {"val": int(float(np.sum(y_fit))), "unit": ""}, + "total_cts": {"val": int(np.sum(n)), "unit": ""}, + "valley": { + "pos": {"val": float(bin_centers[valley_idx]), "unit": "NNLS"}, + "amp": int(n[valley_idx]), + }, + "1st_pe_guess": { + "pos": {"val": float(mu0), "unit": "NNLS"}, + "amp": {"val": int(amp0), "unit": ""}, + }, } - ###################### - # Gain calculation # - ###################### - tmp_dic = {} - for _, run in result_dic.items(): - for pmt in run: - if pmt not in tmp_dic: - 
tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} - v = run[pmt]["voltages_in_V"] - if v == 0: - continue - tmp_dic[pmt]["voltage"].append(v) - tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak"]["vals"]["mu"]) - tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak"]["errs"]["mu"]) - - with PdfPages(out_folder + "gain_plots.pdf") as pdf: - for key, pmt in tmp_dic.items(): - fig, ax = plt.subplots() - fig.set_figwidth(A4_LANDSCAPE[0]) - fig.set_figheight(A4_LANDSCAPE[1]) - ax.errorbar( - pmt["voltage"], - pmt["vals"], - pmt["errs"], - label=f"PMT {key}", - fmt="o", - ) - ax.set_ylabel("PMT position (NNLS units)") - ax.set_xlabel("Voltage (V)") - - params, covariance = curve_fit( - linear_func, - pmt["voltage"], - pmt["vals"], - sigma=pmt["errs"], - absolute_sigma=True, - ) - a_opt, b_opt = params - perr = np.sqrt(np.diag(covariance)) - x = np.linspace(-1 * b_opt / a_opt, 110, 1000) - ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") - - tmp_dic[key]["fit"] = { - "vals": {"a": float(a_opt), "b": float(b_opt)}, - "errs": {"a": float(perr[0]), "b": float(perr[1])}, - "func": "gain_in_NNLS_units = a*voltage_in_V+b", - } - - ax.legend() - pdf.savefig() - plt.close() - - result_dic["gain"] = tmp_dic - -with open(out_folder + "results.yaml", "w") as file: - aux_dict = yaml.safe_dump(result_dic, file, default_flow_style=False) + result_dic[run][pmt]["runtime"] = {"unit": "s"} + if "runtime_in_s" in aux_dict[run]: + result_dic[run][pmt]["runtime"]["aux"] = aux_dict[run]["runtime_in_s"] + + if f"{ch}/raw/timestamp_sec" in lh5.ls( + f_raw, f"{ch}/raw/" + ) and f"{ch}/raw/timestamp_ps" in lh5.ls(f_raw, f"{ch}/raw/"): + ts = lh5.read_as(f"{ch}/raw/timestamp_sec", f_raw, "np") + ts_ps = lh5.read_as(f"{ch}/raw/timestamp_ps", f_raw, "np") + result_dic[run][pmt]["runtime"]["raw"] = float( + ts[ts.argmax()] + + ts_ps[ts.argmax()] * 1e-12 + - (ts[ts.argmin()] + ts_ps[ts.argmin()] * 1e-12) + ) + + # support for old raw files + elif f"{ch}/raw/timestamp" in 
lh5.ls(f_raw, f"{ch}/raw/"): + ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") + result_dic[run][pmt]["runtime"]["raw"] = float((ts.max() - ts.min()) * 4.8e-9) + +if os.path.exists(f_result): + with open(f_result) as file: + tmp_dic = yaml.safe_load(file) + if "pe_spectrum" in tmp_dic.keys() and not override_results: + raise RuntimeError("Already exists and override flag is not set") + else: + tmp_dic["pe_spectrum"] = result_dic + with open(f_result, "w") as file: + yaml.safe_dump(tmp_dic, file, default_flow_style=False) + +else: + with open(f_result, "w") as file: + yaml.safe_dump({"pe_spectrum": result_dic}, file, default_flow_style=False) From 261cf332b57d42a1dfaef3392b44efcf416a0675 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 27 Nov 2025 12:50:11 -0800 Subject: [PATCH 09/32] Refactor scripts to analyser class --- src/mintanalysis/pmt/ana/analyser_class.py | 725 ++++++++++++++++++ src/mintanalysis/pmt/ana/dcr_analysis.py | 50 -- .../pmt/ana/linear_gain_analysis.py | 123 --- .../pmt/ana/pe_spectrum_analysis.py | 242 ------ 4 files changed, 725 insertions(+), 415 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/analyser_class.py delete mode 100644 src/mintanalysis/pmt/ana/dcr_analysis.py delete mode 100644 src/mintanalysis/pmt/ana/linear_gain_analysis.py delete mode 100644 src/mintanalysis/pmt/ana/pe_spectrum_analysis.py diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py new file mode 100644 index 0000000..badab7f --- /dev/null +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -0,0 +1,725 @@ +""" +PESpectrumAnalyzer + +- Logging to stdout + file. +- Missing critical files -> hard fail. +- Per-channel fit/peak failures -> skip and continue. +- One PDF per run (written once; pages appended in-memory during run processing). +- Cleaner decomposition and minimal repetition. +- 1st p.e. 
fit: single gauss +- DCR estimate and plot +- linear gain fit +- SNR plot + +Usage: Run via CLI with desired flags +""" + +from __future__ import annotations + +import argparse +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import awkward as ak +import matplotlib.pyplot as plt +import numpy as np +import yaml +from iminuit import Minuit +from lgdo import lh5 +from matplotlib.backends.backend_pdf import PdfPages +from scipy.optimize import curve_fit + +# -------------------------- +# Configuration / Constants +# -------------------------- +AUX_YAML = Path( + "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml" +) +RAW_DIR = Path("/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020") +RESULT_YAML = Path("/home/pkrause/software/mint-analysis/debug_out/results.yaml") +PLOT_FOLDER = Path("/home/pkrause/software/mint-analysis/debug_out/pe_spectra/") +LOG_FILE = Path("/home/pkrause/software/mint-analysis/debug_out/pe_spectrum.log") + +# Analysis constants +BIN_SIZE = 20 +BINS = np.arange(-100, 10000, BIN_SIZE) +LIM = 20 +A4_LANDSCAPE = (11.69, 8.27) +OVERRIDE_RESULTS = True + + +# -------------------------- +# Logging Setup +# -------------------------- + + +def setup_logging(log_file: Path = LOG_FILE, level: int = logging.INFO) -> logging.Logger: + logger = logging.getLogger("PESpectrum") + logger.setLevel(level) + logger.propagate = False + + if not logger.handlers: + fmt = logging.Formatter( + "[%(asctime)s] [%(name)s - %(funcName)s] [%(levelname)s] %(message)s" + ) + + sh = logging.StreamHandler() + sh.setLevel(level) + sh.setFormatter(fmt) + logger.addHandler(sh) + + fh = logging.FileHandler(log_file, mode="w") + fh.setLevel(level) + fh.setFormatter(fmt) + logger.addHandler(fh) + + return logger + + +# -------------------------- +# Small helpers +# -------------------------- + + +@dataclass +class ChannelResult: + status: str # 'ok', 
'skipped', 'error' + data: dict[str, Any] + + +def linear_func(x, a, b): + return a * x + b + + +def gaussian(x: np.ndarray, amp: float, mu: float, sigma: float) -> np.ndarray: + return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + +def poisson_nll(amp: float, mu: float, sigma: float, x: np.ndarray, y: np.ndarray) -> float: + expected = gaussian(x, amp, mu, sigma) + expected = np.clip(expected, 1e-10, None) + return float(np.sum(expected - y * np.log(expected))) + + +def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: + """Return (peak_idx, valley_idx) or None.""" + n = len(y) + if n < 3: + return None + + peak = None + for i in range(1, n - 1): + if y[i] > y[i - 1] and y[i] > y[i + 1]: + peak = i + break + if peak is None: + return None + + valley = peak + i = peak + 1 + while i < n: + if y[i] > y[i - 1]: + return peak, valley + if y[i] < y[valley]: + valley = i + i += 1 + + return None + + +# -------------------------- +# Analyzer class +# -------------------------- +class PESpectrumAnalyzer: + def __init__( + self, + aux_yaml: Path = AUX_YAML, + raw_dir: Path = RAW_DIR, + plot_folder: Path = PLOT_FOLDER, + result_yaml: Path = RESULT_YAML, + bins: np.ndarray = BINS, + bin_size: int = BIN_SIZE, + lim: int = LIM, + override_results: bool = OVERRIDE_RESULTS, + logger: logging.Logger | None = None, + ) -> None: + self.aux_yaml = aux_yaml + self.raw_dir = raw_dir + self.plot_folder = plot_folder + self.result_yaml = result_yaml + self.bins = bins + self.bin_size = bin_size + self.lim = lim + self.override_results = override_results + self.logger = logger or setup_logging() + self.aux = self._load_aux() + self.plot_folder.mkdir(parents=True, exist_ok=True) + + # ---------------------- + # I/O helpers + # ---------------------- + def _load_aux(self) -> dict[str, Any]: + if not self.aux_yaml.exists(): + msg = f"Aux file not found: {self.aux_yaml}" + raise FileNotFoundError(msg) + with open(self.aux_yaml) as f: + aux = yaml.safe_load(f) + 
self.logger.info("Loaded aux YAML: %s", self.aux_yaml) + return aux + + # ---------------------- + # Public entrypoints + # ---------------------- + def run(self) -> None: + results: dict[str, dict[int, dict[str, Any]]] = {} + for run_name, meta in self.aux.items(): + self.logger.info("Starting run: %s", run_name) + try: + results[run_name] = self.analyze_run(run_name, meta) + except FileNotFoundError as e: + self.logger.exception("Critical file missing for run %s: %s", run_name, e) + raise + self._save_results(results) + self.logger.info("All runs processed.") + + # ---------------------- + # Per-run flow + # ---------------------- + def analyze_run(self, run_name: str, meta: dict[str, Any]) -> dict[int, dict[str, Any]]: + # build file paths + fname = meta["daq"].split("/")[-1].replace("daq", "r020").replace("data", "lh5") + f_raw = self.raw_dir / fname + if not f_raw.exists(): + msg = f"Raw file for run {run_name} not found: {f_raw}" + raise FileNotFoundError(msg) + f_dsp = Path(str(f_raw).replace("raw", "dsp")) + if not f_dsp.exists(): + msg = f"DSP file for run {run_name} not found: {f_dsp}" + raise FileNotFoundError(msg) + + run_results: dict[int, dict[str, Any]] = {} + pdf_path = self.plot_folder / f"pe_spectra_{run_name}.pdf" + + # collect figures and write once + with PdfPages(pdf_path) as pdf: + for ch in lh5.ls(f_dsp): + pmt = int(ch[2:]) + 1 + self.logger.info("Run %s - channel %s (PMT %d)", run_name, ch, pmt) + try: + fig, chan_data = self.process_channel(run_name, ch, pmt, meta, f_raw, f_dsp) + # fig may be None if plotting skipped + if fig is not None: + pdf.savefig(fig) + plt.close(fig) + run_results[pmt] = chan_data + except Exception as exc: + self.logger.exception( + "Channel-level error run=%s ch=%s: %s", run_name, ch, exc + ) + run_results[pmt] = {"status": "error", "reason": str(exc)} + + self.logger.info("Wrote PDF for run %s to %s", run_name, pdf_path) + return run_results + + # ---------------------- + # Per-channel processing + # 
---------------------- + def process_channel( + self, + run_name: str, + ch: str, + pmt: int, + meta: dict[str, Any], + f_raw: Path, + f_dsp: Path, + ) -> tuple[plt.Figure | None, dict[str, Any]]: + """Process channel. Returns (figure_or_None, channel_result_dict). + + Non-critical failures return a result dict with status 'skipped' + """ + result: dict[str, Any] = {} + ch_idx = int(ch[2:]) + voltage = float(meta["voltages_in_V"][ch_idx]) + result["voltage"] = {"val": voltage, "unit": "V"} + + # load data + try: + d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") + except Exception as e: + msg = f"Failed to read DSP for {ch} in {f_dsp}: {e}" + self.logger.warning(msg) + return None, {"status": "skipped", "reason": msg} + + # compute pe-values + try: + vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) + pe_vals = np.nansum(np.where(vals > self.lim, vals, np.nan), axis=1) + except Exception as e: + msg = f"Failed to compute pe values for {ch}: {e}" + self.logger.warning(msg) + return None, {"status": "skipped", "reason": msg} + + # histogram + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) + n, bins, _ = ax.hist( + pe_vals, + bins=self.bins, + histtype="step", + label=f"channel {ch} (PMT {pmt}) at {voltage:.2f} V", + ) + + bin_centers = 0.5 * (bins[1:] + bins[:-1]) + + # noise-only + if voltage == 0: + self._decorate_axis(ax) + # minimal result (no fit) + result.update( + {"status": "ok", "statistics": {}, "pe_peak_fit": {}, "runtime": {"unit": "s"}} + ) + if "runtime_in_s" in meta: + result["runtime"]["aux"] = meta["runtime_in_s"] + raw_runtime = self._extract_runtime_if_present(f_raw, ch) + if raw_runtime is not None: + result["runtime"]["raw"] = raw_runtime + return fig, result + + # detect valley & peaks + vi = valley_index_strict(n) + if vi is None: + msg = "Valley detection failed (no strict peak/valley)." 
+ self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": msg} + + noise_peak, valley_idx = vi + + # find first p.e. peak after noise_peak + sub = n[noise_peak:] + pe_vi = valley_index_strict(sub) + if pe_vi is None: + msg = "1st-p.e. detection failed after noise peak." + self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": msg} + + pe_peak_rel, _ = pe_vi + pe_peak_idx = noise_peak + pe_peak_rel + + # annotate choices + ax.axvline(bin_centers[valley_idx], color="red", ls="--", label="valley") + ax.axvline(bin_centers[pe_peak_idx], color="green", ls="--", label="1st pe guess") + + # fit window + x_min = bin_centers[valley_idx] + x_max = 2 * bin_centers[pe_peak_idx] + mask = (bin_centers >= x_min) & (bin_centers <= x_max) + bin_centers_fit = bin_centers[mask] + n_fit = n[mask] + + if len(bin_centers_fit) < 3: + msg = f"Insufficient bins for fitting (n_fit={len(bin_centers_fit)})." 
+ self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": msg} + + amp0 = float(n[pe_peak_idx]) + mu0 = float(bin_centers[pe_peak_idx]) + sigma0 = 100.0 + + try: + m = Minuit( + lambda amp, mu, sigma: poisson_nll(amp, mu, sigma, bin_centers_fit, n_fit), + amp=amp0, + mu=mu0, + sigma=sigma0, + ) + m.errordef = Minuit.LIKELIHOOD + m.migrad(iterate=10) + except Exception as e: + msg = f"Minuit error for {ch}: {e}" + self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": msg} + + # Basic validity check + if not getattr(m, "valid", True): + self.logger.warning("Minuit invalid result for run %s ch %s", run_name, ch) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": "Minuit invalid"} + + fit_vals = {k: float(v) for k, v in m.values.to_dict().items()} + fit_errs = {k: float(v) for k, v in m.errors.to_dict().items()} + + result["pe_peak_fit"] = { + "mean": {"val": fit_vals["mu"], "err": fit_errs["mu"], "unit": "NNLS"}, + "sigma": {"val": fit_vals["sigma"], "err": fit_errs["sigma"], "unit": "NNLS"}, + "amp": {"val": fit_vals["amp"], "err": fit_errs["amp"], "unit": ""}, + } + + # full fit curve over bins for plotting & integrals + y_fit = gaussian(bin_centers, fit_vals["amp"], fit_vals["mu"], fit_vals["sigma"]) + ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") + + self._decorate_axis(ax) + + # statistics + try: + result["statistics"] = { + "1st_pe_fit_integral_below_valley": { + "val": float(np.sum(y_fit[:valley_idx])), + "unit": "", + }, + "cts_above_valley": {"val": int(np.sum(n[:valley_idx])), "unit": ""}, + "cts_below_valley": {"val": int(np.sum(n[valley_idx:])), "unit": ""}, + "1st_pe_fit_integral": {"val": int(float(np.sum(y_fit))), "unit": ""}, + "total_cts": {"val": int(np.sum(n)), "unit": ""}, + "valley": { + "pos": {"val": float(bin_centers[valley_idx]), "unit": "NNLS"}, + "amp": 
{"val": int(n[valley_idx]), "unit": ""}, + }, + "1st_pe_guess": { + "pos": {"val": float(mu0), "unit": "NNLS"}, + "amp": {"val": int(amp0), "unit": ""}, + }, + } + except Exception as e: + self.logger.warning( + "Statistics computation failed for run %s ch %s: %s", run_name, ch, e + ) + result.setdefault("statistics", {}) + result["statistics"]["error"] = str(e) + + # runtime + result["runtime"] = {"unit": "s"} + if "runtime_in_s" in meta: + result["runtime"]["aux"] = meta["runtime_in_s"] + raw_runtime = self._extract_runtime_if_present(f_raw, ch) + if raw_runtime is not None: + result["runtime"]["raw"] = raw_runtime + + result["status"] = "ok" + return fig, result + + # ---------------------- + # Utilities + # ---------------------- + def _decorate_axis(self, ax: plt.Axes) -> None: + ax.set_xlim(-10, 2.5e3) + ax.set_ylim(0.5, None) + ax.set_yscale("log") + ax.set_ylabel(f"Counts/{self.bin_size} NNLS units") + ax.set_xlabel("NNLS units") + ax.set_title(f"pygama-NNLS reconstruction ({self.lim} units solution cut-off)") + ax.legend() + + def _extract_runtime_if_present(self, f_raw: Path, ch: str) -> float | None: + try: + entries = set(lh5.ls(f_raw, f"{ch}/raw/")) + except Exception: + entries = set() + + sec_key = f"{ch}/raw/timestamp_sec" + ps_key = f"{ch}/raw/timestamp_ps" + if sec_key in entries and ps_key in entries: + try: + ts = lh5.read_as(sec_key, f_raw, "np") + ts_ps = lh5.read_as(ps_key, f_raw, "np") + idx_max = int(ts.argmax()) + idx_min = int(ts.argmin()) + return float( + ts[idx_max] + ts_ps[idx_max] * 1e-12 - (ts[idx_min] + ts_ps[idx_min] * 1e-12) + ) + except Exception: + self.logger.debug("Failed to extract new-style timestamps for %s in %s", ch, f_raw) + + legacy_key = f"{ch}/raw/timestamp" + if legacy_key in entries: + try: + t = lh5.read_as(legacy_key, f_raw, "np") + return float((t.max() - t.min()) * 4.8e-9) + except Exception: + self.logger.debug("Failed to extract legacy timestamps for %s in %s", ch, f_raw) + + return None + + def 
_save_results(self, results: dict[str, dict[int, dict[str, Any]]]) -> None: + if self.result_yaml.exists(): + with open(self.result_yaml) as f: + existing = yaml.safe_load(f) or {} + if "pe_spectrum" in existing and not self.override_results: + msg = "Results already present and override flag is False." + raise RuntimeError(msg) + existing["pe_spectrum"] = results + with open(self.result_yaml, "w") as f: + yaml.safe_dump(existing, f, default_flow_style=False) + self.logger.info("Updated result YAML at %s", self.result_yaml) + else: + with open(self.result_yaml, "w") as f: + yaml.safe_dump({"pe_spectrum": results}, f, default_flow_style=False) + self.logger.info("Wrote new result YAML at %s", self.result_yaml) + + # ---------------------- + # Signal to Noise Ratio (SNR) + # ---------------------- + def plot_snr(self) -> None: + if not self.result_yaml.exists(): + msg = f"Result YAML not found: {self.result_yaml}" + raise FileNotFoundError(msg) + with open(self.result_yaml) as f: + data = yaml.safe_load(f) or {} + if "pe_spectrum" not in data: + msg = "pe_spectrum info not present in result YAML; run analysis first." 
+ raise RuntimeError(msg) + + snr = {} + for run_name, pmt_dict in data["pe_spectrum"].items(): + run_snr = {} + for pmt, info in pmt_dict.items(): + try: + if info.get("voltage", {}).get("val", 0) == 0: + continue + noise = { + "val": info.get("statistics", {}) + .get("valley", {}) + .get("amp", {}) + .get("val", 0.0) + } + noise["err"] = noise["val"] ** 0.5 + if info.get("status", "skipped") == "ok": + signal = { + "val": info.get("pe_peak_fit").get("amp", {}).get("val", 0.0), + "err": info.get("pe_peak_fit").get("amp", {}).get("err", 0.0), + } + else: + signal = { + "val": info.get("statistics", {}) + .get("1st_pe_guess", {}) + .get("amp", {}) + .get("val", 0) + } + signal["err"] = signal["val"] ** 0.5 + run_snr[pmt] = { + "val": 1 - noise["val"] / signal["val"], + "err": ( + noise["err"] ** 2 / signal["val"] ** 2 + + (noise["val"] ** 2 * signal["err"] ** 2) / signal["val"] ** 4 + ), + "unit": "", + } + except Exception as e: + self.logger.warning( + "Failed to compute SNR for run %s PMT %s: %s", run_name, pmt, e + ) + snr[run_name] = run_snr + + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) + for run_name, pmt_dict in snr.items(): + pmts = sorted(pmt_dict.keys()) + vals = [pmt_dict[p]["val"] for p in pmts] + ax.plot(pmts, vals, marker="o", label=run_name) + ax.set_xlabel("PMT") + ax.set_ylabel("SNR (a.u.)") + ax.set_title("SNR per PMT (= 1 - valley/peak)") + ax.legend() + plt.tight_layout() + plot_path = self.plot_folder / "snr_plot.png" + fig.savefig(plot_path) + plt.close(fig) + self.logger.info("SNR plot saved to %s", plot_path) + + # ---------------------- + # Dark Count Rate (DCR) + # ---------------------- + def compute_dark_count_rate(self, time_mode: str = "aux") -> None: + if not self.result_yaml.exists(): + msg = f"Result YAML not found: {self.result_yaml}" + raise FileNotFoundError(msg) + with open(self.result_yaml) as f: + data = yaml.safe_load(f) or {} + if "pe_spectrum" not in data: + msg = "pe_spectrum info not present in result YAML; run analysis 
first." + raise RuntimeError(msg) + if "dcr" in data and not self.override_results: + msg = "DCR already exists and override flag is False." + raise RuntimeError(msg) + dcr = {} + for run_name, pmt_dict in data["pe_spectrum"].items(): + run_dcr = {} + for pmt, info in pmt_dict.items(): + try: + if info.get("voltage", {}).get("val", 0) == 0: + continue + stats = info.get("statistics", {}) + runtime_info = info.get("runtime", {}) + if time_mode not in runtime_info: + self.logger.warning( + "Run %s PMT %s: missing runtime '%s'; skipping DCR.", + run_name, + pmt, + time_mode, + ) + continue + dcts = stats.get("1st_pe_fit_integral_below_valley", {}).get( + "val", 0.0 + ) + stats.get("cts_above_valley", {}).get("val", 0) + runtime = runtime_info[time_mode] + run_dcr[pmt] = { + "val": float(dcts) / float(runtime), + "err": float(dcts**0.5) / float(runtime), + "unit": "Hz", + } + except Exception as e: + self.logger.warning( + "Failed to compute DCR for run %s PMT %s: %s", run_name, pmt, e + ) + dcr[run_name] = run_dcr + data["dcr"] = dcr + with open(self.result_yaml, "w") as f: + yaml.safe_dump(data, f, default_flow_style=False) + self.logger.info("Wrote DCR results to %s", self.result_yaml) + + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) + for run_name, pmt_dict in dcr.items(): + pmts = sorted(pmt_dict.keys()) + vals = [pmt_dict[p]["val"] for p in pmts] + ax.plot(pmts, vals, marker="o", label=run_name) + ax.set_xlabel("PMT") + ax.set_ylabel("DCR (Hz)") + ax.set_title("Dark Count Rate per PMT") + ax.legend() + plt.tight_layout() + plot_path = self.plot_folder / "dcr_plot.png" + fig.savefig(plot_path) + plt.close(fig) + self.logger.info("DCR plot saved to %s", plot_path) + + # ---------------------- + # Linear Gain Fit computation + # ---------------------- + def compute_linear_gain_fit(self) -> None: + if not self.result_yaml.exists(): + msg = f"Result YAML not found: {self.result_yaml}" + raise FileNotFoundError(msg) + with open(self.result_yaml) as f: + data = 
yaml.safe_load(f) or {} + if "pe_spectrum" not in data: + msg = "pe_spectrum info not present in result YAML; run analysis first." + raise RuntimeError(msg) + + tmp_dic = {"used_keys": []} + y_unit = None + x_unit = None + for key, run in data["pe_spectrum"].items(): + tmp_dic["used_keys"].append(key) + for pmt in run: + if pmt not in tmp_dic: + tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} + v = run[pmt]["voltage"]["val"] + xu = run[pmt]["voltage"]["unit"] + if x_unit is None: + x_unit = xu + else: + assert x_unit == xu + if v == 0: + continue + tmp_dic[pmt]["voltage"].append(v) + tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]["val"]) + tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak_fit"]["mean"]["err"]) + yu = run[pmt]["pe_peak_fit"]["mean"]["unit"] + if y_unit is None: + y_unit = yu + else: + assert y_unit == yu + + pdf_path = self.plot_folder / "gain_plots.pdf" + with PdfPages(pdf_path) as pdf: + for key, pmt in tmp_dic.items(): + if key == "used_keys": + continue + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) + ax.errorbar(pmt["voltage"], pmt["vals"], pmt["errs"], fmt="o", label=f"PMT {key}") + ax.set_xlabel(f"Voltage ({x_unit})") + ax.set_ylabel(f"PMT position ({y_unit})") + params, covariance = curve_fit( + linear_func, + pmt["voltage"], + pmt["vals"], + sigma=pmt["errs"], + absolute_sigma=True, + ) + a_opt, b_opt = params + perr = np.sqrt(np.diag(covariance)) + x = np.linspace(-1 * b_opt / a_opt, max(pmt["voltage"]) + 10, 1000) + ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") + tmp_dic[key]["a"] = { + "val": float(a_opt), + "err": float(perr[0]), + "unit": f"{y_unit}/{x_unit}", + } + tmp_dic[key]["b"] = { + "val": float(b_opt), + "err": float(perr[1]), + "unit": f"{y_unit}/{x_unit}", + } + tmp_dic[key]["func"] = "G = a*voltage+b" + pmt.pop("voltage") + pmt.pop("vals") + pmt.pop("errs") + ax.legend() + pdf.savefig(fig) + plt.close(fig) + data["linear_gain"] = tmp_dic + with open(self.result_yaml, "w") as f: + 
yaml.safe_dump(data, f, default_flow_style=False) + self.logger.info("Linear gain fit results saved to %s", self.result_yaml) + + +# -------------------------- +# CLI entrypoint +# -------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser( + description="PE Spectrum Analyzer with DCR and linear gain fit" + ) + parser.add_argument( + "-p", "--compute-pe", action="store_true", help="Do p.e. spectrum analysis" + ) + parser.add_argument( + "-d", "--compute-dcr", action="store_true", help="Compute DCR after analysis" + ) + parser.add_argument( + "-g", "--compute-gain", action="store_true", help="Compute linear gain fit after analysis" + ) + parser.add_argument( + "-s", "--compute-snr", action="store_true", help="Compute SNR after analysis" + ) + args = parser.parse_args() + + logger = setup_logging(LOG_FILE, level=logging.INFO) + try: + analyzer = PESpectrumAnalyzer(logger=logger) + if args.compute_pe: + analyzer.run() + if args.compute_dcr: + analyzer.compute_dark_count_rate(time_mode="aux") + if args.compute_gain: + analyzer.compute_linear_gain_fit() + if args.compute_snr: + analyzer.plot_snr() + + logger.info("Processing complete.") + except Exception as e: + logger.exception("Fatal error: %s", e) + raise + + +# -------------------------- +# CLI entrypoint +# -------------------------- +if __name__ == "__main__": + main() diff --git a/src/mintanalysis/pmt/ana/dcr_analysis.py b/src/mintanalysis/pmt/ana/dcr_analysis.py deleted file mode 100644 index f982a23..0000000 --- a/src/mintanalysis/pmt/ana/dcr_analysis.py +++ /dev/null @@ -1,50 +0,0 @@ -import os - -import yaml - - -def linear_func(x, a, b): - return a * x + b - - -if __name__ == "__main__": - - f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" - override_results = True - time_mode = "aux" - - if os.path.exists(f_result): - with open(f_result) as file: - result_dic = yaml.safe_load(file) - else: - raise RuntimeError("File does not exist") - - if "pe_spectrum" 
not in result_dic.keys(): - raise RuntimeError("pe_spectrum info not present!") - - if "dcr" in result_dic and not override_results: - raise RuntimeError("results already exist and override flag not set") - result_dic["dcr"] = {} - for rk, run in result_dic["pe_spectrum"].items(): - result_dic["dcr"][rk] = {} - for key, pmt in run.items(): - if pmt["voltage"]["val"] == 0: - continue - if "dcr" in pmt and not override_results: - print( - f"DCR key exists but override flag is not set! --> skipping {pmt} in run {run}" - ) - continue - dcts = ( - pmt["statistics"]["1st_pe_fit_integral_below_valley"]["val"] - + pmt["statistics"]["cts_above_valley"]["val"] - ) - result_dic["dcr"][rk][key] = { - "val": dcts / pmt["runtime"][time_mode], - "err": (dcts**0.5) / pmt["runtime"][time_mode], - "unit": "Hz", - } - - -with open(f_result, "w") as file: - yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/linear_gain_analysis.py b/src/mintanalysis/pmt/ana/linear_gain_analysis.py deleted file mode 100644 index 39a101a..0000000 --- a/src/mintanalysis/pmt/ana/linear_gain_analysis.py +++ /dev/null @@ -1,123 +0,0 @@ -import os - -import matplotlib.pyplot as plt -import numpy as np -import yaml -from matplotlib.backends.backend_pdf import PdfPages -from scipy.optimize import curve_fit - - -def linear_func(x, a, b): - return a * x + b - - -if __name__ == "__main__": - - f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" - with open(f_result) as file: - result_dic = yaml.safe_load(file) - - out_folder = "/home/pkrause/software/mint-analysis/debug_out/gain/" - - bin_size = 20 - bins = np.arange(-100, 10000, bin_size) - lim = 20 - A4_LANDSCAPE = (11.69, 8.27) - - override_results = True - - if os.path.exists(f_result): - with open(f_result) as file: - result_dic = yaml.safe_load(file) - else: - raise RuntimeError("File does not exist") - - if "pe_spectrum" not in result_dic: - raise RuntimeError("pe_spectrum info not 
present!") - - if "linear_gain" in result_dic and not override_results: - raise RuntimeError("results already exist and override flag not set") - - ###################### - # Gain calculation # - ###################### - tmp_dic = {"used_keys": []} - y_unit = None - x_unit = None - for key, run in result_dic["pe_spectrum"].items(): - tmp_dic["used_keys"].append(key) - for pmt in run: - if pmt not in tmp_dic: - tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} - v = run[pmt]["voltage"]["val"] - xu = run[pmt]["voltage"]["unit"] - if x_unit is None: - x_unit = xu - else: - assert x_unit == xu - if v == 0: - continue - - tmp_dic[pmt]["voltage"].append(v) - tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]["val"]) - tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak_fit"]["mean"]["err"]) - - yu = run[pmt]["pe_peak_fit"]["mean"]["unit"] - if y_unit is None: - y_unit = yu - else: - assert y_unit == yu - - with PdfPages(out_folder + "gain_plots.pdf") as pdf: - for key, pmt in tmp_dic.items(): - if key == "used_keys": - continue - fig, ax = plt.subplots() - fig.set_figwidth(A4_LANDSCAPE[0]) - fig.set_figheight(A4_LANDSCAPE[1]) - ax.errorbar( - pmt["voltage"], - pmt["vals"], - pmt["errs"], - label=f"PMT {key}", - fmt="o", - ) - ax.set_ylabel(f"PMT position ({y_unit})") - ax.set_xlabel(f"Voltage ({x_unit})") - - params, covariance = curve_fit( - linear_func, - pmt["voltage"], - pmt["vals"], - sigma=pmt["errs"], - absolute_sigma=True, - ) - a_opt, b_opt = params - perr = np.sqrt(np.diag(covariance)) - x = np.linspace(-1 * b_opt / a_opt, 110, 1000) - ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") - - tmp_dic[key]["a"] = { - "val": float(a_opt), - "err": float(perr[0]), - "unit": f"{y_unit}/{x_unit}", - } - tmp_dic[key]["b"] = { - "val": float(b_opt), - "err": float(perr[1]), - "unit": f"{y_unit}/{x_unit}", - } - tmp_dic[key]["func"] = "G = a*voltage+b" - - pmt.pop("errs") - pmt.pop("vals") - pmt.pop("voltage") - ax.legend() - pdf.savefig() 
- plt.close() - - result_dic["linear_gain"] = tmp_dic - - -with open(f_result, "w") as file: - yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py b/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py deleted file mode 100644 index 3a179b2..0000000 --- a/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py +++ /dev/null @@ -1,242 +0,0 @@ -import os - -import awkward as ak -import matplotlib.pyplot as plt -import numpy as np -import yaml -from iminuit import Minuit -from lgdo import lh5 -from matplotlib.backends.backend_pdf import PdfPages - - -def valley_index_strict(y): - """ - Return the index of the valley between the first strict peak - and the next rise. Returns None if not found. - """ - n = len(y) - if n < 3: - return None - - # 1. Find first strict peak - peak = None - for i in range(1, n - 1): - if y[i] > y[i - 1] and y[i] > y[i + 1]: - peak = i - break - if peak is None: - return None - - # 2. Walk downward and track the minimum - valley = peak - i = peak + 1 - - while i < n: - if y[i] > y[i - 1]: # rising again → done - break - if y[i] < y[valley]: - valley = i - i += 1 - - if i == n: # never rose again - return None - - return peak, valley - - -def nll(amp, mu, sigma, bin_centers_fit, n_fit): - """Poisson NLL for binned data""" - expected = gaussian(bin_centers_fit, amp, mu, sigma) - expected[expected <= 0] = 1e-10 # avoid log(0) - return np.sum(expected - n_fit * np.log(expected)) - - -def gaussian(x, amp, mu, sigma): - return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - - -def linear_func(x, a, b): - return a * x + b - - -if __name__ == "__main__": - - f_aux = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml" - f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" - with open(f_aux) as file: - aux_dict = yaml.safe_load(file) - - raw_path = ( - 
"/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020" - ) - plot_folder = "/home/pkrause/software/mint-analysis/debug_out/pe_spectra/" - - override_results = True - - bin_size = 20 - bins = np.arange(-100, 10000, bin_size) - lim = 20 - A4_LANDSCAPE = (11.69, 8.27) - result_dic = {} - - for run in aux_dict: - result_dic[run] = {} - f_raw = raw_path + aux_dict[run]["daq"].split("/")[-1].replace("daq", "").replace( - "data", "lh5" - ) - f_dsp = f_raw.replace("raw", "dsp") - - with PdfPages(plot_folder + f"pe_spectra_{run}.pdf") as pdf: - for ch in lh5.ls(f_dsp): - pmt = int(ch[2:]) + 1 - result_dic[run][pmt] = {} - result_dic[run][pmt]["voltage"] = { - "val": aux_dict[run]["voltages_in_V"][int(ch[2:])], - "unit": "V", - } - fig, ax = plt.subplots() - fig.set_figwidth(A4_LANDSCAPE[0]) - fig.set_figheight(A4_LANDSCAPE[1]) - - d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") - vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) - pe_vals = np.nansum(np.where(vals > lim, vals, np.nan), axis=1) - - n, bins, patches = ax.hist( - pe_vals, - bins=bins, - histtype="step", - label=f"channel {ch} ON ({result_dic[run][pmt]['voltage']['val']:.2f} {result_dic[run][pmt]['voltage']['unit']})", - ) - - if aux_dict[run]["voltages_in_V"][int(ch[2:])] == 0: - ax.set_xlim(-10, 2.5e3) - ax.set_ylim(0.5, None) - ax.set_yscale("log") - ax.set_ylabel(f"Counts/{bin_size} NNLS units") - ax.legend() - ax.set_xlabel("NNLS units") - ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") - - pdf.savefig() - plt.close() - continue - - ###################### - # Fit p.e. 
peak # - ###################### - - noise_peak, valley_idx = valley_index_strict(n) - pe_peak, _ = valley_index_strict(n[noise_peak:]) - ax.axvline(bins[valley_idx], color="red", ls="--") - ax.axvline(bins[noise_peak:][pe_peak], color="green", ls="--") - - bin_centers = 0.5 * (bins[1:] + bins[:-1]) - # Restrict range - x_min, x_max = ( - bins[valley_idx], - 2 * bins[noise_peak:][pe_peak], - ) # - bins[valley_idx] - mask = (bin_centers >= x_min) & (bin_centers <= x_max) - - bin_centers_fit = bin_centers[mask] - n_fit = n[mask] - - # Initial guesses - amp0 = n[noise_peak:][pe_peak] - mu0 = bins[noise_peak:][pe_peak] - sigma0 = 100.0 - - m = Minuit( - lambda amp, mu, sigma: nll( - amp, mu, sigma, bin_centers_fit=bin_centers_fit, n_fit=n_fit - ), - amp=amp0, - mu=mu0, - sigma=sigma0, - ) - m.errordef = Minuit.LIKELIHOOD # important for Poisson NLL - m.migrad(iterate=10) # perform minimization - - fit_vals = m.values.to_dict() - fit_errs = m.errors.to_dict() - - result_dic[run][pmt]["pe_peak_fit"] = { - "mean": {"val": fit_vals["mu"], "err": fit_errs["mu"], "unit": "NNLS"}, - "sigma": {"val": fit_vals["sigma"], "err": fit_errs["sigma"], "unit": "NNLS"}, - "amp": {"val": fit_vals["amp"], "err": fit_errs["amp"], "unit": ""}, - } - - y_fit = gaussian( - bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] - ) - - ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") - - ax.set_xlim(-10, 2.5e3) - ax.set_ylim(0.5, None) - ax.set_yscale("log") - ax.set_ylabel(f"Counts/{bin_size} NNLS units") - ax.legend() - ax.set_xlabel("NNLS units") - ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") - - pdf.savefig() - plt.close() - - ###################### - # spectrum values # - ###################### - - result_dic[run][pmt]["statistics"] = { - "1st_pe_fit_integral_below_valley": { - "val": float(np.sum(y_fit[:valley_idx])), - "unit": "", - }, - "cts_above_valley": {"val": int(np.sum(n[:valley_idx])), "unit": ""}, - "cts_below_valley": 
{"val": int(np.sum(n[valley_idx:])), "unit": ""}, - "1st_pe_fit_integral": {"val": int(float(np.sum(y_fit))), "unit": ""}, - "total_cts": {"val": int(np.sum(n)), "unit": ""}, - "valley": { - "pos": {"val": float(bin_centers[valley_idx]), "unit": "NNLS"}, - "amp": int(n[valley_idx]), - }, - "1st_pe_guess": { - "pos": {"val": float(mu0), "unit": "NNLS"}, - "amp": {"val": int(amp0), "unit": ""}, - }, - } - - result_dic[run][pmt]["runtime"] = {"unit": "s"} - if "runtime_in_s" in aux_dict[run]: - result_dic[run][pmt]["runtime"]["aux"] = aux_dict[run]["runtime_in_s"] - - if f"{ch}/raw/timestamp_sec" in lh5.ls( - f_raw, f"{ch}/raw/" - ) and f"{ch}/raw/timestamp_ps" in lh5.ls(f_raw, f"{ch}/raw/"): - ts = lh5.read_as(f"{ch}/raw/timestamp_sec", f_raw, "np") - ts_ps = lh5.read_as(f"{ch}/raw/timestamp_ps", f_raw, "np") - result_dic[run][pmt]["runtime"]["raw"] = float( - ts[ts.argmax()] - + ts_ps[ts.argmax()] * 1e-12 - - (ts[ts.argmin()] + ts_ps[ts.argmin()] * 1e-12) - ) - - # support for old raw files - elif f"{ch}/raw/timestamp" in lh5.ls(f_raw, f"{ch}/raw/"): - ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") - result_dic[run][pmt]["runtime"]["raw"] = float((ts.max() - ts.min()) * 4.8e-9) - -if os.path.exists(f_result): - with open(f_result) as file: - tmp_dic = yaml.safe_load(file) - if "pe_spectrum" in tmp_dic.keys() and not override_results: - raise RuntimeError("Already exists and override flag is not set") - else: - tmp_dic["pe_spectrum"] = result_dic - with open(f_result, "w") as file: - yaml.safe_dump(tmp_dic, file, default_flow_style=False) - -else: - with open(f_result, "w") as file: - yaml.safe_dump({"pe_spectrum": result_dic}, file, default_flow_style=False) From 0727907801ff714f82aedc92dd6c78504a6b800a Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 27 Nov 2025 18:05:49 -0800 Subject: [PATCH 10/32] Add functionality to convert NNLS units to other units --- src/mintanalysis/pmt/ana/analyser_class.py | 187 ++++++++++++++++++++- 1 file changed, 186 
insertions(+), 1 deletion(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index badab7f..f4cae76 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -144,6 +144,11 @@ def __init__( lim: int = LIM, override_results: bool = OVERRIDE_RESULTS, logger: logging.Logger | None = None, + up_sampling_ratio: float = 24 / 240, + v_per_adc: float = 0.25e-3, + adc_impedance: float = 75.0, + sampling_time: float = 4.8e-9, + calib: str = "None", ) -> None: self.aux_yaml = aux_yaml self.raw_dir = raw_dir @@ -156,6 +161,11 @@ def __init__( self.logger = logger or setup_logging() self.aux = self._load_aux() self.plot_folder.mkdir(parents=True, exist_ok=True) + self.up_sampling_ratio = up_sampling_ratio + self.v_per_adc = v_per_adc + self.adc_impedance = adc_impedance + self.sampling_time = sampling_time + self.calib = calib # ---------------------- # I/O helpers @@ -270,6 +280,9 @@ def process_channel( label=f"channel {ch} (PMT {pmt}) at {voltage:.2f} V", ) + if self.calib != "None": + self._add_charge_axis(ax, False) + bin_centers = 0.5 * (bins[1:] + bins[:-1]) # noise-only @@ -461,6 +474,71 @@ def _save_results(self, results: dict[str, dict[int, dict[str, Any]]]) -> None: yaml.safe_dump({"pe_spectrum": results}, f, default_flow_style=False) self.logger.info("Wrote new result YAML at %s", self.result_yaml) + def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: + """ + This function add a axis in new units to a given axis plot. + + Parameters + ---------- + ax : plt.Axes + The plot to which to add the pC axis. + is_y : bool + If True add a new y-axis. Else an x-axis. 
+ """ + if self.calib not in ["pC", "adc", "gain"]: + self.logger.warning("Invalid calibration unit, not calibrating.") + return + + label = "Charge " + if self.calib == "pC": + func = ( + lambda x: x + * ( + (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) + / self.adc_impedance + ) + * 1e12, + lambda y: y + / ( + ( + (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) + / self.adc_impedance + ) + * 1e12 + ), + ) + label += "(pC)" + + elif self.calib == "gain": + label = "Gain (a.u.)" + elem = 1.602e-19 # C (elementary charge) + func = ( + lambda x: x + * ( + (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) + / self.adc_impedance + ) + / elem, + lambda y: y + / ( + ( + (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) + / self.adc_impedance + ) + / elem + ), + ) + + elif self.calib == "adc": + label += f"(ADC x {self.sampling_time*1e9:.1f} ns)" + func = (lambda x: x * self.up_sampling_ratio, lambda y: y / self.up_sampling_ratio) + if is_y: + secax_y = ax.secondary_yaxis("right", functions=func) + secax_y.set_ylabel(label) + else: + secax_x = ax.secondary_xaxis("top", functions=func) + secax_x.set_xlabel(label) + # ---------------------- # Signal to Noise Ratio (SNR) # ---------------------- @@ -643,6 +721,10 @@ def compute_linear_gain_fit(self) -> None: ax.errorbar(pmt["voltage"], pmt["vals"], pmt["errs"], fmt="o", label=f"PMT {key}") ax.set_xlabel(f"Voltage ({x_unit})") ax.set_ylabel(f"PMT position ({y_unit})") + + if self.calib != "None": + self._add_charge_axis(ax, True) + params, covariance = curve_fit( linear_func, pmt["voltage"], @@ -676,6 +758,72 @@ def compute_linear_gain_fit(self) -> None: yaml.safe_dump(data, f, default_flow_style=False) self.logger.info("Linear gain fit results saved to %s", self.result_yaml) + def calibrate_nnls_values(self, calibration_func, output_file, new_unit): + """ + Reads the current results.yaml, finds all entries that contain a dict with + keys {"val": , "unit": "NNLS"}, 
applies calibration_func(val) + and writes a calibrated result file. + """ + factor = calibration_func(1) + try: + with open(self.result_yaml) as f: + data = yaml.safe_load(f) + except Exception as e: + msg = f"Failed to load file: {e}" + self.logger.error(msg) + raise + + def classify(unit: str): + unit = str(unit) + if unit == "NNLS" or unit.startswith("NNLS/"): + return "nnls" + if "/NNLS" in unit: + return "per_nnls" + if "NNLS" in unit: + return "nnls" + return None + + def recurse(obj): + if isinstance(obj, dict): + if set(obj.keys()) >= {"val", "unit"}: + kind = classify(obj.get("unit")) + if kind == "nnls": + try: + obj["val"] = obj["val"] * factor + if "err" in obj: + obj["err"] = obj["err"] * factor + obj["unit"] = obj["unit"].replace("NNLS", new_unit) + except Exception as e: + msg = f"NNLS calibration failed for value {obj}: {e}" + self.logger.error(msg) + elif kind == "per_nnls": + try: + obj["val"] = obj["val"] / factor + if "err" in obj: + obj["err"] = obj["err"] / factor + obj["unit"] = obj["unit"].replace("NNLS", new_unit) + except Exception as e: + msg = f"per-NNLS calibration failed for value {obj}: {e}" + self.logger.error(msg) + for _, v in obj.items(): + recurse(v) + elif isinstance(obj, list): + for item in obj: + recurse(item) + + recurse(data) + + try: + with open(output_file, "w") as f: + yaml.safe_dump(data, f, default_flow_style=False) + except Exception as e: + msg = f"Failed to write calibrated file: {e}" + self.logger.error(msg) + raise + + msg = f"Calibrated results written to {output_file}" + self.logger.info(msg) + # -------------------------- # CLI entrypoint @@ -698,11 +846,18 @@ def main() -> None: parser.add_argument( "-s", "--compute-snr", action="store_true", help="Compute SNR after analysis" ) + parser.add_argument( + "-c", + "--calibrate", + default="None", + choices=["pC", "adc", "gain", "None"], + help="Choose a charge calibration value (pC, adc, gain, or None)", + ) args = parser.parse_args() logger = 
setup_logging(LOG_FILE, level=logging.INFO) try: - analyzer = PESpectrumAnalyzer(logger=logger) + analyzer = PESpectrumAnalyzer(logger=logger, calib=args.calibrate) if args.compute_pe: analyzer.run() if args.compute_dcr: @@ -711,6 +866,36 @@ def main() -> None: analyzer.compute_linear_gain_fit() if args.compute_snr: analyzer.plot_snr() + if args.calibrate != "None": + if args.calibrate == "pC": + analyzer.calibrate_nnls_values( + lambda x: x + * ( + (analyzer.v_per_adc * analyzer.up_sampling_ratio * analyzer.sampling_time) + / analyzer.adc_impedance + ) + * 1e12, + str(analyzer.result_yaml).replace(".yaml", "_pC_calibrated.yaml"), + "pC", + ) + elif args.calibrate == "adc": + analyzer.calibrate_nnls_values( + lambda x: x * analyzer.up_sampling_ratio, + str(analyzer.result_yaml).replace(".yaml", "_adc_calibrated.yaml"), + "ADC", + ) + elif args.calibrate == "gain": + elem = 1.602e-19 # C (elementary charge) + analyzer.calibrate_nnls_values( + lambda x: x + * ( + (analyzer.v_per_adc * analyzer.up_sampling_ratio * analyzer.sampling_time) + / analyzer.adc_impedance + ) + / elem, + str(analyzer.result_yaml).replace(".yaml", "_gain_calibrated.yaml"), + "a.u.", + ) logger.info("Processing complete.") except Exception as e: From 947a2d95ede4062dd9b0e5fa7f504d74ee9681d8 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 27 Nov 2025 18:38:28 -0800 Subject: [PATCH 11/32] Adding more CLI options --- src/mintanalysis/pmt/ana/analyser_class.py | 77 ++++++++++++++-------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index f4cae76..f687288 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -32,22 +32,15 @@ from scipy.optimize import curve_fit # -------------------------- -# Configuration / Constants +# TODO For Debugging only! 
# -------------------------- -AUX_YAML = Path( - "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml" -) RAW_DIR = Path("/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020") -RESULT_YAML = Path("/home/pkrause/software/mint-analysis/debug_out/results.yaml") -PLOT_FOLDER = Path("/home/pkrause/software/mint-analysis/debug_out/pe_spectra/") -LOG_FILE = Path("/home/pkrause/software/mint-analysis/debug_out/pe_spectrum.log") - -# Analysis constants -BIN_SIZE = 20 -BINS = np.arange(-100, 10000, BIN_SIZE) -LIM = 20 +RESULT_DIR = Path("/home/pkrause/software/mint-analysis/debug_out") + +# -------------------------- +# Constants +# -------------------------- A4_LANDSCAPE = (11.69, 8.27) -OVERRIDE_RESULTS = True # -------------------------- @@ -55,7 +48,9 @@ # -------------------------- -def setup_logging(log_file: Path = LOG_FILE, level: int = logging.INFO) -> logging.Logger: +def setup_logging( + log_file: Path = RESULT_DIR / "analysis.log", level: int = logging.INFO +) -> logging.Logger: logger = logging.getLogger("PESpectrum") logger.setLevel(level) logger.propagate = False @@ -135,14 +130,10 @@ def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: class PESpectrumAnalyzer: def __init__( self, - aux_yaml: Path = AUX_YAML, - raw_dir: Path = RAW_DIR, - plot_folder: Path = PLOT_FOLDER, - result_yaml: Path = RESULT_YAML, - bins: np.ndarray = BINS, - bin_size: int = BIN_SIZE, - lim: int = LIM, - override_results: bool = OVERRIDE_RESULTS, + aux_yaml: Path, + bin_size: int = 20, + lim: float = 20, + override_results: bool = False, logger: logging.Logger | None = None, up_sampling_ratio: float = 24 / 240, v_per_adc: float = 0.25e-3, @@ -151,11 +142,11 @@ def __init__( calib: str = "None", ) -> None: self.aux_yaml = aux_yaml - self.raw_dir = raw_dir - self.plot_folder = plot_folder - self.result_yaml = result_yaml - self.bins = bins + self.raw_dir = RAW_DIR + self.plot_folder = RESULT_DIR / 
"plots" + self.result_yaml = RESULT_DIR / "results.yaml" self.bin_size = bin_size + self.bins = np.arange(-100, 10000, bin_size) self.lim = lim self.override_results = override_results self.logger = logger or setup_logging() @@ -853,11 +844,41 @@ def main() -> None: choices=["pC", "adc", "gain", "None"], help="Choose a charge calibration value (pC, adc, gain, or None)", ) + parser.add_argument( + "-b", + "--bin_size", + type=int, + default=20, + help="Number of bins used for analysis", + ) + parser.add_argument( + "-l", + "--nnls_limit", + type=float, + default=20, + help="Lower limit for solutions in the NNLS solution vector to be accepted.", + ) + parser.add_argument( + "-a", + "--aux_file", + help="Path to auxiliary file", + ) + parser.add_argument( + "-o", "--override", action="store_true", help="Override results if existing" + ) + args = parser.parse_args() - logger = setup_logging(LOG_FILE, level=logging.INFO) + logger = setup_logging(level=logging.INFO) try: - analyzer = PESpectrumAnalyzer(logger=logger, calib=args.calibrate) + analyzer = PESpectrumAnalyzer( + logger=logger, + aux_yaml=Path(args.aux_file), + bin_size=args.bin_size, + lim=args.nnls_limit, + override_results=args.override, + calib=args.calibrate, + ) if args.compute_pe: analyzer.run() if args.compute_dcr: From 24e5c984c10b5e83553c46e97feccd76a27ccef7 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 27 Nov 2025 19:44:53 -0800 Subject: [PATCH 12/32] Remove awkward data loading --- src/mintanalysis/pmt/ana/analyser_class.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index f687288..993ec3b 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -22,7 +22,6 @@ from pathlib import Path from typing import Any -import awkward as ak import matplotlib.pyplot as plt import numpy as np import yaml @@ -247,7 +246,7 @@ def 
process_channel( # load data try: - d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") + vals = lh5.read_as(f"{ch}/dsp/nnls_solution/values", f_dsp, "np") except Exception as e: msg = f"Failed to read DSP for {ch} in {f_dsp}: {e}" self.logger.warning(msg) @@ -255,7 +254,6 @@ def process_channel( # compute pe-values try: - vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) pe_vals = np.nansum(np.where(vals > self.lim, vals, np.nan), axis=1) except Exception as e: msg = f"Failed to compute pe values for {ch}: {e}" From fc057db5689fc1fb437d597928a34c84970c5d15 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 28 Nov 2025 15:05:16 -0800 Subject: [PATCH 13/32] Add key selection --- src/mintanalysis/pmt/ana/analyser_class.py | 101 +++++++++++++-------- 1 file changed, 64 insertions(+), 37 deletions(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index 993ec3b..1864d36 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -18,6 +18,7 @@ import argparse import logging +from collections.abc import Callable from dataclasses import dataclass from pathlib import Path from typing import Any @@ -130,6 +131,7 @@ class PESpectrumAnalyzer: def __init__( self, aux_yaml: Path, + keys: list | None = None, bin_size: int = 20, lim: float = 20, override_results: bool = False, @@ -141,6 +143,7 @@ def __init__( calib: str = "None", ) -> None: self.aux_yaml = aux_yaml + self.keys = keys self.raw_dir = RAW_DIR self.plot_folder = RESULT_DIR / "plots" self.result_yaml = RESULT_DIR / "results.yaml" @@ -167,7 +170,36 @@ def _load_aux(self) -> dict[str, Any]: with open(self.aux_yaml) as f: aux = yaml.safe_load(f) self.logger.info("Loaded aux YAML: %s", self.aux_yaml) - return aux + + if self.keys is None: + return aux + ret = {} + for k in self.keys: + if k in aux: + ret[k] = aux[k] + else: + msg = f"Key {k} not in aux file, skipping." 
+ self.logger.warning(msg) + return ret + + def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) -> None: + if self.result_yaml.exists(): + with open(self.result_yaml) as f: + existing = yaml.safe_load(f) or {} + if key in existing and not self.override_results: + msg = key + " results already present and override flag is False." + self.logger.error(msg) + raise RuntimeError(msg) + + existing[key] = results + with open(self.result_yaml, "w") as f: + yaml.safe_dump(existing, f, default_flow_style=False) + self.logger.info("Updated result YAML at %s", self.result_yaml) + + else: + with open(self.result_yaml, "w") as f: + yaml.safe_dump({key: results}, f, default_flow_style=False) + self.logger.info("Wrote new result YAML at %s", self.result_yaml) # ---------------------- # Public entrypoints @@ -181,7 +213,7 @@ def run(self) -> None: except FileNotFoundError as e: self.logger.exception("Critical file missing for run %s: %s", run_name, e) raise - self._save_results(results) + self._save_results(results, "pe_spectrum") self.logger.info("All runs processed.") # ---------------------- @@ -274,18 +306,27 @@ def process_channel( bin_centers = 0.5 * (bins[1:] + bins[:-1]) + # total counts + result["statistics"] = { + "total_cts": {"val": int(np.sum(n)), "unit": ""}, + } + + # runtime + result["runtime"] = {"unit": "s"} + if "runtime_in_s" in meta: + result["runtime"]["aux"] = meta["runtime_in_s"] + raw_runtime = self._extract_runtime_if_present(f_raw, ch) + if raw_runtime is not None: + result["runtime"]["raw"] = raw_runtime + # noise-only if voltage == 0: self._decorate_axis(ax) # minimal result (no fit) - result.update( - {"status": "ok", "statistics": {}, "pe_peak_fit": {}, "runtime": {"unit": "s"}} - ) - if "runtime_in_s" in meta: - result["runtime"]["aux"] = meta["runtime_in_s"] - raw_runtime = self._extract_runtime_if_present(f_raw, ch) - if raw_runtime is not None: - result["runtime"]["raw"] = raw_runtime + msg = "Voltage a 0 --> Noise 
run" + result["status"] = ("skipped",) + result["reason"] = (msg,) + return fig, result # detect valley & peaks @@ -294,7 +335,9 @@ def process_channel( msg = "Valley detection failed (no strict peak/valley)." self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) self._decorate_axis(ax) - return fig, {"status": "skipped", "reason": msg} + result["status"] = ("skipped",) + result["reason"] = (msg,) + return fig, result noise_peak, valley_idx = vi @@ -394,14 +437,6 @@ def process_channel( result.setdefault("statistics", {}) result["statistics"]["error"] = str(e) - # runtime - result["runtime"] = {"unit": "s"} - if "runtime_in_s" in meta: - result["runtime"]["aux"] = meta["runtime_in_s"] - raw_runtime = self._extract_runtime_if_present(f_raw, ch) - if raw_runtime is not None: - result["runtime"]["raw"] = raw_runtime - result["status"] = "ok" return fig, result @@ -447,25 +482,9 @@ def _extract_runtime_if_present(self, f_raw: Path, ch: str) -> float | None: return None - def _save_results(self, results: dict[str, dict[int, dict[str, Any]]]) -> None: - if self.result_yaml.exists(): - with open(self.result_yaml) as f: - existing = yaml.safe_load(f) or {} - if "pe_spectrum" in existing and not self.override_results: - msg = "Results already present and override flag is False." - raise RuntimeError(msg) - existing["pe_spectrum"] = results - with open(self.result_yaml, "w") as f: - yaml.safe_dump(existing, f, default_flow_style=False) - self.logger.info("Updated result YAML at %s", self.result_yaml) - else: - with open(self.result_yaml, "w") as f: - yaml.safe_dump({"pe_spectrum": results}, f, default_flow_style=False) - self.logger.info("Wrote new result YAML at %s", self.result_yaml) - def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: """ - This function add a axis in new units to a given axis plot. + This function adds a axis in new units to a given axis plot. 
Parameters ---------- @@ -747,7 +766,7 @@ def compute_linear_gain_fit(self) -> None: yaml.safe_dump(data, f, default_flow_style=False) self.logger.info("Linear gain fit results saved to %s", self.result_yaml) - def calibrate_nnls_values(self, calibration_func, output_file, new_unit): + def calibrate_nnls_values(self, calibration_func: Callable, output_file: str, new_unit: str): """ Reads the current results.yaml, finds all entries that contain a dict with keys {"val": , "unit": "NNLS"}, applies calibration_func(val) @@ -864,6 +883,13 @@ def main() -> None: parser.add_argument( "-o", "--override", action="store_true", help="Override results if existing" ) + parser.add_argument( + "-k", + "--keys", + nargs="+", + default=None, + help="Only analyse this keys, ignore all other keys in aux file", + ) args = parser.parse_args() @@ -872,6 +898,7 @@ def main() -> None: analyzer = PESpectrumAnalyzer( logger=logger, aux_yaml=Path(args.aux_file), + keys=args.keys, bin_size=args.bin_size, lim=args.nnls_limit, override_results=args.override, From ef43d01bf53e7f0d2ff7a7cc8d223aa634f06614 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 2 Dec 2025 16:31:56 -0800 Subject: [PATCH 14/32] adapted to current aux file layout --- src/mintanalysis/pmt/ana/analyser_class.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index 1864d36..163d3f1 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -273,8 +273,7 @@ def process_channel( """ result: dict[str, Any] = {} ch_idx = int(ch[2:]) - voltage = float(meta["voltages_in_V"][ch_idx]) - result["voltage"] = {"val": voltage, "unit": "V"} + result["voltage"] = meta[ch_idx + 1]["v10"] # load data try: @@ -298,7 +297,8 @@ def process_channel( pe_vals, bins=self.bins, histtype="step", - label=f"channel {ch} (PMT {pmt}) at {voltage:.2f} V", + label=f"channel {ch} 
(PMT {pmt}) at {result['voltage']['val']:.2f}" + f" {result['voltage']['unit']}", ) if self.calib != "None": @@ -312,15 +312,15 @@ def process_channel( } # runtime - result["runtime"] = {"unit": "s"} - if "runtime_in_s" in meta: - result["runtime"]["aux"] = meta["runtime_in_s"] + result["runtime"] = {} + if "runtime" in meta: + result["runtime"]["aux"] = meta["runtime"] raw_runtime = self._extract_runtime_if_present(f_raw, ch) if raw_runtime is not None: - result["runtime"]["raw"] = raw_runtime + result["runtime"]["raw"] = {"val": raw_runtime, "unit": "s"} # noise-only - if voltage == 0: + if result["voltage"].get("val", 0) <= 10: self._decorate_axis(ax) # minimal result (no fit) msg = "Voltage a 0 --> Noise run" @@ -651,7 +651,7 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: dcts = stats.get("1st_pe_fit_integral_below_valley", {}).get( "val", 0.0 ) + stats.get("cts_above_valley", {}).get("val", 0) - runtime = runtime_info[time_mode] + runtime = runtime_info[time_mode].get("val", 0) run_dcr[pmt] = { "val": float(dcts) / float(runtime), "err": float(dcts**0.5) / float(runtime), From 1da1544ba2d71e1f7abcdf1c9e29499e9793fab5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 2 Dec 2025 17:07:11 -0800 Subject: [PATCH 15/32] make a callable cli command --- pyproject.toml | 15 +++++++++++---- src/mintanalysis/pmt/ana/__init__.py | 4 +++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bd9891b..daaf459 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,10 +53,15 @@ requires-python = ">=3.10, <3.13" dynamic = ["version"] dependencies = [ +"dbetto", +"iminuit", +"pymongo", +"PyYAML", +"sshtunnel", "pygama>=2.0.0", "legend-daq2lh5>=1.2.1", "legend-pydataobj>=1.7", -"numpy==1.26.2", +"numpy", "numba>0.52", "pandas", "matplotlib", @@ -65,11 +70,9 @@ dependencies = [ "ipython", "ipympl", "boost_histogram<1.5", -#"dspeed==1.6.2", +"dspeed==2.0.2", ] -#"dspeed @ file:///${PROJECT_ROOT}/dspeed", 
- [project.optional-dependencies] test = [ "pytest >=6", @@ -85,6 +88,10 @@ docs = [ "furo>=2023.08.17", ] +[project.scripts] +# Format: = ":" +build_ana = "mintanalysis.pmt.ana.analyser_class:main" + #[[tool.uv.index]] # Optional name for the index. #name = "dspeed-pone" diff --git a/src/mintanalysis/pmt/ana/__init__.py b/src/mintanalysis/pmt/ana/__init__.py index 714298a..a83ddd9 100644 --- a/src/mintanalysis/pmt/ana/__init__.py +++ b/src/mintanalysis/pmt/ana/__init__.py @@ -2,4 +2,6 @@ Routines for the final analysis of PMT data """ -__all__ = [] +from .analyser_class import PESpectrumAnalyzer + +__all__ = ["PESpectrumAnalyzer"] From 0a7e385dd16026656ed4888edb82984f2ee1fbf3 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 3 Dec 2025 11:49:33 -0800 Subject: [PATCH 16/32] Analyzer class renamed --- pyproject.toml | 2 +- .../pmt/ana/{analyser_class.py => peSpectrumAnalyzer.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/mintanalysis/pmt/ana/{analyser_class.py => peSpectrumAnalyzer.py} (100%) diff --git a/pyproject.toml b/pyproject.toml index daaf459..b6caa17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,7 +90,7 @@ docs = [ [project.scripts] # Format: = ":" -build_ana = "mintanalysis.pmt.ana.analyser_class:main" +build_ana = "mintanalysis.pmt.ana.peSpectrumAnalyzer:main" #[[tool.uv.index]] # Optional name for the index. 
diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py similarity index 100% rename from src/mintanalysis/pmt/ana/analyser_class.py rename to src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py From ffbb7310c304c9ec265fc66a9efb3d89775ae49d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 10 Dec 2025 11:44:19 -0800 Subject: [PATCH 17/32] Implement robust unit system with pint --- src/mintanalysis/pmt/ana/__init__.py | 2 +- .../pmt/ana/peSpectrumAnalyzer.py | 533 +++++++++--------- 2 files changed, 256 insertions(+), 279 deletions(-) diff --git a/src/mintanalysis/pmt/ana/__init__.py b/src/mintanalysis/pmt/ana/__init__.py index a83ddd9..037a007 100644 --- a/src/mintanalysis/pmt/ana/__init__.py +++ b/src/mintanalysis/pmt/ana/__init__.py @@ -2,6 +2,6 @@ Routines for the final analysis of PMT data """ -from .analyser_class import PESpectrumAnalyzer +from .peSpectrumAnalyzer import PESpectrumAnalyzer __all__ = ["PESpectrumAnalyzer"] diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index 163d3f1..41a467c 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -31,6 +31,10 @@ from matplotlib.backends.backend_pdf import PdfPages from scipy.optimize import curve_fit +from pint import UnitRegistry +from uncertainties import ufloat, UFloat + + # -------------------------- # TODO For Debugging only! 
# -------------------------- @@ -76,14 +80,78 @@ def setup_logging( # -------------------------- # Small helpers # -------------------------- - +@dataclass(frozen=True) +class Calibration: + up_sampling_ratio: float + v_per_adc: float + adc_impedance: float + sampling_time: float + renormalization_factor: float @dataclass class ChannelResult: status: str # 'ok', 'skipped', 'error' data: dict[str, Any] +def get_physics_object(obj, ureg): + """ + Convert a given object recursevly + to a object with all {val,(err),unit} occurences replaced with a pint (and ufloat) object + """ + if isinstance(obj, dict) and {"val", "unit"}.issubset(obj): + val = obj["val"] + if "err" in obj: + err = obj["err"] + return ureg.Quantity(ufloat(val, err),ureg(obj["unit"])) + else: + return ureg.Quantity(val,ureg(obj["unit"])) + + elif isinstance(obj, dict): + return {k: get_physics_object(v, ureg) for k, v in obj.items()} + + elif isinstance(obj, (list, tuple)): + return type(obj)(get_physics_object(v, ureg) for v in obj) + + else: + return obj + +def quantity_to_dict(obj): + """ + Recursively replace a pint (and ufloat) object with a dict + {val, (err), unit} + """ + # The pint internal function is broken (see issue 2121) + preferred_units = ['Hz'] + if hasattr(obj, "magnitude") and hasattr(obj, "units"): + if not preferred_units is None: + for u in preferred_units: + if obj.is_compatible_with(u): + obj = obj.to(u) + mag = obj.magnitude + + + if isinstance(mag, UFloat): + val = mag.n + err = mag.s + else: + val = mag + err = None + + return { + "val": float(val), + **({ "err": float(err) } if err is not None else {}), + "unit": format(obj.units,"~") + } + + elif isinstance(obj, dict): + return {k: quantity_to_dict(v) for k, v in obj.items()} + + elif isinstance(obj, (list, tuple)): + return type(obj)(quantity_to_dict(v) for v in obj) + else: + return obj + def linear_func(x, a, b): return a * x + b @@ -136,10 +204,7 @@ def __init__( lim: float = 20, override_results: bool = False, logger: 
logging.Logger | None = None, - up_sampling_ratio: float = 24 / 240, - v_per_adc: float = 0.25e-3, - adc_impedance: float = 75.0, - sampling_time: float = 4.8e-9, + calibrator: Calibration = Calibration(24/240,0.25e-3,75.,4.8e-9,1), calib: str = "None", ) -> None: self.aux_yaml = aux_yaml @@ -152,13 +217,26 @@ def __init__( self.lim = lim self.override_results = override_results self.logger = logger or setup_logging() - self.aux = self._load_aux() self.plot_folder.mkdir(parents=True, exist_ok=True) - self.up_sampling_ratio = up_sampling_ratio - self.v_per_adc = v_per_adc - self.adc_impedance = adc_impedance - self.sampling_time = sampling_time + self.calibrator = calibrator self.calib = calib + self.ureg = UnitRegistry() + self.aux = self._load_aux() + + # unit handling + vadc = self.ureg.Quantity(calibrator.v_per_adc,self.ureg.volt) + usr = self.ureg.Quantity(calibrator.up_sampling_ratio, self.ureg.dimensionless) + rf = self.ureg.Quantity(calibrator.renormalization_factor, self.ureg.dimensionless) + st = self.ureg.Quantity(calibrator.sampling_time,self.ureg.seconds) + imp = self.ureg.Quantity(calibrator.adc_impedance,self.ureg.ohm) + + nnls_coloumb_factor = ((vadc*usr*st*rf)/imp) + self.ureg.define(f"NNLS = {nnls_coloumb_factor.to('coulomb').magnitude} * coulomb") + self.ureg.define(f"ADC = {usr.magnitude}*NNLS") + + + + # ---------------------- # I/O helpers @@ -171,6 +249,9 @@ def _load_aux(self) -> dict[str, Any]: aux = yaml.safe_load(f) self.logger.info("Loaded aux YAML: %s", self.aux_yaml) + # convert to physics units + aux = get_physics_object(aux,self.ureg) + if self.keys is None: return aux ret = {} @@ -181,6 +262,19 @@ def _load_aux(self) -> dict[str, Any]: msg = f"Key {k} not in aux file, skipping." 
self.logger.warning(msg) return ret + + def _load_results(self) ->dict: + if not self.result_yaml.exists(): + msg = f"Result YAML not found: {self.result_yaml}" + raise FileNotFoundError(msg) + with open(self.result_yaml) as f: + data = yaml.safe_load(f) or {} + if "pe_spectrum" not in data: + msg = "pe_spectrum info not present in result YAML; run analysis first." + raise RuntimeError(msg) + + return get_physics_object(data,self.ureg) + def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) -> None: if self.result_yaml.exists(): @@ -191,14 +285,14 @@ def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) self.logger.error(msg) raise RuntimeError(msg) - existing[key] = results + existing[key] = quantity_to_dict(results) with open(self.result_yaml, "w") as f: yaml.safe_dump(existing, f, default_flow_style=False) self.logger.info("Updated result YAML at %s", self.result_yaml) else: with open(self.result_yaml, "w") as f: - yaml.safe_dump({key: results}, f, default_flow_style=False) + yaml.safe_dump({key: quantity_to_dict(results)}, f, default_flow_style=False) self.logger.info("Wrote new result YAML at %s", self.result_yaml) # ---------------------- @@ -297,8 +391,8 @@ def process_channel( pe_vals, bins=self.bins, histtype="step", - label=f"channel {ch} (PMT {pmt}) at {result['voltage']['val']:.2f}" - f" {result['voltage']['unit']}", + label=f"channel {ch} (PMT {pmt}) at {result['voltage'].magnitude:.2f}" + f" {format(result['voltage'].units,'~')}", ) if self.calib != "None": @@ -308,7 +402,7 @@ def process_channel( # total counts result["statistics"] = { - "total_cts": {"val": int(np.sum(n)), "unit": ""}, + "total_cts": self.ureg.Quantity(ufloat(np.sum(n),np.sum(n)**0.5), 'dimensionless'), } # runtime @@ -317,10 +411,10 @@ def process_channel( result["runtime"]["aux"] = meta["runtime"] raw_runtime = self._extract_runtime_if_present(f_raw, ch) if raw_runtime is not None: - result["runtime"]["raw"] = {"val": 
raw_runtime, "unit": "s"} + result["runtime"]["raw"] = raw_runtime * self.ureg.seconds # noise-only - if result["voltage"].get("val", 0) <= 10: + if result["voltage"] <= 10 * self.ureg.volt: self._decorate_axis(ax) # minimal result (no fit) msg = "Voltage a 0 --> Noise run" @@ -399,9 +493,9 @@ def process_channel( fit_errs = {k: float(v) for k, v in m.errors.to_dict().items()} result["pe_peak_fit"] = { - "mean": {"val": fit_vals["mu"], "err": fit_errs["mu"], "unit": "NNLS"}, - "sigma": {"val": fit_vals["sigma"], "err": fit_errs["sigma"], "unit": "NNLS"}, - "amp": {"val": fit_vals["amp"], "err": fit_errs["amp"], "unit": ""}, + "mean": self.ureg.Quantity(ufloat(fit_vals["mu"], fit_errs["mu"]),self.ureg.NNLS), + "sigma": self.ureg.Quantity(ufloat(fit_vals["sigma"], fit_errs["sigma"]),self.ureg.NNLS), + "amp": self.ureg.Quantity(ufloat(fit_vals["amp"], fit_errs["amp"]),self.ureg.dimensionless) } # full fit curve over bins for plotting & integrals @@ -413,21 +507,19 @@ def process_channel( # statistics try: result["statistics"] = { - "1st_pe_fit_integral_below_valley": { - "val": float(np.sum(y_fit[:valley_idx])), - "unit": "", - }, - "cts_above_valley": {"val": int(np.sum(n[:valley_idx])), "unit": ""}, - "cts_below_valley": {"val": int(np.sum(n[valley_idx:])), "unit": ""}, - "1st_pe_fit_integral": {"val": int(float(np.sum(y_fit))), "unit": ""}, - "total_cts": {"val": int(np.sum(n)), "unit": ""}, + "1st_pe_fit_integral_below_valley": self.ureg.Quantity(ufloat(np.sum(y_fit[:valley_idx]), + np.sum(y_fit[:valley_idx])**0.5), 'dimensionless'), + "cts_above_valley": self.ureg.Quantity(ufloat(np.sum(n[:valley_idx]), + np.sum(n[:valley_idx])**0.5), 'dimensionless'), + "1st_pe_fit_integral": self.ureg.Quantity(ufloat(np.sum(y_fit), np.sum(y_fit)**0.5), 'dimensionless'), + "total_cts": self.ureg.Quantity(ufloat(np.sum(n), np.sum(n)**0.5), 'dimensionless'), "valley": { - "pos": {"val": float(bin_centers[valley_idx]), "unit": "NNLS"}, - "amp": {"val": int(n[valley_idx]), "unit": 
""}, + "pos": float(bin_centers[valley_idx]) * self.ureg.NNLS, + "amp": self.ureg.Quantity(ufloat(n[valley_idx],n[valley_idx]**0.5), 'dimensionless') }, "1st_pe_guess": { - "pos": {"val": float(mu0), "unit": "NNLS"}, - "amp": {"val": int(amp0), "unit": ""}, + "pos": float(mu0) *self.ureg.NNLS, + "amp": self.ureg.Quantity(ufloat(amp0,amp0**0.5), 'dimensionless'), }, } except Exception as e: @@ -447,8 +539,8 @@ def _decorate_axis(self, ax: plt.Axes) -> None: ax.set_xlim(-10, 2.5e3) ax.set_ylim(0.5, None) ax.set_yscale("log") - ax.set_ylabel(f"Counts/{self.bin_size} NNLS units") - ax.set_xlabel("NNLS units") + ax.set_ylabel(f"Counts/{self.bin_size} NNLS") + ax.set_xlabel("Charge (NNLS)") ax.set_title(f"pygama-NNLS reconstruction ({self.lim} units solution cut-off)") ax.legend() @@ -484,62 +576,32 @@ def _extract_runtime_if_present(self, f_raw: Path, ch: str) -> float | None: def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: """ - This function adds a axis in new units to a given axis plot. + This function adds a axis in new Charge units to a given axis plot. + if the unit is not charge convertible (or is gain) will log a warning + and fall gracefully. Parameters ---------- ax : plt.Axes - The plot to which to add the pC axis. + The plot to which to add the new axis. is_y : bool If True add a new y-axis. Else an x-axis. 
""" - if self.calib not in ["pC", "adc", "gain"]: - self.logger.warning("Invalid calibration unit, not calibrating.") - return - - label = "Charge " - if self.calib == "pC": - func = ( - lambda x: x - * ( - (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) - / self.adc_impedance - ) - * 1e12, - lambda y: y - / ( - ( - (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) - / self.adc_impedance - ) - * 1e12 - ), - ) - label += "(pC)" - - elif self.calib == "gain": + + if self.calib == "gain": label = "Gain (a.u.)" - elem = 1.602e-19 # C (elementary charge) - func = ( - lambda x: x - * ( - (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) - / self.adc_impedance - ) - / elem, - lambda y: y - / ( - ( - (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) - / self.adc_impedance - ) - / elem - ), + func = (lambda x: ((x*self.ureg.NNLS).to("C")/self.ureg.elementary_charge).m, + lambda y: ((y*self.ureg.elementary_charge).to("NNLS")).m ) - - elif self.calib == "adc": - label += f"(ADC x {self.sampling_time*1e9:.1f} ns)" - func = (lambda x: x * self.up_sampling_ratio, lambda y: y / self.up_sampling_ratio) + else: + if not self.ureg.NNLS.is_compatible_with(self.calib): + msg = f"Unit [{self.calib}] not compatible with charge" + self.logger.warning(msg) + label = f"Charge ({self.calib})" + func = (lambda x: (x*self.ureg.NNLS).to(self.calib).m, + lambda y: (y*self.ureg(self.calib)).to("NNLS").m + ) + if is_y: secax_y = ax.secondary_yaxis("right", functions=func) secax_y.set_ylabel(label) @@ -547,65 +609,71 @@ def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: secax_x = ax.secondary_xaxis("top", functions=func) secax_x.set_xlabel(label) + def _unit_converter(self, v,target_unit, constant = 1): + """ + Take a value v and if its a Quantity + apply conversion of the targeted unit. + A constant can be multiplied. 
+ E.g.: + target unit = pC + v = 2.5e-24 C**2/V*m + returns 2.5 pC**2/V/m + """ + if not isinstance(v, self.ureg.Quantity): + return v + dims = dict(v._units) + # Decompose units + dims = dict(v._units) + target_units = {k: d for k, d in dims.items() if self.ureg(k).is_compatible_with(target_unit)} + other_units = {k: d for k, d in dims.items() if not self.ureg(k).is_compatible_with(target_unit)} + + # Start with dimensionless + v_base = v / v.units + + # Multiply back non-target units + for k, d in other_units.items(): + v_base *= self.ureg(k)**d + + # Convert target units to target unit + for k, d in target_units.items(): + v_base *= (self.ureg(k).to(target_unit))**d + + return v_base * constant + + # ---------------------- # Signal to Noise Ratio (SNR) # ---------------------- - def plot_snr(self) -> None: - if not self.result_yaml.exists(): - msg = f"Result YAML not found: {self.result_yaml}" - raise FileNotFoundError(msg) - with open(self.result_yaml) as f: - data = yaml.safe_load(f) or {} - if "pe_spectrum" not in data: - msg = "pe_spectrum info not present in result YAML; run analysis first." 
- raise RuntimeError(msg) - + def compute_snr(self) -> None: + data = self._load_results() snr = {} for run_name, pmt_dict in data["pe_spectrum"].items(): run_snr = {} for pmt, info in pmt_dict.items(): try: - if info.get("voltage", {}).get("val", 0) == 0: + if info.get("voltage") == 0 * self.ureg.V: continue - noise = { - "val": info.get("statistics", {}) - .get("valley", {}) - .get("amp", {}) - .get("val", 0.0) - } - noise["err"] = noise["val"] ** 0.5 + noise =info.get("statistics").get("valley").get("amp") + if info.get("status", "skipped") == "ok": - signal = { - "val": info.get("pe_peak_fit").get("amp", {}).get("val", 0.0), - "err": info.get("pe_peak_fit").get("amp", {}).get("err", 0.0), - } + signal = info.get("pe_peak_fit").get("amp") else: - signal = { - "val": info.get("statistics", {}) - .get("1st_pe_guess", {}) - .get("amp", {}) - .get("val", 0) - } - signal["err"] = signal["val"] ** 0.5 - run_snr[pmt] = { - "val": 1 - noise["val"] / signal["val"], - "err": ( - noise["err"] ** 2 / signal["val"] ** 2 - + (noise["val"] ** 2 * signal["err"] ** 2) / signal["val"] ** 4 - ), - "unit": "", - } + signal =info.get("statistics").get("1st_pe_guess").get("amp") + run_snr[pmt] = 1- noise/signal except Exception as e: self.logger.warning( "Failed to compute SNR for run %s PMT %s: %s", run_name, pmt, e ) snr[run_name] = run_snr + self._save_results(snr,"snr") + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) for run_name, pmt_dict in snr.items(): pmts = sorted(pmt_dict.keys()) - vals = [pmt_dict[p]["val"] for p in pmts] - ax.plot(pmts, vals, marker="o", label=run_name) + vals = [pmt_dict[p].n for p in pmts] + errs = [pmt_dict[p].s for p in pmts] + ax.errorbar(pmts, vals, errs, label=run_name, fmt='o') ax.set_xlabel("PMT") ax.set_ylabel("SNR (a.u.)") ax.set_title("SNR per PMT (= 1 - valley/peak)") @@ -620,23 +688,13 @@ def plot_snr(self) -> None: # Dark Count Rate (DCR) # ---------------------- def compute_dark_count_rate(self, time_mode: str = "aux") -> None: - if not 
self.result_yaml.exists(): - msg = f"Result YAML not found: {self.result_yaml}" - raise FileNotFoundError(msg) - with open(self.result_yaml) as f: - data = yaml.safe_load(f) or {} - if "pe_spectrum" not in data: - msg = "pe_spectrum info not present in result YAML; run analysis first." - raise RuntimeError(msg) - if "dcr" in data and not self.override_results: - msg = "DCR already exists and override flag is False." - raise RuntimeError(msg) + data = self._load_results() dcr = {} for run_name, pmt_dict in data["pe_spectrum"].items(): run_dcr = {} for pmt, info in pmt_dict.items(): try: - if info.get("voltage", {}).get("val", 0) == 0: + if info.get("voltage") == 0*self.ureg.volt: continue stats = info.get("statistics", {}) runtime_info = info.get("runtime", {}) @@ -648,30 +706,23 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: time_mode, ) continue - dcts = stats.get("1st_pe_fit_integral_below_valley", {}).get( - "val", 0.0 - ) + stats.get("cts_above_valley", {}).get("val", 0) - runtime = runtime_info[time_mode].get("val", 0) - run_dcr[pmt] = { - "val": float(dcts) / float(runtime), - "err": float(dcts**0.5) / float(runtime), - "unit": "Hz", - } + dcts = stats.get("1st_pe_fit_integral_below_valley")+ stats.get("cts_above_valley") + runtime = runtime_info[time_mode] + run_dcr[pmt] = dcts/runtime + except Exception as e: self.logger.warning( "Failed to compute DCR for run %s PMT %s: %s", run_name, pmt, e ) dcr[run_name] = run_dcr - data["dcr"] = dcr - with open(self.result_yaml, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False) - self.logger.info("Wrote DCR results to %s", self.result_yaml) + self._save_results(dcr,"dcr") fig, ax = plt.subplots(figsize=A4_LANDSCAPE) for run_name, pmt_dict in dcr.items(): pmts = sorted(pmt_dict.keys()) - vals = [pmt_dict[p]["val"] for p in pmts] - ax.plot(pmts, vals, marker="o", label=run_name) + vals = [pmt_dict[p].n for p in pmts] + errs = [pmt_dict[p].s for p in pmts] + ax.errorbar(pmts, vals, 
errs, label=run_name, fmt="o") ax.set_xlabel("PMT") ax.set_ylabel("DCR (Hz)") ax.set_title("Dark Count Rate per PMT") @@ -686,39 +737,21 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: # Linear Gain Fit computation # ---------------------- def compute_linear_gain_fit(self) -> None: - if not self.result_yaml.exists(): - msg = f"Result YAML not found: {self.result_yaml}" - raise FileNotFoundError(msg) - with open(self.result_yaml) as f: - data = yaml.safe_load(f) or {} - if "pe_spectrum" not in data: - msg = "pe_spectrum info not present in result YAML; run analysis first." - raise RuntimeError(msg) - + data = self._load_results() tmp_dic = {"used_keys": []} - y_unit = None - x_unit = None + for key, run in data["pe_spectrum"].items(): tmp_dic["used_keys"].append(key) for pmt in run: if pmt not in tmp_dic: - tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} - v = run[pmt]["voltage"]["val"] - xu = run[pmt]["voltage"]["unit"] - if x_unit is None: - x_unit = xu - else: - assert x_unit == xu - if v == 0: + tmp_dic[pmt] = {"voltage": [], "vals": []} + v = run[pmt]["voltage"] + + if v == 0 * self.ureg.volts: continue + tmp_dic[pmt]["voltage"].append(v) - tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]["val"]) - tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak_fit"]["mean"]["err"]) - yu = run[pmt]["pe_peak_fit"]["mean"]["unit"] - if y_unit is None: - y_unit = yu - else: - assert y_unit == yu + tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]) pdf_path = self.plot_folder / "gain_plots.pdf" with PdfPages(pdf_path) as pdf: @@ -726,104 +759,77 @@ def compute_linear_gain_fit(self) -> None: if key == "used_keys": continue fig, ax = plt.subplots(figsize=A4_LANDSCAPE) - ax.errorbar(pmt["voltage"], pmt["vals"], pmt["errs"], fmt="o", label=f"PMT {key}") - ax.set_xlabel(f"Voltage ({x_unit})") - ax.set_ylabel(f"PMT position ({y_unit})") + xunit = pmt["voltage"][0].units + pmt["voltage"] = [i.to(xunit) for i in pmt["voltage"]] + yunit = 
pmt["vals"][0].units + pmt["vals"] =[i.to(yunit) for i in pmt["vals"]] + ax.errorbar([i.m for i in pmt["voltage"]], + [i.n for i in pmt["vals"]], + [i.s for i in pmt["vals"]], + label=f"PMT {key}", + fmt="o" + ) + ax.set_xlabel(f"Voltage ({format(xunit,"~")})") + ax.set_ylabel(f"Gain ({format(yunit,"~")})") if self.calib != "None": self._add_charge_axis(ax, True) params, covariance = curve_fit( linear_func, - pmt["voltage"], - pmt["vals"], - sigma=pmt["errs"], + [i.m for i in pmt["voltage"]], + [i.n for i in pmt["vals"]], + [i.s for i in pmt["vals"]], absolute_sigma=True, ) a_opt, b_opt = params perr = np.sqrt(np.diag(covariance)) - x = np.linspace(-1 * b_opt / a_opt, max(pmt["voltage"]) + 10, 1000) + x = np.linspace(-1 * b_opt / a_opt, max([i.m for i in pmt["voltage"]]) + 10, 1000) ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") - tmp_dic[key]["a"] = { - "val": float(a_opt), - "err": float(perr[0]), - "unit": f"{y_unit}/{x_unit}", - } - tmp_dic[key]["b"] = { - "val": float(b_opt), - "err": float(perr[1]), - "unit": f"{y_unit}/{x_unit}", - } + tmp_dic[key]["a"] = self.ureg.Quantity(ufloat(a_opt,perr[0]),yunit/xunit) + tmp_dic[key]["b"] = self.ureg.Quantity(ufloat(b_opt,perr[1]),yunit) + tmp_dic[key]["func"] = "G = a*voltage+b" pmt.pop("voltage") pmt.pop("vals") - pmt.pop("errs") ax.legend() pdf.savefig(fig) plt.close(fig) - data["linear_gain"] = tmp_dic - with open(self.result_yaml, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False) - self.logger.info("Linear gain fit results saved to %s", self.result_yaml) + + self._save_results(tmp_dic,"linear_gain") - def calibrate_nnls_values(self, calibration_func: Callable, output_file: str, new_unit: str): + def calibrate_nnls_values(self, output_file: str): """ - Reads the current results.yaml, finds all entries that contain a dict with - keys {"val": , "unit": "NNLS"}, applies calibration_func(val) - and writes a calibrated result file. 
+ Reads the current results.yaml, + finds all entries that contain a Charge compatible quantities + Applies charge conversion as per self.calib + writes out calibrated file to output_file. """ - factor = calibration_func(1) - try: - with open(self.result_yaml) as f: - data = yaml.safe_load(f) - except Exception as e: - msg = f"Failed to load file: {e}" + if self.calib != "gain" and not self.ureg.NNLS.is_compatible_with(self.calib): + msg = f"Unit [{self.calib}] not compatible with charge" self.logger.error(msg) - raise + return + + data = self._load_results() + + def convert_charge_units(d): + """Recursively convert all Quantities compatible with charge to new_unit.""" + for k, v in d.items(): + if isinstance(v, dict): + convert_charge_units(v) # recurse + elif isinstance(v, self.ureg.Quantity): + if self.calib == "gain": + self._unit_converter(v,'C',self.ureg.elementary_charge) + else: + self._unit_converter(v,self.calib) + - def classify(unit: str): - unit = str(unit) - if unit == "NNLS" or unit.startswith("NNLS/"): - return "nnls" - if "/NNLS" in unit: - return "per_nnls" - if "NNLS" in unit: - return "nnls" - return None - - def recurse(obj): - if isinstance(obj, dict): - if set(obj.keys()) >= {"val", "unit"}: - kind = classify(obj.get("unit")) - if kind == "nnls": - try: - obj["val"] = obj["val"] * factor - if "err" in obj: - obj["err"] = obj["err"] * factor - obj["unit"] = obj["unit"].replace("NNLS", new_unit) - except Exception as e: - msg = f"NNLS calibration failed for value {obj}: {e}" - self.logger.error(msg) - elif kind == "per_nnls": - try: - obj["val"] = obj["val"] / factor - if "err" in obj: - obj["err"] = obj["err"] / factor - obj["unit"] = obj["unit"].replace("NNLS", new_unit) - except Exception as e: - msg = f"per-NNLS calibration failed for value {obj}: {e}" - self.logger.error(msg) - for _, v in obj.items(): - recurse(v) - elif isinstance(obj, list): - for item in obj: - recurse(item) - - recurse(data) + # Convert all charge-compatible 
quantities to nanocoulombs + convert_charge_units(data) try: with open(output_file, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False) + yaml.safe_dump(quantity_to_dict(data), f, default_flow_style=False) except Exception as e: msg = f"Failed to write calibrated file: {e}" self.logger.error(msg) @@ -858,8 +864,7 @@ def main() -> None: "-c", "--calibrate", default="None", - choices=["pC", "adc", "gain", "None"], - help="Choose a charge calibration value (pC, adc, gain, or None)", + help="Choose a charge calibration unit", ) parser.add_argument( "-b", @@ -911,37 +916,9 @@ def main() -> None: if args.compute_gain: analyzer.compute_linear_gain_fit() if args.compute_snr: - analyzer.plot_snr() + analyzer.compute_snr() if args.calibrate != "None": - if args.calibrate == "pC": - analyzer.calibrate_nnls_values( - lambda x: x - * ( - (analyzer.v_per_adc * analyzer.up_sampling_ratio * analyzer.sampling_time) - / analyzer.adc_impedance - ) - * 1e12, - str(analyzer.result_yaml).replace(".yaml", "_pC_calibrated.yaml"), - "pC", - ) - elif args.calibrate == "adc": - analyzer.calibrate_nnls_values( - lambda x: x * analyzer.up_sampling_ratio, - str(analyzer.result_yaml).replace(".yaml", "_adc_calibrated.yaml"), - "ADC", - ) - elif args.calibrate == "gain": - elem = 1.602e-19 # C (elementary charge) - analyzer.calibrate_nnls_values( - lambda x: x - * ( - (analyzer.v_per_adc * analyzer.up_sampling_ratio * analyzer.sampling_time) - / analyzer.adc_impedance - ) - / elem, - str(analyzer.result_yaml).replace(".yaml", "_gain_calibrated.yaml"), - "a.u.", - ) + analyzer.calibrate_nnls_values(output_file=str(analyzer.result_yaml).replace(".yaml", "_calibrated.yaml")) logger.info("Processing complete.") except Exception as e: From f171e489c971140c13fbacb6c5b03c57ec78e5fb Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 10 Dec 2025 12:07:13 -0800 Subject: [PATCH 18/32] moved helper functions to util file --- pyproject.toml | 2 + .../pmt/ana/peSpectrumAnalyzer.py | 318 
++++++------------ src/mintanalysis/pmt/ana/utils.py | 128 +++++++ 3 files changed, 237 insertions(+), 211 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/utils.py diff --git a/pyproject.toml b/pyproject.toml index b6caa17..c70749a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ dependencies = [ "iminuit", "pymongo", "PyYAML", +"pint", "sshtunnel", "pygama>=2.0.0", "legend-daq2lh5>=1.2.1", @@ -65,6 +66,7 @@ dependencies = [ "numba>0.52", "pandas", "matplotlib", +"uncertainties", "scipy>=1.14", "jupyter", "ipython", diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index 41a467c..6b87d28 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -18,7 +18,6 @@ import argparse import logging -from collections.abc import Callable from dataclasses import dataclass from pathlib import Path from typing import Any @@ -29,11 +28,19 @@ from iminuit import Minuit from lgdo import lh5 from matplotlib.backends.backend_pdf import PdfPages -from scipy.optimize import curve_fit - from pint import UnitRegistry -from uncertainties import ufloat, UFloat - +from scipy.optimize import curve_fit +from uncertainties import ufloat + +from .utils import ( + gaussian, + get_physics_object, + linear_func, + poisson_nll, + quantity_to_dict, + setup_logging, + valley_index_strict, +) # -------------------------- # TODO For Debugging only! 
@@ -47,39 +54,6 @@ A4_LANDSCAPE = (11.69, 8.27) -# -------------------------- -# Logging Setup -# -------------------------- - - -def setup_logging( - log_file: Path = RESULT_DIR / "analysis.log", level: int = logging.INFO -) -> logging.Logger: - logger = logging.getLogger("PESpectrum") - logger.setLevel(level) - logger.propagate = False - - if not logger.handlers: - fmt = logging.Formatter( - "[%(asctime)s] [%(name)s - %(funcName)s] [%(levelname)s] %(message)s" - ) - - sh = logging.StreamHandler() - sh.setLevel(level) - sh.setFormatter(fmt) - logger.addHandler(sh) - - fh = logging.FileHandler(log_file, mode="w") - fh.setLevel(level) - fh.setFormatter(fmt) - logger.addHandler(fh) - - return logger - - -# -------------------------- -# Small helpers -# -------------------------- @dataclass(frozen=True) class Calibration: up_sampling_ratio: float @@ -88,109 +62,12 @@ class Calibration: sampling_time: float renormalization_factor: float + @dataclass class ChannelResult: status: str # 'ok', 'skipped', 'error' data: dict[str, Any] -def get_physics_object(obj, ureg): - """ - Convert a given object recursevly - to a object with all {val,(err),unit} occurences replaced with a pint (and ufloat) object - """ - if isinstance(obj, dict) and {"val", "unit"}.issubset(obj): - val = obj["val"] - if "err" in obj: - err = obj["err"] - return ureg.Quantity(ufloat(val, err),ureg(obj["unit"])) - else: - return ureg.Quantity(val,ureg(obj["unit"])) - - elif isinstance(obj, dict): - return {k: get_physics_object(v, ureg) for k, v in obj.items()} - - elif isinstance(obj, (list, tuple)): - return type(obj)(get_physics_object(v, ureg) for v in obj) - - else: - return obj - -def quantity_to_dict(obj): - """ - Recursively replace a pint (and ufloat) object with a dict - {val, (err), unit} - """ - # The pint internal function is broken (see issue 2121) - preferred_units = ['Hz'] - if hasattr(obj, "magnitude") and hasattr(obj, "units"): - if not preferred_units is None: - for u in 
preferred_units: - if obj.is_compatible_with(u): - obj = obj.to(u) - mag = obj.magnitude - - - if isinstance(mag, UFloat): - val = mag.n - err = mag.s - else: - val = mag - err = None - - return { - "val": float(val), - **({ "err": float(err) } if err is not None else {}), - "unit": format(obj.units,"~") - } - - elif isinstance(obj, dict): - return {k: quantity_to_dict(v) for k, v in obj.items()} - - elif isinstance(obj, (list, tuple)): - return type(obj)(quantity_to_dict(v) for v in obj) - - else: - return obj - -def linear_func(x, a, b): - return a * x + b - - -def gaussian(x: np.ndarray, amp: float, mu: float, sigma: float) -> np.ndarray: - return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - - -def poisson_nll(amp: float, mu: float, sigma: float, x: np.ndarray, y: np.ndarray) -> float: - expected = gaussian(x, amp, mu, sigma) - expected = np.clip(expected, 1e-10, None) - return float(np.sum(expected - y * np.log(expected))) - - -def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: - """Return (peak_idx, valley_idx) or None.""" - n = len(y) - if n < 3: - return None - - peak = None - for i in range(1, n - 1): - if y[i] > y[i - 1] and y[i] > y[i + 1]: - peak = i - break - if peak is None: - return None - - valley = peak - i = peak + 1 - while i < n: - if y[i] > y[i - 1]: - return peak, valley - if y[i] < y[valley]: - valley = i - i += 1 - - return None - # -------------------------- # Analyzer class @@ -204,7 +81,7 @@ def __init__( lim: float = 20, override_results: bool = False, logger: logging.Logger | None = None, - calibrator: Calibration = Calibration(24/240,0.25e-3,75.,4.8e-9,1), + calibrator: Calibration | None = None, calib: str = "None", ) -> None: self.aux_yaml = aux_yaml @@ -219,25 +96,23 @@ def __init__( self.logger = logger or setup_logging() self.plot_folder.mkdir(parents=True, exist_ok=True) self.calibrator = calibrator + if calibrator is None: + self.calibrator = Calibration(24 / 240, 0.25e-3, 75.0, 4.8e-9, 1) self.calib = calib 
self.ureg = UnitRegistry() self.aux = self._load_aux() # unit handling - vadc = self.ureg.Quantity(calibrator.v_per_adc,self.ureg.volt) - usr = self.ureg.Quantity(calibrator.up_sampling_ratio, self.ureg.dimensionless) - rf = self.ureg.Quantity(calibrator.renormalization_factor, self.ureg.dimensionless) - st = self.ureg.Quantity(calibrator.sampling_time,self.ureg.seconds) - imp = self.ureg.Quantity(calibrator.adc_impedance,self.ureg.ohm) + vadc = self.ureg.Quantity(self.calibrator.v_per_adc, self.ureg.volt) + usr = self.ureg.Quantity(self.calibrator.up_sampling_ratio, self.ureg.dimensionless) + rf = self.ureg.Quantity(self.calibrator.renormalization_factor, self.ureg.dimensionless) + st = self.ureg.Quantity(self.calibrator.sampling_time, self.ureg.seconds) + imp = self.ureg.Quantity(self.calibrator.adc_impedance, self.ureg.ohm) - nnls_coloumb_factor = ((vadc*usr*st*rf)/imp) + nnls_coloumb_factor = (vadc * usr * st * rf) / imp self.ureg.define(f"NNLS = {nnls_coloumb_factor.to('coulomb').magnitude} * coulomb") self.ureg.define(f"ADC = {usr.magnitude}*NNLS") - - - - # ---------------------- # I/O helpers # ---------------------- @@ -250,7 +125,7 @@ def _load_aux(self) -> dict[str, Any]: self.logger.info("Loaded aux YAML: %s", self.aux_yaml) # convert to physics units - aux = get_physics_object(aux,self.ureg) + aux = get_physics_object(aux, self.ureg) if self.keys is None: return aux @@ -262,8 +137,8 @@ def _load_aux(self) -> dict[str, Any]: msg = f"Key {k} not in aux file, skipping." self.logger.warning(msg) return ret - - def _load_results(self) ->dict: + + def _load_results(self) -> dict: if not self.result_yaml.exists(): msg = f"Result YAML not found: {self.result_yaml}" raise FileNotFoundError(msg) @@ -272,9 +147,8 @@ def _load_results(self) ->dict: if "pe_spectrum" not in data: msg = "pe_spectrum info not present in result YAML; run analysis first." 
raise RuntimeError(msg) - - return get_physics_object(data,self.ureg) + return get_physics_object(data, self.ureg) def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) -> None: if self.result_yaml.exists(): @@ -402,7 +276,7 @@ def process_channel( # total counts result["statistics"] = { - "total_cts": self.ureg.Quantity(ufloat(np.sum(n),np.sum(n)**0.5), 'dimensionless'), + "total_cts": self.ureg.Quantity(ufloat(np.sum(n), np.sum(n) ** 0.5), "dimensionless"), } # runtime @@ -493,9 +367,13 @@ def process_channel( fit_errs = {k: float(v) for k, v in m.errors.to_dict().items()} result["pe_peak_fit"] = { - "mean": self.ureg.Quantity(ufloat(fit_vals["mu"], fit_errs["mu"]),self.ureg.NNLS), - "sigma": self.ureg.Quantity(ufloat(fit_vals["sigma"], fit_errs["sigma"]),self.ureg.NNLS), - "amp": self.ureg.Quantity(ufloat(fit_vals["amp"], fit_errs["amp"]),self.ureg.dimensionless) + "mean": self.ureg.Quantity(ufloat(fit_vals["mu"], fit_errs["mu"]), self.ureg.NNLS), + "sigma": self.ureg.Quantity( + ufloat(fit_vals["sigma"], fit_errs["sigma"]), self.ureg.NNLS + ), + "amp": self.ureg.Quantity( + ufloat(fit_vals["amp"], fit_errs["amp"]), self.ureg.dimensionless + ), } # full fit curve over bins for plotting & integrals @@ -507,19 +385,28 @@ def process_channel( # statistics try: result["statistics"] = { - "1st_pe_fit_integral_below_valley": self.ureg.Quantity(ufloat(np.sum(y_fit[:valley_idx]), - np.sum(y_fit[:valley_idx])**0.5), 'dimensionless'), - "cts_above_valley": self.ureg.Quantity(ufloat(np.sum(n[:valley_idx]), - np.sum(n[:valley_idx])**0.5), 'dimensionless'), - "1st_pe_fit_integral": self.ureg.Quantity(ufloat(np.sum(y_fit), np.sum(y_fit)**0.5), 'dimensionless'), - "total_cts": self.ureg.Quantity(ufloat(np.sum(n), np.sum(n)**0.5), 'dimensionless'), + "1st_pe_fit_integral_below_valley": self.ureg.Quantity( + ufloat(np.sum(y_fit[:valley_idx]), np.sum(y_fit[:valley_idx]) ** 0.5), + "dimensionless", + ), + "cts_above_valley": self.ureg.Quantity( + 
ufloat(np.sum(n[:valley_idx]), np.sum(n[:valley_idx]) ** 0.5), "dimensionless" + ), + "1st_pe_fit_integral": self.ureg.Quantity( + ufloat(np.sum(y_fit), np.sum(y_fit) ** 0.5), "dimensionless" + ), + "total_cts": self.ureg.Quantity( + ufloat(np.sum(n), np.sum(n) ** 0.5), "dimensionless" + ), "valley": { "pos": float(bin_centers[valley_idx]) * self.ureg.NNLS, - "amp": self.ureg.Quantity(ufloat(n[valley_idx],n[valley_idx]**0.5), 'dimensionless') + "amp": self.ureg.Quantity( + ufloat(n[valley_idx], n[valley_idx] ** 0.5), "dimensionless" + ), }, "1st_pe_guess": { - "pos": float(mu0) *self.ureg.NNLS, - "amp": self.ureg.Quantity(ufloat(amp0,amp0**0.5), 'dimensionless'), + "pos": float(mu0) * self.ureg.NNLS, + "amp": self.ureg.Quantity(ufloat(amp0, amp0**0.5), "dimensionless"), }, } except Exception as e: @@ -587,21 +474,23 @@ def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: is_y : bool If True add a new y-axis. Else an x-axis. """ - + if self.calib == "gain": label = "Gain (a.u.)" - func = (lambda x: ((x*self.ureg.NNLS).to("C")/self.ureg.elementary_charge).m, - lambda y: ((y*self.ureg.elementary_charge).to("NNLS")).m + func = ( + lambda x: ((x * self.ureg.NNLS).to("C") / self.ureg.elementary_charge).m, + lambda y: ((y * self.ureg.elementary_charge).to("NNLS")).m, ) else: if not self.ureg.NNLS.is_compatible_with(self.calib): msg = f"Unit [{self.calib}] not compatible with charge" self.logger.warning(msg) label = f"Charge ({self.calib})" - func = (lambda x: (x*self.ureg.NNLS).to(self.calib).m, - lambda y: (y*self.ureg(self.calib)).to("NNLS").m + func = ( + lambda x: (x * self.ureg.NNLS).to(self.calib).m, + lambda y: (y * self.ureg(self.calib)).to("NNLS").m, ) - + if is_y: secax_y = ax.secondary_yaxis("right", functions=func) secax_y.set_ylabel(label) @@ -609,7 +498,7 @@ def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: secax_x = ax.secondary_xaxis("top", functions=func) secax_x.set_xlabel(label) - def _unit_converter(self, v,target_unit, constant = 1): + def 
_unit_converter(self, v, target_unit, constant=1): """ Take a value v and if its a Quantity apply conversion of the targeted unit. @@ -624,23 +513,26 @@ def _unit_converter(self, v,target_unit, constant = 1): dims = dict(v._units) # Decompose units dims = dict(v._units) - target_units = {k: d for k, d in dims.items() if self.ureg(k).is_compatible_with(target_unit)} - other_units = {k: d for k, d in dims.items() if not self.ureg(k).is_compatible_with(target_unit)} + target_units = { + k: d for k, d in dims.items() if self.ureg(k).is_compatible_with(target_unit) + } + other_units = { + k: d for k, d in dims.items() if not self.ureg(k).is_compatible_with(target_unit) + } # Start with dimensionless v_base = v / v.units # Multiply back non-target units for k, d in other_units.items(): - v_base *= self.ureg(k)**d + v_base *= self.ureg(k) ** d # Convert target units to target unit for k, d in target_units.items(): - v_base *= (self.ureg(k).to(target_unit))**d + v_base *= (self.ureg(k).to(target_unit)) ** d return v_base * constant - # ---------------------- # Signal to Noise Ratio (SNR) # ---------------------- @@ -653,27 +545,27 @@ def compute_snr(self) -> None: try: if info.get("voltage") == 0 * self.ureg.V: continue - noise =info.get("statistics").get("valley").get("amp") - + noise = info.get("statistics").get("valley").get("amp") + if info.get("status", "skipped") == "ok": signal = info.get("pe_peak_fit").get("amp") else: - signal =info.get("statistics").get("1st_pe_guess").get("amp") - run_snr[pmt] = 1- noise/signal + signal = info.get("statistics").get("1st_pe_guess").get("amp") + run_snr[pmt] = 1 - noise / signal except Exception as e: self.logger.warning( "Failed to compute SNR for run %s PMT %s: %s", run_name, pmt, e ) snr[run_name] = run_snr - self._save_results(snr,"snr") + self._save_results(snr, "snr") fig, ax = plt.subplots(figsize=A4_LANDSCAPE) for run_name, pmt_dict in snr.items(): pmts = sorted(pmt_dict.keys()) vals = [pmt_dict[p].n for p in pmts] errs = 
[pmt_dict[p].s for p in pmts] - ax.errorbar(pmts, vals, errs, label=run_name, fmt='o') + ax.errorbar(pmts, vals, errs, label=run_name, fmt="o") ax.set_xlabel("PMT") ax.set_ylabel("SNR (a.u.)") ax.set_title("SNR per PMT (= 1 - valley/peak)") @@ -694,7 +586,7 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: run_dcr = {} for pmt, info in pmt_dict.items(): try: - if info.get("voltage") == 0*self.ureg.volt: + if info.get("voltage") == 0 * self.ureg.volt: continue stats = info.get("statistics", {}) runtime_info = info.get("runtime", {}) @@ -706,16 +598,18 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: time_mode, ) continue - dcts = stats.get("1st_pe_fit_integral_below_valley")+ stats.get("cts_above_valley") + dcts = stats.get("1st_pe_fit_integral_below_valley") + stats.get( + "cts_above_valley" + ) runtime = runtime_info[time_mode] - run_dcr[pmt] = dcts/runtime - + run_dcr[pmt] = dcts / runtime + except Exception as e: self.logger.warning( "Failed to compute DCR for run %s PMT %s: %s", run_name, pmt, e ) dcr[run_name] = run_dcr - self._save_results(dcr,"dcr") + self._save_results(dcr, "dcr") fig, ax = plt.subplots(figsize=A4_LANDSCAPE) for run_name, pmt_dict in dcr.items(): @@ -762,15 +656,16 @@ def compute_linear_gain_fit(self) -> None: xunit = pmt["voltage"][0].units pmt["voltage"] = [i.to(xunit) for i in pmt["voltage"]] yunit = pmt["vals"][0].units - pmt["vals"] =[i.to(yunit) for i in pmt["vals"]] - ax.errorbar([i.m for i in pmt["voltage"]], - [i.n for i in pmt["vals"]], - [i.s for i in pmt["vals"]], - label=f"PMT {key}", - fmt="o" - ) - ax.set_xlabel(f"Voltage ({format(xunit,"~")})") - ax.set_ylabel(f"Gain ({format(yunit,"~")})") + pmt["vals"] = [i.to(yunit) for i in pmt["vals"]] + ax.errorbar( + [i.m for i in pmt["voltage"]], + [i.n for i in pmt["vals"]], + [i.s for i in pmt["vals"]], + label=f"PMT {key}", + fmt="o", + ) + ax.set_xlabel(f"Voltage ({format(xunit,'~')})") + ax.set_ylabel(f"Gain ({format(yunit,'~')})") if 
self.calib != "None": self._add_charge_axis(ax, True) @@ -778,7 +673,7 @@ def compute_linear_gain_fit(self) -> None: params, covariance = curve_fit( linear_func, [i.m for i in pmt["voltage"]], - [i.n for i in pmt["vals"]], + [i.n for i in pmt["vals"]], [i.s for i in pmt["vals"]], absolute_sigma=True, ) @@ -786,8 +681,8 @@ def compute_linear_gain_fit(self) -> None: perr = np.sqrt(np.diag(covariance)) x = np.linspace(-1 * b_opt / a_opt, max([i.m for i in pmt["voltage"]]) + 10, 1000) ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") - tmp_dic[key]["a"] = self.ureg.Quantity(ufloat(a_opt,perr[0]),yunit/xunit) - tmp_dic[key]["b"] = self.ureg.Quantity(ufloat(b_opt,perr[1]),yunit) + tmp_dic[key]["a"] = self.ureg.Quantity(ufloat(a_opt, perr[0]), yunit / xunit) + tmp_dic[key]["b"] = self.ureg.Quantity(ufloat(b_opt, perr[1]), yunit) tmp_dic[key]["func"] = "G = a*voltage+b" pmt.pop("voltage") @@ -795,12 +690,12 @@ def compute_linear_gain_fit(self) -> None: ax.legend() pdf.savefig(fig) plt.close(fig) - - self._save_results(tmp_dic,"linear_gain") + + self._save_results(tmp_dic, "linear_gain") def calibrate_nnls_values(self, output_file: str): """ - Reads the current results.yaml, + Reads the current results.yaml, finds all entries that contain a Charge compatible quantities Applies charge conversion as per self.calib writes out calibrated file to output_file. 
@@ -809,23 +704,22 @@ def calibrate_nnls_values(self, output_file: str): msg = f"Unit [{self.calib}] not compatible with charge" self.logger.error(msg) return - + data = self._load_results() def convert_charge_units(d): """Recursively convert all Quantities compatible with charge to new_unit.""" - for k, v in d.items(): + for _, v in d.items(): if isinstance(v, dict): convert_charge_units(v) # recurse elif isinstance(v, self.ureg.Quantity): if self.calib == "gain": - self._unit_converter(v,'C',self.ureg.elementary_charge) + self._unit_converter(v, "C", self.ureg.elementary_charge) else: - self._unit_converter(v,self.calib) - + self._unit_converter(v, self.calib) # Convert all charge-compatible quantities to nanocoulombs - convert_charge_units(data) + convert_charge_units(data) try: with open(output_file, "w") as f: @@ -898,7 +792,7 @@ def main() -> None: args = parser.parse_args() - logger = setup_logging(level=logging.INFO) + logger = setup_logging(log_file=RESULT_DIR / "analysis.log", level=logging.INFO) try: analyzer = PESpectrumAnalyzer( logger=logger, @@ -918,7 +812,9 @@ def main() -> None: if args.compute_snr: analyzer.compute_snr() if args.calibrate != "None": - analyzer.calibrate_nnls_values(output_file=str(analyzer.result_yaml).replace(".yaml", "_calibrated.yaml")) + analyzer.calibrate_nnls_values( + output_file=str(analyzer.result_yaml).replace(".yaml", "_calibrated.yaml") + ) logger.info("Processing complete.") except Exception as e: diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py new file mode 100644 index 0000000..2298d7c --- /dev/null +++ b/src/mintanalysis/pmt/ana/utils.py @@ -0,0 +1,128 @@ +''' +Helper functions for ana tier +''' +from __future__ import annotations +import logging +import numpy as np +from pathlib import Path +from uncertainties import UFloat, ufloat + +def setup_logging( + log_file: Path = "analysis.log", level: int = logging.INFO +) -> logging.Logger: + logger = logging.getLogger("PESpectrum") + 
logger.setLevel(level) + logger.propagate = False + + if not logger.handlers: + fmt = logging.Formatter( + "[%(asctime)s] [%(name)s - %(funcName)s] [%(levelname)s] %(message)s" + ) + + sh = logging.StreamHandler() + sh.setLevel(level) + sh.setFormatter(fmt) + logger.addHandler(sh) + + fh = logging.FileHandler(log_file, mode="w") + fh.setLevel(level) + fh.setFormatter(fmt) + logger.addHandler(fh) + + return logger + +def get_physics_object(obj, ureg): + """ + Convert a given object recursevly + to a object with all {val,(err),unit} occurrences replaced with a pint (and ufloat) object + """ + if isinstance(obj, dict) and {"val", "unit"}.issubset(obj): + val = obj["val"] + if "err" in obj: + err = obj["err"] + return ureg.Quantity(ufloat(val, err), ureg(obj["unit"])) + return ureg.Quantity(val, ureg(obj["unit"])) + + if isinstance(obj, dict): + return {k: get_physics_object(v, ureg) for k, v in obj.items()} + + if isinstance(obj, (list, tuple)): + return type(obj)(get_physics_object(v, ureg) for v in obj) + + return obj + + +def quantity_to_dict(obj): + """ + Recursively replace a pint (and ufloat) object with a dict + {val, (err), unit} + """ + # The pint internal function is broken (see issue 2121) + preferred_units = ["Hz"] + if hasattr(obj, "magnitude") and hasattr(obj, "units"): + if preferred_units is not None: + for u in preferred_units: + if obj.is_compatible_with(u): + obj = obj.to(u) + mag = obj.magnitude + + if isinstance(mag, UFloat): + val = mag.n + err = mag.s + else: + val = mag + err = None + + return { + "val": float(val), + **({"err": float(err)} if err is not None else {}), + "unit": format(obj.units, "~"), + } + + if isinstance(obj, dict): + return {k: quantity_to_dict(v) for k, v in obj.items()} + + if isinstance(obj, (list, tuple)): + return type(obj)(quantity_to_dict(v) for v in obj) + + return obj + + +def linear_func(x, a, b): + return a * x + b + + +def gaussian(x: np.ndarray, amp: float, mu: float, sigma: float) -> np.ndarray: + return amp * 
np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + +def poisson_nll(amp: float, mu: float, sigma: float, x: np.ndarray, y: np.ndarray) -> float: + expected = gaussian(x, amp, mu, sigma) + expected = np.clip(expected, 1e-10, None) + return float(np.sum(expected - y * np.log(expected))) + + +def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: + """Return (peak_idx, valley_idx) or None.""" + n = len(y) + if n < 3: + return None + + peak = None + for i in range(1, n - 1): + if y[i] > y[i - 1] and y[i] > y[i + 1]: + peak = i + break + if peak is None: + return None + + valley = peak + i = peak + 1 + while i < n: + if y[i] > y[i - 1]: + return peak, valley + if y[i] < y[valley]: + valley = i + i += 1 + + return None \ No newline at end of file From d7af52f759618917a02445619ddd5f8aded776e2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 10 Dec 2025 16:46:23 -0800 Subject: [PATCH 19/32] First commit on uploader script --- pyproject.toml | 1 + src/mintanalysis/pmt/ana/__init__.py | 3 +- src/mintanalysis/pmt/ana/uploadToDB.py | 354 +++++++++++++++++++++++++ src/mintanalysis/pmt/ana/utils.py | 17 +- 4 files changed, 367 insertions(+), 8 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/uploadToDB.py diff --git a/pyproject.toml b/pyproject.toml index c70749a..0d80565 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ docs = [ [project.scripts] # Format: = ":" build_ana = "mintanalysis.pmt.ana.peSpectrumAnalyzer:main" +upload_ana = "mintanalysis.pmt.ana.uploadToDB:main" #[[tool.uv.index]] # Optional name for the index. diff --git a/src/mintanalysis/pmt/ana/__init__.py b/src/mintanalysis/pmt/ana/__init__.py index 037a007..3d5417f 100644 --- a/src/mintanalysis/pmt/ana/__init__.py +++ b/src/mintanalysis/pmt/ana/__init__.py @@ -2,6 +2,7 @@ Routines for the final analysis of PMT data """ +from . 
import uploadToDB from .peSpectrumAnalyzer import PESpectrumAnalyzer -__all__ = ["PESpectrumAnalyzer"] +__all__ = ["PESpectrumAnalyzer", "uploadToDB"] diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py new file mode 100644 index 0000000..a3a2fd6 --- /dev/null +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -0,0 +1,354 @@ +""" +Takes selective data from the ANA tier and uploads it to the mongoDB +mongoDB structure has the shape: +""" + +from __future__ import annotations + +import argparse +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any, List + +import yaml +from pint import UnitRegistry + +from .utils import get_physics_object, quantity_to_dict, setup_logging + +ROOT_DIR = Path("/home/pkrause/software/mint-analysis/debug_out") + +# binary channel mask to upload +# 0x0001 is PMT 1 only +# 0x0002 is PMT 2 only +# 0x0003 is PMT 1,2 +# 0x0004 is PMT 3 only + + +NumberOrList = float | List[float] +StrOrList = str | List[str] + + +@dataclass +class Mapping: + parameters: list[str] + func: str + + +@dataclass +class PMTInfo: + v10_in_volt: NumberOrList + di10_in_mA: NumberOrList + frac: NumberOrList + + +@dataclass +class EnvironmentInfo: + temperature_in_celsius: NumberOrList + pressure_in_hpa: NumberOrList + humidity_in_percent: NumberOrList + measurement_duration_in_s: NumberOrList + + +@dataclass +class SoftwareInfo: + framework: str + pe_reconstruction: str # e.g. "NNLS" | "Integral" + sftp_path: str + run_tags: StrOrList + + +@dataclass +class MeasurementResult: + measurement_type: str + measurement_location: str + devices_used: Any # TODO: Loosely typed right now, can be tightened with mongoDB object? 
+ + result: NumberOrList + result_unc: NumberOrList + units: StrOrList + + pmt_info: PMTInfo + env_info: EnvironmentInfo + software_info: SoftwareInfo + + mapping: Mapping | None = None + + def __post_init__(self): + # ---- Type consistency: list or scalar ---- + is_list_result = isinstance(self.result, list) + is_list_unc = isinstance(self.result_unc, list) + is_list_units = isinstance(self.units, list) + + if len({is_list_result, is_list_unc, is_list_units}) != 1: + msg = "result, result_unc, and units must all be either lists or scalars" + raise ValueError(msg) + + # ---- Mapping existence rule ---- + if self.mapping is not None and not is_list_result: + msg = "mapping may only be provided when result, result_unc, and units are lists" + raise ValueError() + + # ---- Length consistency (only for list case) ---- + if is_list_result: + lengths = { + len(self.result), + len(self.result_unc), + len(self.units), + } + + if self.mapping is not None: + lengths.add(len(self.mapping.parameters)) + + if len(lengths) != 1: + msg = "result, result_unc, units, and mapping.parameters must all have same length" + raise ValueError(msg) + + +def get_values( + obj: Any, +) -> tuple[NumberOrList, NumberOrList, StrOrList, Mapping | None, list | None]: + if hasattr(obj, "magnitude") and hasattr(obj, "units"): + ret = quantity_to_dict(obj) + if "err" not in ret: + return ret["val"], 0, ret["unit"], None, None + return ret["val"], ret["err"], ret["unit"], None, None + # else we have multiple results (e.g. 
gain fit) + if isinstance(obj, dict): + ret = quantity_to_dict(obj) + vals = [] + errs = [] + units = [] + params = [] + for k, v in ret.items(): + if not isinstance(v, dict): + continue + vals.append(v["val"]) + errs.append(v.get("err", 0)) + units.append(v["unit"]) + params.append(k) + + return ( + vals, + errs, + units, + Mapping(params, ret.get("func", "No mapping function provided")), + ret.get("used_keys", []), + ) + + msg = f"{type(obj)} not supported" + raise NotImplementedError(msg) + + +def get_pmt_info(channel: int, aux_dict: dict, keys: str | list) -> PMTInfo: + + if isinstance(keys, str): + v10 = aux_dict[keys][channel]["v10"] + di10 = aux_dict[keys][channel]["di10"] + frac = aux_dict[keys][channel]["frac"] + return PMTInfo(v10.to("V").m, di10.to("mA").m, frac.m) + + if isinstance(keys, list): + v10 = [] + di10 = [] + frac = [] + for k in keys: + v10.append(aux_dict[k][channel]["v10"].to("V").m) + di10.append(aux_dict[k][channel]["di10"].to("mA").m) + frac.append(aux_dict[k][channel]["frac"].m) + return PMTInfo(v10, di10, frac) + + msg = f"{type(keys)} not supported" + raise NotImplementedError(msg) + + +def get_env_info(aux_dict: dict, keys: str | list) -> EnvironmentInfo: + """ + Return average env info across runs + """ + if isinstance(keys, str): + pth_start = aux_dict[keys]["pth_start"] + pth_end = aux_dict[keys]["pth_end"] + t = (pth_end["temperature"].to("degC").m + pth_start["temperature"].to("degC").m) / 2 + p = (pth_end["pressure"].to("hPa").m + pth_start["pressure"].to("hPa").m) / 2 + h = (pth_end["humidity"].to("percent").m + pth_start["humidity"].to("percent").m) / 2 + m = aux_dict[keys]["runtime"].to("seconds").m + return EnvironmentInfo(t, p, h, m) + + if isinstance(keys, list): + t, p, h, m = [], [], [], [] + for key in keys: + pth_start = aux_dict[key]["pth_start"] + pth_end = aux_dict[key]["pth_end"] + t.append( + (pth_end["temperature"].to("degC").m + pth_start["temperature"].to("degC").m) / 2 + ) + 
p.append((pth_end["pressure"].to("hPa").m + pth_start["pressure"].to("hPa").m) / 2) + h.append( + (pth_end["humidity"].to("percent").m + pth_start["humidity"].to("percent").m) / 2 + ) + m.append(aux_dict[key]["runtime"].to("seconds").m) + + return EnvironmentInfo(t, p, h, m) + + msg = f"{type(keys)} not supported" + raise NotImplementedError(msg) + + +def auto_int(x): + """ + Custom type converter for argparse that attempts to convert a string + to an integer, supporting both decimal and hexadecimal (prefixed with '0x'). + """ + try: + # Attempt to convert as a decimal integer + return int(x) + except ValueError: + try: + # If decimal conversion fails, attempt to convert as hexadecimal + # The base 0 in int(x, 0) handles '0x' prefix automatically + return int(x, 0) + except ValueError as e: + msg = f"Invalid integer or hexadecimal value: '{x}'" + raise argparse.ArgumentTypeError(msg) from e + + +def main(): + logger = setup_logging(level=logging.INFO) + ureg = UnitRegistry() + + parser = argparse.ArgumentParser(description="Upload analysis results to mongoDB.") + parser.add_argument("-x", "--f_aux", help="Path to aux file", required=True) + parser.add_argument("-a", "--f_ana", help="Path to ana file", required=True) + parser.add_argument( + "-c", + "--ch_mask", + type=auto_int, + help="Channel mask (can be decimal or hexadecimal, e.g., 10 or 0xA)", + required=True, + ) + parser.add_argument( + "-m", + "--measurement", + choices=["dcr", "linear_gain", "snr"], + help="Measurement to upload", + required=True, + ) + parser.add_argument( + "-k", + "--key", + default=None, + help="key to upload (will be ignored for linear_gain measurement)", + ) + parser.add_argument( + "-r", "--reco", default="NNLS", help="Reconstruction algorithm used in DSP" + ) + + args = parser.parse_args() + result_yaml = args.f_ana + aux_file = args.f_aux + measurement = args.measurement + reco = args.reco + ch_mask = args.ch_mask + tags = args.key + + # TODO For debug only. 
By definition keys need to be identical in aux and result files + tag_aux = "2025_12_02_22_46_06" + if measurement == "linear_gain": + tag_aux = ["2025_12_02_22_46_06", "2025_12_02_22_46_58"] + + sftp_pmt_dir = "PMT" + sftp_base_dir = Path("/mint/mint-data/") + p = Path(aux_file) + try: + index = p.parts.index(sftp_pmt_dir) + sftp_root_dir = Path(p.parts[index - 1]) + sftp_dir = str(sftp_base_dir / sftp_root_dir / sftp_pmt_dir) + except ValueError: + msg = f"'{sftp_pmt_dir}' not found in the aux path." + logger.error(msg) + + try: + with open(result_yaml) as f: + data = yaml.safe_load(f) + data = get_physics_object(data, ureg) + except Exception as e: + msg = f"Failed to load file: {e}" + logger.error(msg) + raise + try: + with open(aux_file) as f: + aux = yaml.safe_load(f) + aux = get_physics_object(aux, ureg) + except Exception as e: + msg = f"Failed to load file: {e}" + logger.error(msg) + raise + + if measurement not in data: + msg = f"{measurement} measurement not in result file" + logger.error(msg) + return + + if measurement == "linear_gain": + if tags is not None: + logger.warning( + "Tags are defined redundandly. Using tags defined in linear gain results" + ) + tags = data[measurement].get("used_keys") + if len(tags) < 3: + logger.warning("Less than three measurements found. 
Linear fit result will be bogus") + + while ch_mask: + ch = (ch_mask & -ch_mask).bit_length() - 1 + pmt_no = ch + 1 + + # collect value(s) + vals, errs, units, mapping, _keys = get_values( + data[measurement][pmt_no] + if measurement == "linear_gain" + else data[measurement][tags][pmt_no] + ) + logger.info("collected measurement results") + + # collect pmt info + pmt_info = get_pmt_info(pmt_no, aux, tag_aux) + logger.info("collected PMT info") + + # get environment info + env_info = get_env_info(aux, tag_aux) + logger.info("collected environment info") + + sw_info = SoftwareInfo( + framework="mint-xyz", pe_reconstruction=reco, sftp_path=sftp_dir, run_tags=tags + ) + logger.info("collected software info") + + # get used devices + pmt_devices = f"pmt_{pmt_no}" + + # Build measurement result + res = MeasurementResult( + measurement_type=measurement, + measurement_location="MINT", + devices_used=pmt_devices, + result=vals, + result_unc=errs, + units=units, + mapping=mapping, + pmt_info=pmt_info, + env_info=env_info, + software_info=sw_info, + ) + logger.info("Build data object") + + # TODO Upload to database + + ch_mask &= ch_mask - 1 # clear lowest set bit + + +# -------------------------- +# CLI entrypoint +# -------------------------- +if __name__ == "__main__": + main() diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py index 2298d7c..8db3c87 100644 --- a/src/mintanalysis/pmt/ana/utils.py +++ b/src/mintanalysis/pmt/ana/utils.py @@ -1,15 +1,17 @@ -''' +""" Helper functions for ana tier -''' +""" + from __future__ import annotations + import logging -import numpy as np from pathlib import Path + +import numpy as np from uncertainties import UFloat, ufloat -def setup_logging( - log_file: Path = "analysis.log", level: int = logging.INFO -) -> logging.Logger: + +def setup_logging(log_file: Path = "analysis.log", level: int = logging.INFO) -> logging.Logger: logger = logging.getLogger("PESpectrum") logger.setLevel(level) logger.propagate = 
False @@ -31,6 +33,7 @@ def setup_logging( return logger + def get_physics_object(obj, ureg): """ Convert a given object recursevly @@ -125,4 +128,4 @@ def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: valley = i i += 1 - return None \ No newline at end of file + return None From 79d2827b8b6087891149eba5271774b49c7aa1bf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 13 Dec 2025 01:59:36 -0800 Subject: [PATCH 20/32] Move to Paths relative to aux file --- .../pmt/ana/peSpectrumAnalyzer.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index 6b87d28..90180cf 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -42,12 +42,6 @@ valley_index_strict, ) -# -------------------------- -# TODO For Debugging only! -# -------------------------- -RAW_DIR = Path("/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020") -RESULT_DIR = Path("/home/pkrause/software/mint-analysis/debug_out") - # -------------------------- # Constants # -------------------------- @@ -86,9 +80,9 @@ def __init__( ) -> None: self.aux_yaml = aux_yaml self.keys = keys - self.raw_dir = RAW_DIR - self.plot_folder = RESULT_DIR / "plots" - self.result_yaml = RESULT_DIR / "results.yaml" + + self.plot_folder = self.aux_yaml.parent / "../ana/plots" + self.result_yaml = self.aux_yaml.parent / "../ana/results.yaml" self.bin_size = bin_size self.bins = np.arange(-100, 10000, bin_size) self.lim = lim @@ -189,8 +183,9 @@ def run(self) -> None: # ---------------------- def analyze_run(self, run_name: str, meta: dict[str, Any]) -> dict[int, dict[str, Any]]: # build file paths - fname = meta["daq"].split("/")[-1].replace("daq", "r020").replace("data", "lh5") - f_raw = self.raw_dir / fname + f_raw = self.aux_yaml.parent / Path( + meta["daq"].replace("daq", 
"raw").replace("data", "lh5") + ) if not f_raw.exists(): msg = f"Raw file for run {run_name} not found: {f_raw}" raise FileNotFoundError(msg) @@ -792,7 +787,9 @@ def main() -> None: args = parser.parse_args() - logger = setup_logging(log_file=RESULT_DIR / "analysis.log", level=logging.INFO) + logger = setup_logging( + log_file=Path(args.aux_file).parent / "../ana/analysis.log", level=logging.INFO + ) try: analyzer = PESpectrumAnalyzer( logger=logger, From 523e00295ba6f96e71848512ca8ecad901e0a94c Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 13 Dec 2025 02:13:12 -0800 Subject: [PATCH 21/32] Switch pe peak initial guess algorithm, add noise peak info to stats --- src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index 90180cf..e7713fc 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -305,16 +305,15 @@ def process_channel( noise_peak, valley_idx = vi # find first p.e. peak after noise_peak - sub = n[noise_peak:] - pe_vi = valley_index_strict(sub) + sub = n[valley_idx:] + pe_vi = np.argmax(sub) if pe_vi is None: msg = "1st-p.e. detection failed after noise peak." 
self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) self._decorate_axis(ax) return fig, {"status": "skipped", "reason": msg} - pe_peak_rel, _ = pe_vi - pe_peak_idx = noise_peak + pe_peak_rel + pe_peak_idx = valley_idx + pe_vi # annotate choices ax.axvline(bin_centers[valley_idx], color="red", ls="--", label="valley") @@ -393,6 +392,12 @@ def process_channel( "total_cts": self.ureg.Quantity( ufloat(np.sum(n), np.sum(n) ** 0.5), "dimensionless" ), + "noise_peak": { + "pos": float(bin_centers[noise_peak]) * self.ureg.NNLS, + "amp": self.ureg.Quantity( + ufloat(n[noise_peak], n[noise_peak] ** 0.5), "dimensionless" + ), + }, "valley": { "pos": float(bin_centers[valley_idx]) * self.ureg.NNLS, "amp": self.ureg.Quantity( From 0a860df65561139aa70c45bbb3f15098959ca16d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Dec 2025 10:39:55 -0800 Subject: [PATCH 22/32] Split out utility functions --- analysis.log | 7 +++++++ src/mintanalysis/pmt/ana/utils.py | 25 +++++++++++-------------- 2 files changed, 18 insertions(+), 14 deletions(-) create mode 100644 analysis.log diff --git a/analysis.log b/analysis.log new file mode 100644 index 0000000..dd93656 --- /dev/null +++ b/analysis.log @@ -0,0 +1,7 @@ +[2025-12-10 16:29:52,739] [PESpectrum - main] [WARNING] Tags are defined redundandly. Using tags defined in linear gain results +[2025-12-10 16:29:52,739] [PESpectrum - main] [WARNING] Less than three measurments found. 
Linear fit result will be bogous +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected measurement results +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected PMT info +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected environment info +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected software info +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] Build data object diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py index 8db3c87..5d02f87 100644 --- a/src/mintanalysis/pmt/ana/utils.py +++ b/src/mintanalysis/pmt/ana/utils.py @@ -1,17 +1,15 @@ -""" +''' Helper functions for ana tier -""" - +''' from __future__ import annotations - import logging -from pathlib import Path - import numpy as np +from pathlib import Path from uncertainties import UFloat, ufloat - -def setup_logging(log_file: Path = "analysis.log", level: int = logging.INFO) -> logging.Logger: +def setup_logging( + log_file: Path = "analysis.log", level: int = logging.INFO +) -> logging.Logger: logger = logging.getLogger("PESpectrum") logger.setLevel(level) logger.propagate = False @@ -33,7 +31,6 @@ def setup_logging(log_file: Path = "analysis.log", level: int = logging.INFO) -> return logger - def get_physics_object(obj, ureg): """ Convert a given object recursevly @@ -55,7 +52,7 @@ def get_physics_object(obj, ureg): return obj -def quantity_to_dict(obj): +def quantity_to_dict(obj,val_key="val", err_key="err", unit_key="unit"): """ Recursively replace a pint (and ufloat) object with a dict {val, (err), unit} @@ -77,9 +74,9 @@ def quantity_to_dict(obj): err = None return { - "val": float(val), - **({"err": float(err)} if err is not None else {}), - "unit": format(obj.units, "~"), + val_key: float(val), + **({err_key: float(err)} if err is not None else {}), + unit_key: format(obj.units, "~"), } if isinstance(obj, dict): @@ -128,4 +125,4 @@ def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: valley = i i += 1 
- return None + return None \ No newline at end of file From 9df71d42f1b58158a150e3a832c37a3c5579b776 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 18:40:39 +0000 Subject: [PATCH 23/32] style: pre-commit fixes --- src/mintanalysis/pmt/ana/utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py index 5d02f87..f7cc532 100644 --- a/src/mintanalysis/pmt/ana/utils.py +++ b/src/mintanalysis/pmt/ana/utils.py @@ -1,15 +1,17 @@ -''' +""" Helper functions for ana tier -''' +""" + from __future__ import annotations + import logging -import numpy as np from pathlib import Path + +import numpy as np from uncertainties import UFloat, ufloat -def setup_logging( - log_file: Path = "analysis.log", level: int = logging.INFO -) -> logging.Logger: + +def setup_logging(log_file: Path = "analysis.log", level: int = logging.INFO) -> logging.Logger: logger = logging.getLogger("PESpectrum") logger.setLevel(level) logger.propagate = False @@ -31,6 +33,7 @@ def setup_logging( return logger + def get_physics_object(obj, ureg): """ Convert a given object recursevly @@ -52,7 +55,7 @@ def get_physics_object(obj, ureg): return obj -def quantity_to_dict(obj,val_key="val", err_key="err", unit_key="unit"): +def quantity_to_dict(obj, val_key="val", err_key="err", unit_key="unit"): """ Recursively replace a pint (and ufloat) object with a dict {val, (err), unit} @@ -125,4 +128,4 @@ def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: valley = i i += 1 - return None \ No newline at end of file + return None From 50be2d5f086f8149a67c16c87ddec2353d7ff589 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Dec 2025 10:55:43 -0800 Subject: [PATCH 24/32] Create dir if not existing --- src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff 
--git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index e7713fc..fd2e6b2 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -792,9 +792,10 @@ def main() -> None: args = parser.parse_args() - logger = setup_logging( - log_file=Path(args.aux_file).parent / "../ana/analysis.log", level=logging.INFO - ) + f_log = Path(args.aux_file).parent / "../ana/analysis.log" + f_log.parent.mkdir(parents=True, exist_ok=True) + + logger = setup_logging(log_file=f_log, level=logging.INFO) try: analyzer = PESpectrumAnalyzer( logger=logger, From 4efe039ecf12f241155664481eb9e445d70798b5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Dec 2025 15:06:35 -0800 Subject: [PATCH 25/32] edited uploader --- analysis.log | 23 ++-- .../pmt/ana/peSpectrumAnalyzer.py | 9 ++ src/mintanalysis/pmt/ana/uploadToDB.py | 102 ++++++++++++++---- src/mintanalysis/pmt/ana/utils.py | 4 +- 4 files changed, 111 insertions(+), 27 deletions(-) diff --git a/analysis.log b/analysis.log index dd93656..2041109 100644 --- a/analysis.log +++ b/analysis.log @@ -1,7 +1,16 @@ -[2025-12-10 16:29:52,739] [PESpectrum - main] [WARNING] Tags are defined redundandly. Using tags defined in linear gain results -[2025-12-10 16:29:52,739] [PESpectrum - main] [WARNING] Less than three measurments found. 
Linear fit result will be bogous -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected measurement results -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected PMT info -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected environment info -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected software info -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] Build data object +[2025-12-15 15:05:54,438] [PESpectrum - main] [INFO] collected measurement results +[2025-12-15 15:05:54,438] [PESpectrum - main] [INFO] collected PMT info +[2025-12-15 15:05:54,439] [PESpectrum - main] [INFO] collected environment info +[2025-12-15 15:05:54,439] [PESpectrum - main] [INFO] collected software info +[2025-12-15 15:05:58,512] [PESpectrum - _connect_tunnel] [INFO] [info] SSH tunnel established: 127.0.0.1:43307 -> 127.0.0.1:27017 via production.pacific-neutrino.org +[2025-12-15 15:05:58,620] [PESpectrum - _connect_client] [INFO] [info] Connected to MongoDB at 127.0.0.1:43307 with user p-one +[2025-12-15 15:05:58,753] [PESpectrum - main] [INFO] Build data object +[2025-12-15 15:05:58,753] [PESpectrum - main] [INFO] The following dict would be uploaded to the DB: {'measurement_type': 'dcr', 'measurement_location': 'MINT', 'devices_used': ['p-1-1-pmt-unit-KM56674_275', 'p-1-1-om-hs-01'], 'result': 26463.90079865678, 'result_unc': 30.15089782382669, 'units': 'Hz', 'pmt_info': {'v10_in_volt': 85.09310150146484, 'di10_in_mA': 2.9245901107788086, 'frac': 0.877996027469635}, 'env_info': {'temperature_in_celsius': 29.11357307688087, 'pressure_in_hpa': 1011.6844068085361, 'humidity_in_percent': 23.183785204174583, 'measurement_duration_in_s': 29.110747814178467}, 'software_info': {'framework': 'mint-xyz', 'pe_reconstruction': 'NNLS', 'sftp_path': '/mint/mint-data/p-1-1-om-hs-01/PMT', 'run_tags': '2025_12_13_03_31_52'}, 'mapping': None} +[2025-12-15 15:05:58,753] [PESpectrum - main] [INFO] collected measurement results +[2025-12-15 15:05:58,754] 
[PESpectrum - main] [INFO] collected PMT info +[2025-12-15 15:05:58,754] [PESpectrum - main] [INFO] collected environment info +[2025-12-15 15:05:58,754] [PESpectrum - main] [INFO] collected software info +[2025-12-15 15:05:59,167] [PESpectrum - _connect_tunnel] [INFO] [info] SSH tunnel established: 127.0.0.1:38145 -> 127.0.0.1:27017 via production.pacific-neutrino.org +[2025-12-15 15:05:59,242] [PESpectrum - _connect_client] [INFO] [info] Connected to MongoDB at 127.0.0.1:38145 with user p-one +[2025-12-15 15:05:59,379] [PESpectrum - main] [INFO] Build data object +[2025-12-15 15:05:59,380] [PESpectrum - main] [INFO] The following dict would be uploaded to the DB: {'measurement_type': 'dcr', 'measurement_location': 'MINT', 'devices_used': ['p-1-1-pmt-unit-KM54356_23', 'p-1-1-om-hs-01'], 'result': 26326.463667634333, 'result_unc': 30.07250335177778, 'units': 'Hz', 'pmt_info': {'v10_in_volt': 87.58090209960938, 'di10_in_mA': 1.8282999992370605, 'frac': 0.8174149990081787}, 'env_info': {'temperature_in_celsius': 29.11357307688087, 'pressure_in_hpa': 1011.6844068085361, 'humidity_in_percent': 23.183785204174583, 'measurement_duration_in_s': 29.110747814178467}, 'software_info': {'framework': 'mint-xyz', 'pe_reconstruction': 'NNLS', 'sftp_path': '/mint/mint-data/p-1-1-om-hs-01/PMT', 'run_tags': '2025_12_13_03_31_52'}, 'mapping': None} diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index fd2e6b2..b98e19c 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -107,6 +107,15 @@ def __init__( self.ureg.define(f"NNLS = {nnls_coloumb_factor.to('coulomb').magnitude} * coulomb") self.ureg.define(f"ADC = {usr.magnitude}*NNLS") + cal_dict = { + "vadc": vadc, + "upsampling_ratio": usr, + "renormalization_factor": rf, + "sampling_time": st, + "adc_impedance": imp, + } + self._save_results(cal_dict, "calibration_constants") + # ---------------------- # I/O 
helpers # ---------------------- diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py index a3a2fd6..3ea8d5d 100644 --- a/src/mintanalysis/pmt/ana/uploadToDB.py +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -7,12 +7,14 @@ import argparse import logging -from dataclasses import dataclass +from dataclasses import asdict, dataclass from pathlib import Path from typing import Any, List import yaml +from andromeda.testing.devices.production_database import ProductionDatabase from pint import UnitRegistry +from pint.errors import UndefinedUnitError from .utils import get_physics_object, quantity_to_dict, setup_logging @@ -220,6 +222,9 @@ def main(): parser = argparse.ArgumentParser(description="Upload analysis results to mongoDB.") parser.add_argument("-x", "--f_aux", help="Path to aux file", required=True) parser.add_argument("-a", "--f_ana", help="Path to ana file", required=True) + parser.add_argument( + "-d", "--dry", action="store_true", help="Perform a dry run only (i.e do not upload to DB)" + ) parser.add_argument( "-c", "--ch_mask", @@ -250,13 +255,15 @@ def main(): measurement = args.measurement reco = args.reco ch_mask = args.ch_mask - tags = args.key - - # TODO For debug only. By definition keys need to be identical in aux and result files - tag_aux = "2025_12_02_22_46_06" - if measurement == "linear_gain": - tag_aux = ["2025_12_02_22_46_06", "2025_12_02_22_46_58"] - + tag = args.key + db_opts = { + "use_tunnel": True, + "ssh_keyring_service": "mongo-prod", + "mongo_keyring_service": "mongo-prod", + "logger": logger.info, + } + + # Build SFTP directory sftp_pmt_dir = "PMT" sftp_base_dir = Path("/mint/mint-data/") p = Path(aux_file) @@ -268,14 +275,40 @@ def main(): msg = f"'{sftp_pmt_dir}' not found in the aux path." 
logger.error(msg) + # Load measurement data try: with open(result_yaml) as f: data = yaml.safe_load(f) - data = get_physics_object(data, ureg) except Exception as e: msg = f"Failed to load file: {e}" logger.error(msg) raise + + # 'calibration_constants' in aux file define NNLS + if "calibration_constants" in data: + calib_data = get_physics_object(data["calibration_constants"], ureg) + nnls_coloumb_factor = ( + calib_data.get("vadc") + * calib_data.get("upsampling_ratio") + * calib_data.get("sampling_time") + * calib_data.get("renormalization_factor") + ) / calib_data.get("adc_impedance") + ureg.define(f"NNLS = {nnls_coloumb_factor.to('coulomb').magnitude} * coulomb") + ureg.define(f"ADC = {calib_data.get('upsampling_ratio').magnitude}*NNLS") + + # if no calibration values passed, try to load data without NNLS parameters + # if NNLS is in the tagged measurement, raise error. + try: + data = get_physics_object(data, ureg) + except UndefinedUnitError as e: + if "NNLS" in str(e): + msg = "Unit NNLS not defined. Trying to load reduced data set" + logger.warning(msg) + data = {measurement: get_physics_object(data[measurement], ureg)} + else: + raise e + + # Load aux file try: with open(aux_file) as f: aux = yaml.safe_load(f) @@ -285,20 +318,39 @@ def main(): logger.error(msg) raise + # check measurement sanity if measurement not in data: msg = f"{measurement} measurement not in result file" logger.error(msg) return + tags_aux = list(aux.keys()) if measurement == "linear_gain": - if tags is not None: + if tag is not None: logger.warning( "Tags are defined redundandly. Using tags defined in linear gain results" ) - tags = data[measurement].get("used_keys") - if len(tags) < 3: + tag = data[measurement].get("used_keys") + if len(tag) < 3: logger.warning("Less than three measurements found. 
Linear fit result will be bogus") + if not set(tag).issubset(set(tags_aux)): + msg = f"Tag {tag} not found in the aux file" + logger.error(msg) + raise ValueError + + else: + # Check key sanity + tags_data = list(data[measurement].keys()) + if tag not in tags_aux: + msg = f"Tag {tag} not found in the aux file" + logger.error(msg) + raise ValueError + if tag not in tags_data: + msg = f"Tag {tag} not found in the result file" + logger.error(msg) + raise ValueError + while ch_mask: ch = (ch_mask & -ch_mask).bit_length() - 1 pmt_no = ch + 1 @@ -307,31 +359,36 @@ def main(): vals, errs, units, mapping, _keys = get_values( data[measurement][pmt_no] if measurement == "linear_gain" - else data[measurement][tags][pmt_no] + else data[measurement][tag][pmt_no] ) logger.info("collected measurement results") # collect pmt info - pmt_info = get_pmt_info(pmt_no, aux, tag_aux) + pmt_info = get_pmt_info(pmt_no, aux, tag) logger.info("collected PMT info") # get environment info - env_info = get_env_info(aux, tag_aux) + env_info = get_env_info(aux, tag) logger.info("collected environment info") sw_info = SoftwareInfo( - framework="mint-xyz", pe_reconstruction=reco, sftp_path=sftp_dir, run_tags=tags + framework="mint-xyz", pe_reconstruction=reco, sftp_path=sftp_dir, run_tags=tag ) logger.info("collected software info") # get used devices - pmt_devices = f"pmt_{pmt_no}" + # Get settings from DB + # TODO replace with module level logic + with ProductionDatabase(**db_opts) as db: + overview = db.get_hemisphere_overview(str(sftp_root_dir), "tum") + pmt_name = overview.get("pmt-unit").get(str(pmt_no)).get("uid") + hemisphere = str(sftp_root_dir) # Build measurement result res = MeasurementResult( measurement_type=measurement, measurement_location="MINT", - devices_used=pmt_devices, + devices_used=[pmt_name, hemisphere], result=vals, result_unc=errs, units=units, @@ -342,7 +399,14 @@ def main(): ) logger.info("Build data object") - # TODO Upload to database + # Upload to database + if 
args.dry: + msg = f"The following dict would be uploaded to the DB: {asdict(res)}" + logger.info(msg) + else: + with ProductionDatabase(**db_opts) as db: + db.client["mint"]["Measurements_Pmt"].insert_one(asdict(res)) + msg = f"Uploaded document {asdict(res)} to mint/Measurements_Pmt" ch_mask &= ch_mask - 1 # clear lowest set bit diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py index f7cc532..550621f 100644 --- a/src/mintanalysis/pmt/ana/utils.py +++ b/src/mintanalysis/pmt/ana/utils.py @@ -79,7 +79,9 @@ def quantity_to_dict(obj, val_key="val", err_key="err", unit_key="unit"): return { val_key: float(val), **({err_key: float(err)} if err is not None else {}), - unit_key: format(obj.units, "~"), + unit_key: ( + str(obj.units) if obj.units.is_compatible_with("ohm") else format(obj.units, "~") + ), } if isinstance(obj, dict): From b5e0c25b5592f9f8b97c3a19936bd8b8a3c9155d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 11:21:17 -0800 Subject: [PATCH 26/32] Switch from 1 indexed to 0 indexed aux files --- .../pmt/ana/peSpectrumAnalyzer.py | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index b98e19c..bfc50da 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -209,20 +209,20 @@ def analyze_run(self, run_name: str, meta: dict[str, Any]) -> dict[int, dict[str # collect figures and write once with PdfPages(pdf_path) as pdf: for ch in lh5.ls(f_dsp): - pmt = int(ch[2:]) + 1 - self.logger.info("Run %s - channel %s (PMT %d)", run_name, ch, pmt) + ch_idx = int(ch[2:]) + self.logger.info("Run %s - channel %s (PMT %d)", run_name, ch, ch_idx + 1) try: - fig, chan_data = self.process_channel(run_name, ch, pmt, meta, f_raw, f_dsp) + fig, chan_data = self.process_channel(run_name, ch, meta, f_raw, f_dsp) # fig may be None if plotting 
skipped if fig is not None: pdf.savefig(fig) plt.close(fig) - run_results[pmt] = chan_data + run_results[ch_idx] = chan_data except Exception as exc: self.logger.exception( "Channel-level error run=%s ch=%s: %s", run_name, ch, exc ) - run_results[pmt] = {"status": "error", "reason": str(exc)} + run_results[ch_idx] = {"status": "error", "reason": str(exc)} self.logger.info("Wrote PDF for run %s to %s", run_name, pdf_path) return run_results @@ -234,7 +234,6 @@ def process_channel( self, run_name: str, ch: str, - pmt: int, meta: dict[str, Any], f_raw: Path, f_dsp: Path, @@ -245,7 +244,7 @@ def process_channel( """ result: dict[str, Any] = {} ch_idx = int(ch[2:]) - result["voltage"] = meta[ch_idx + 1]["v10"] + result["voltage"] = meta[ch_idx]["v10"] # load data try: @@ -269,7 +268,7 @@ def process_channel( pe_vals, bins=self.bins, histtype="step", - label=f"channel {ch} (PMT {pmt}) at {result['voltage'].magnitude:.2f}" + label=f"channel {ch} (PMT {ch_idx+1}) at {result['voltage'].magnitude:.2f}" f" {format(result['voltage'].units,'~')}", ) @@ -563,7 +562,7 @@ def compute_snr(self) -> None: run_snr[pmt] = 1 - noise / signal except Exception as e: self.logger.warning( - "Failed to compute SNR for run %s PMT %s: %s", run_name, pmt, e + "Failed to compute SNR for run %s channel %s: %s", run_name, pmt, e ) snr[run_name] = run_snr @@ -575,9 +574,9 @@ def compute_snr(self) -> None: vals = [pmt_dict[p].n for p in pmts] errs = [pmt_dict[p].s for p in pmts] ax.errorbar(pmts, vals, errs, label=run_name, fmt="o") - ax.set_xlabel("PMT") + ax.set_xlabel("Channel") ax.set_ylabel("SNR (a.u.)") - ax.set_title("SNR per PMT (= 1 - valley/peak)") + ax.set_title("SNR per Channel (= 1 - valley/peak)") ax.legend() plt.tight_layout() plot_path = self.plot_folder / "snr_plot.png" @@ -601,7 +600,7 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: runtime_info = info.get("runtime", {}) if time_mode not in runtime_info: self.logger.warning( - "Run %s PMT %s: missing runtime 
'%s'; skipping DCR.", + "Run %s channel %s: missing runtime '%s'; skipping DCR.", run_name, pmt, time_mode, @@ -615,7 +614,7 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: except Exception as e: self.logger.warning( - "Failed to compute DCR for run %s PMT %s: %s", run_name, pmt, e + "Failed to compute DCR for run %s channel %s: %s", run_name, pmt, e ) dcr[run_name] = run_dcr self._save_results(dcr, "dcr") @@ -626,9 +625,9 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: vals = [pmt_dict[p].n for p in pmts] errs = [pmt_dict[p].s for p in pmts] ax.errorbar(pmts, vals, errs, label=run_name, fmt="o") - ax.set_xlabel("PMT") + ax.set_xlabel("Channel") ax.set_ylabel("DCR (Hz)") - ax.set_title("Dark Count Rate per PMT") + ax.set_title("Dark Count Rate per Channel") ax.legend() plt.tight_layout() plot_path = self.plot_folder / "dcr_plot.png" @@ -670,7 +669,7 @@ def compute_linear_gain_fit(self) -> None: [i.m for i in pmt["voltage"]], [i.n for i in pmt["vals"]], [i.s for i in pmt["vals"]], - label=f"PMT {key}", + label=f"Channel {key}", fmt="o", ) ax.set_xlabel(f"Voltage ({format(xunit,'~')})") From 01a9ce7a4bc0fd1fb12cdb1d63e2474d9abc090a Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 12:59:08 -0800 Subject: [PATCH 27/32] save hemisphere info to result file --- src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index bfc50da..93647c5 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -87,6 +87,7 @@ def __init__( self.bins = np.arange(-100, 10000, bin_size) self.lim = lim self.override_results = override_results + self.hemispheres = {} self.logger = logger or setup_logging() self.plot_folder.mkdir(parents=True, exist_ok=True) self.calibrator = calibrator @@ -116,6 +117,8 @@ def 
__init__( } self._save_results(cal_dict, "calibration_constants") + self._save_results(self.hemispheres, "hemispheres") + # ---------------------- # I/O helpers # ---------------------- @@ -139,6 +142,7 @@ def _load_aux(self) -> dict[str, Any]: else: msg = f"Key {k} not in aux file, skipping." self.logger.warning(msg) + self.hemispheres = {"A": aux.get("hemisphere_a"), "B": aux.get("hemisphere_b")} return ret def _load_results(self) -> dict: @@ -165,7 +169,8 @@ def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) existing[key] = quantity_to_dict(results) with open(self.result_yaml, "w") as f: yaml.safe_dump(existing, f, default_flow_style=False) - self.logger.info("Updated result YAML at %s", self.result_yaml) + msg = f"Updated {key} YAML at {self.result_yaml}" + self.logger.info(msg) else: with open(self.result_yaml, "w") as f: From 174c0961022caddbaee7deee4b7890a3ce2f6902 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 17:59:19 -0800 Subject: [PATCH 28/32] added insert time to document --- src/mintanalysis/pmt/ana/uploadToDB.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py index 3ea8d5d..ae934bc 100644 --- a/src/mintanalysis/pmt/ana/uploadToDB.py +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -7,7 +7,8 @@ import argparse import logging -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone from pathlib import Path from typing import Any, List @@ -69,6 +70,7 @@ class MeasurementResult: result: NumberOrList result_unc: NumberOrList units: StrOrList + insert_time: str = field(init=False) pmt_info: PMTInfo env_info: EnvironmentInfo @@ -106,6 +108,8 @@ def __post_init__(self): msg = "result, result_unc, units, and mapping.parameters must all have same length" raise ValueError(msg) + self.insert_time = 
datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f") + def get_values( obj: Any, @@ -257,9 +261,10 @@ def main(): ch_mask = args.ch_mask tag = args.key db_opts = { + "mongo_user": "mint", "use_tunnel": True, "ssh_keyring_service": "mongo-prod", - "mongo_keyring_service": "mongo-prod", + "mongo_keyring_service": "mongo-mint", "logger": logger.info, } @@ -381,14 +386,17 @@ def main(): # TODO replace with module level logic with ProductionDatabase(**db_opts) as db: overview = db.get_hemisphere_overview(str(sftp_root_dir), "tum") - pmt_name = overview.get("pmt-unit").get(str(pmt_no)).get("uid") - hemisphere = str(sftp_root_dir) + pmt_obj = { + "device_type": "pmt-unit", + "uid": overview.get("pmt-unit").get(str(pmt_no)).get("uid"), + "_id": overview.get("pmt-unit").get(str(pmt_no)).get("_id"), + } # Build measurement result res = MeasurementResult( measurement_type=measurement, measurement_location="MINT", - devices_used=[pmt_name, hemisphere], + devices_used=pmt_obj, result=vals, result_unc=errs, units=units, @@ -402,12 +410,12 @@ def main(): # Upload to database if args.dry: msg = f"The following dict would be uploaded to the DB: {asdict(res)}" - logger.info(msg) else: with ProductionDatabase(**db_opts) as db: db.client["mint"]["Measurements_Pmt"].insert_one(asdict(res)) msg = f"Uploaded document {asdict(res)} to mint/Measurements_Pmt" + logger.info(msg) ch_mask &= ch_mask - 1 # clear lowest set bit From 6853011d9151f550774183c9393be710c39ccda5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 18:12:31 -0800 Subject: [PATCH 29/32] Add DB connection settings to arg parser --- src/mintanalysis/pmt/ana/uploadToDB.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py index ae934bc..58fdc9d 100644 --- a/src/mintanalysis/pmt/ana/uploadToDB.py +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -253,6 +253,19 @@ def main(): "-r", 
"--reco", default="NNLS", help="Reconstruction algorithm used in DSP" ) + parser.add_argument("--mongo_user", default="mint", help="Specify user for mongoDB login") + parser.add_argument( + "--mongo_keyring", + default=None, + help="Specify keyring for mongoDB login (will prompt password if None)", + ) + parser.add_argument( + "--ssh_keyring", + default=None, + help="Specify keyring for ssh tunnel login (will prompt password if None)", + ) + parser.add_argument("--ssh_user", default="mint", help="Specify user for ssh tunnel login") + args = parser.parse_args() result_yaml = args.f_ana aux_file = args.f_aux @@ -261,10 +274,11 @@ def main(): ch_mask = args.ch_mask tag = args.key db_opts = { - "mongo_user": "mint", + "mongo_user": args.mongo_user, "use_tunnel": True, - "ssh_keyring_service": "mongo-prod", - "mongo_keyring_service": "mongo-mint", + "ssh_keyring_service": args.ssh_keyring, + "ssh_user": args.ssh_user, + "mongo_keyring_service": args.mongo_keyring, "logger": logger.info, } From a470796f435653c0e0d29cc9bf9b72249a15f245 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 18:30:47 -0800 Subject: [PATCH 30/32] define version automatically --- pyproject.toml | 15 ++++++++------- src/mintanalysis/pmt/ana/uploadToDB.py | 21 +++++++++++---------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0d80565..53fe75d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,14 @@ [build-system] -requires = [ - "setuptools>=61.2", - "setuptools_scm[toml]>=7" -] +requires = ["setuptools>=64", "setuptools_scm>=8"] build-backend = "setuptools.build_meta" +[tool.setuptools_scm] +write_to = "src/mintanalysis/_version.py" +# 'node-and-date' or 'node-and-timestamp' appends the short hash +local_scheme = "node-and-date" +# 'guess-next-dev' ensures versions like 1.0.1.dev1+gabc123 +version_scheme = "guess-next-dev" + [tool.setuptools] include-package-data = true zip-safe = false @@ -20,9 +24,6 @@ 
py-modules = [] where = ["src"] -[tool.setuptools_scm] -write_to = "src/mintanalysis/_version.py" - [project.urls] Homepage = "https://github.com/pone-software/mint-analysis" Issues = "https://github.com/pone-software/mint-analysis/issues" diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py index 58fdc9d..dac62b7 100644 --- a/src/mintanalysis/pmt/ana/uploadToDB.py +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -9,6 +9,7 @@ import logging from dataclasses import asdict, dataclass, field from datetime import datetime, timezone +from importlib.metadata import PackageNotFoundError, version from pathlib import Path from typing import Any, List @@ -19,15 +20,6 @@ from .utils import get_physics_object, quantity_to_dict, setup_logging -ROOT_DIR = Path("/home/pkrause/software/mint-analysis/debug_out") - -# binary channel mask to upload -# 0x0001 is PMT 1 only -# 0x0002 is PMT 2 only -# 0x0003 is PMT 1,2 -# 0x0004 is PMT 3 only - - NumberOrList = float | List[float] StrOrList = str | List[str] @@ -390,8 +382,17 @@ def main(): env_info = get_env_info(aux, tag) logger.info("collected environment info") + # get software info + try: + framework = version("mint-analysis") + except PackageNotFoundError: + framework = "unknown" + sw_info = SoftwareInfo( - framework="mint-xyz", pe_reconstruction=reco, sftp_path=sftp_dir, run_tags=tag + framework="mint_analyis_" + framework, + pe_reconstruction=reco, + sftp_path=sftp_dir, + run_tags=tag, ) logger.info("collected software info") From 158584cc65b14602c757945949facde5df4c7402 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 18:34:31 -0800 Subject: [PATCH 31/32] Remove uv dependencies --- pyproject.toml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 53fe75d..dd2e136 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,15 +96,6 @@ docs = [ build_ana = "mintanalysis.pmt.ana.peSpectrumAnalyzer:main" upload_ana = 
"mintanalysis.pmt.ana.uploadToDB:main"
 
-#[[tool.uv.index]]
-# Optional name for the index.
-#name = "dspeed-pone"
-# Required URL for the index.
-#url = "github://pone-software/dspeed-pone"
-
-[tool.uv.workspace]
-exclude = ["sim_data", "data", "dspeed"]
-
 [tool.pytest.ini_options]
 minversion = "6.0"
 addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]

From 3eb3472b61b2ca9d88b0b7d79af1730d8818226f Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Tue, 16 Dec 2025 18:43:23 -0800
Subject: [PATCH 32/32] Revise README for MINT analysis framework details

Updated README to clarify MINT analysis framework and installation
instructions.
---
 README.md | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 1be341c..4e91519 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,18 @@
 # mint-analysis
-Collection of scripts for MINT tests
+A framework to analyze P-ONE PMT data.
+The MINT analysis package processes data recorded by the Andromeda package (e.g., PMT or acoustic data) and applies advanced analysis routines that lie outside Andromeda’s scope.
 
-## Installation
-
-Clone the repo
-
-To install packages I recommend using uv [https://docs.astral.sh/uv/], which
-is a fast modern python package handler that also handles virtual environments.
-
-You'll need to install this first which is shown here [https://docs.astral.sh/uv/getting-started/installation/].
+For PMT data, two analysis frameworks are planned:
+- Pygama – Modern, Python-based, fast-turnaround DSP toolkit for time-series data.
+Status: Implemented.
+- IceTray – The classical IceCube framework and future default for P-ONE data.
+Status: Not yet implemented.
 
-Once you have it installed go to the cloned repo and inside you should see a pyproject.toml file
-this contains all the info on the packages needed. 
+
-To install the packages simply run `uv sync`and they should all install.
-
-Then to open a jupyter notebook: `uv run --with jupyter jupyter notebook`. (In fact you don't even need the `uv sync` and can
-simply do the run, uv will handle the rest). This will run a jupyter notebook instance inside the virtual environment.
-
-Some packages may cause issues such as h5py which may need to be installed separately via brew.
+Detailed onboarding instructions can be found [on confluence](https://p-one.atlassian.net/wiki/spaces/PONE/pages/1797849089/MINT+Analysis+User+Onboarding).
 
 ## Installation
 
-All new instances you make are completely independent so can use different package/python versions as
-specified in the pyproject.toml file.
+Clone the repo
+cd into repo
+`pip install .`