From e522379d0a479e3fc17c4f475dc5a1c6505216f4 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 21 Nov 2025 11:47:10 -0800 Subject: [PATCH 01/32] plot p.e. spectrum and get DCR --- src/mintanalysis/ana/__init__.py | 6 + src/mintanalysis/ana/plot_pe_spectrum.py | 140 +++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 src/mintanalysis/ana/__init__.py create mode 100644 src/mintanalysis/ana/plot_pe_spectrum.py diff --git a/src/mintanalysis/ana/__init__.py b/src/mintanalysis/ana/__init__.py new file mode 100644 index 0000000..7ee3e1c --- /dev/null +++ b/src/mintanalysis/ana/__init__.py @@ -0,0 +1,6 @@ +""" +Routines for the final analysis of PMT data +""" + + +__all__ = [] \ No newline at end of file diff --git a/src/mintanalysis/ana/plot_pe_spectrum.py b/src/mintanalysis/ana/plot_pe_spectrum.py new file mode 100644 index 0000000..2692038 --- /dev/null +++ b/src/mintanalysis/ana/plot_pe_spectrum.py @@ -0,0 +1,140 @@ +# Imports +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +import awkward as ak +from iminuit import cost, Minuit +from numba_stats import truncnorm, truncexpon, uniform + + +# LEGEND specific imports +from lgdo import lh5 +from dspeed import build_dsp + +from scipy.signal import find_peaks + +def valley_index_strict(y): + """ + Return the index of the valley between the first strict peak + and the next rise. Returns None if not found. + """ + n = len(y) + if n < 3: + return None + + # 1. Find first strict peak + peak = None + for i in range(1, n-1): + if y[i] > y[i-1] and y[i] > y[i+1]: + peak = i + break + if peak is None: + return None + + # 2. 
Walk downward and track the minimum + valley = peak + i = peak + 1 + + while i < n: + if y[i] > y[i-1]: # rising again → done + break + if y[i] < y[valley]: + valley = i + i += 1 + + if i == n: # never rose again + return None + + return peak, valley + +def gaussian(x, amp, mu, sigma): + return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + + +if __name__ == "__main__": + + f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020_2025_10_22_23_43_47.lh5" + f_dsp = f_raw.replace("raw","dsp") + + bin_size=20 + bins = np.arange(-100,10000,bin_size) + lim =20 + A4_LANDSCAPE = (11.69, 8.27) + result_dic ={} + + with PdfPages("pe_spectra.pdf") as pdf: + for ch in lh5.ls(f_dsp): + result_dic[ch] = {} + fig, ax = plt.subplots() + fig.set_figwidth(A4_LANDSCAPE[0]) + fig.set_figheight(A4_LANDSCAPE[1]) + + + d = lh5.read_as(f"{ch}/dsp",f_dsp,"ak") + vals =ak.to_numpy(d.nnls_solution.values,allow_missing=False) + pe_vals = np.nansum(np.where(vals>lim,vals,np.nan),axis=1) + + n, bins, patches = ax.hist(pe_vals,bins=bins,histtype="step",label=f"channel {ch} ON ($V_N$)") + + noise_peak, valley_idx = valley_index_strict(n) + pe_peak, _ = valley_index_strict(n[noise_peak:]) + ax.axvline(bins[valley_idx],color='red',ls='--') + ax.axvline(bins[noise_peak:][pe_peak],color='green',ls='--') + + bin_centers = 0.5*(bins[1:]+ bins[:-1]) + # Restrict range + x_min, x_max = bins[valley_idx], 2*bins[noise_peak:][pe_peak] #- bins[valley_idx] + mask = (bin_centers >= x_min) & (bin_centers <= x_max) + + bin_centers_fit = bin_centers[mask] + n_fit = n[mask] + + def nll(amp, mu, sigma): + """Poisson NLL for binned data""" + expected = gaussian(bin_centers_fit, amp, mu, sigma) + expected[expected <= 0] = 1e-10 # avoid log(0) + return np.sum(expected - n_fit * np.log(expected)) + + # Initial guesses + amp0 =n[noise_peak:][pe_peak] + mu0 = bins[noise_peak:][pe_peak] + sigma0 = 100.0 + + m = Minuit(nll, amp=amp0, mu=mu0, sigma=sigma0) + m.errordef = 
Minuit.LIKELIHOOD # important for Poisson NLL + m.migrad(iterate=10) # perform minimization + + fit_vals = m.values.to_dict() + fit_errs = m.errors.to_dict() + + result_dic[ch]["pe_peak"] = {"vals":fit_vals,"errs":fit_errs} + + + y_fit = gaussian(bin_centers, + amp=m.values["amp"], + mu=m.values["mu"], + sigma=m.values["sigma"]) + + ax.plot(bin_centers, y_fit, 'r-', label='NLL fit (Minuit)') + + dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) + + # timestamps in rawfile are currently broken + # ts = lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") + dt = 25.021 #from log file + + result_dic[ch]["dcr"] = {"dcr":{"dcr":dcts/dt,"counts":dcts,"runtime_s":dt}} + + ax.set_xlim(-10,2.5e3) + ax.set_ylim(0.5,None) + ax.set_yscale('log') + ax.set_ylabel(f"Counts/{bin_size} NNLS units") + ax.legend() + ax.set_xlabel("NNLS units") + ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") + + pdf.savefig() + plt.close() + + print(result_dic) From 742f0dc24a36884d4af42c7c94ac83b218743361 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 22:53:00 +0000 Subject: [PATCH 02/32] style: pre-commit fixes --- src/mintanalysis/ana/__init__.py | 3 +- src/mintanalysis/ana/plot_pe_spectrum.py | 83 +++++++++++------------- 2 files changed, 40 insertions(+), 46 deletions(-) diff --git a/src/mintanalysis/ana/__init__.py b/src/mintanalysis/ana/__init__.py index 7ee3e1c..714298a 100644 --- a/src/mintanalysis/ana/__init__.py +++ b/src/mintanalysis/ana/__init__.py @@ -2,5 +2,4 @@ Routines for the final analysis of PMT data """ - -__all__ = [] \ No newline at end of file +__all__ = [] diff --git a/src/mintanalysis/ana/plot_pe_spectrum.py b/src/mintanalysis/ana/plot_pe_spectrum.py index 2692038..0a39d4a 100644 --- a/src/mintanalysis/ana/plot_pe_spectrum.py +++ b/src/mintanalysis/ana/plot_pe_spectrum.py @@ -1,17 +1,13 @@ # Imports -import numpy as np -import matplotlib.pyplot as plt -from 
matplotlib.backends.backend_pdf import PdfPages import awkward as ak -from iminuit import cost, Minuit -from numba_stats import truncnorm, truncexpon, uniform - +import matplotlib.pyplot as plt +import numpy as np +from iminuit import Minuit # LEGEND specific imports from lgdo import lh5 -from dspeed import build_dsp +from matplotlib.backends.backend_pdf import PdfPages -from scipy.signal import find_peaks def valley_index_strict(y): """ @@ -24,8 +20,8 @@ def valley_index_strict(y): # 1. Find first strict peak peak = None - for i in range(1, n-1): - if y[i] > y[i-1] and y[i] > y[i+1]: + for i in range(1, n - 1): + if y[i] > y[i - 1] and y[i] > y[i + 1]: peak = i break if peak is None: @@ -36,32 +32,32 @@ def valley_index_strict(y): i = peak + 1 while i < n: - if y[i] > y[i-1]: # rising again → done + if y[i] > y[i - 1]: # rising again → done break if y[i] < y[valley]: valley = i i += 1 - if i == n: # never rose again + if i == n: # never rose again return None return peak, valley + def gaussian(x, amp, mu, sigma): return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - if __name__ == "__main__": f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020_2025_10_22_23_43_47.lh5" - f_dsp = f_raw.replace("raw","dsp") + f_dsp = f_raw.replace("raw", "dsp") - bin_size=20 - bins = np.arange(-100,10000,bin_size) - lim =20 + bin_size = 20 + bins = np.arange(-100, 10000, bin_size) + lim = 20 A4_LANDSCAPE = (11.69, 8.27) - result_dic ={} + result_dic = {} with PdfPages("pe_spectra.pdf") as pdf: for ch in lh5.ls(f_dsp): @@ -69,22 +65,23 @@ def gaussian(x, amp, mu, sigma): fig, ax = plt.subplots() fig.set_figwidth(A4_LANDSCAPE[0]) fig.set_figheight(A4_LANDSCAPE[1]) - - - d = lh5.read_as(f"{ch}/dsp",f_dsp,"ak") - vals =ak.to_numpy(d.nnls_solution.values,allow_missing=False) - pe_vals = np.nansum(np.where(vals>lim,vals,np.nan),axis=1) - - n, bins, patches = ax.hist(pe_vals,bins=bins,histtype="step",label=f"channel {ch} ON ($V_N$)") + + d = 
lh5.read_as(f"{ch}/dsp", f_dsp, "ak") + vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) + pe_vals = np.nansum(np.where(vals > lim, vals, np.nan), axis=1) + + n, bins, patches = ax.hist( + pe_vals, bins=bins, histtype="step", label=f"channel {ch} ON ($V_N$)" + ) noise_peak, valley_idx = valley_index_strict(n) pe_peak, _ = valley_index_strict(n[noise_peak:]) - ax.axvline(bins[valley_idx],color='red',ls='--') - ax.axvline(bins[noise_peak:][pe_peak],color='green',ls='--') + ax.axvline(bins[valley_idx], color="red", ls="--") + ax.axvline(bins[noise_peak:][pe_peak], color="green", ls="--") - bin_centers = 0.5*(bins[1:]+ bins[:-1]) + bin_centers = 0.5 * (bins[1:] + bins[:-1]) # Restrict range - x_min, x_max = bins[valley_idx], 2*bins[noise_peak:][pe_peak] #- bins[valley_idx] + x_min, x_max = bins[valley_idx], 2 * bins[noise_peak:][pe_peak] # - bins[valley_idx] mask = (bin_centers >= x_min) & (bin_centers <= x_max) bin_centers_fit = bin_centers[mask] @@ -95,9 +92,9 @@ def nll(amp, mu, sigma): expected = gaussian(bin_centers_fit, amp, mu, sigma) expected[expected <= 0] = 1e-10 # avoid log(0) return np.sum(expected - n_fit * np.log(expected)) - + # Initial guesses - amp0 =n[noise_peak:][pe_peak] + amp0 = n[noise_peak:][pe_peak] mu0 = bins[noise_peak:][pe_peak] sigma0 = 100.0 @@ -108,27 +105,25 @@ def nll(amp, mu, sigma): fit_vals = m.values.to_dict() fit_errs = m.errors.to_dict() - result_dic[ch]["pe_peak"] = {"vals":fit_vals,"errs":fit_errs} - + result_dic[ch]["pe_peak"] = {"vals": fit_vals, "errs": fit_errs} + + y_fit = gaussian( + bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] + ) - y_fit = gaussian(bin_centers, - amp=m.values["amp"], - mu=m.values["mu"], - sigma=m.values["sigma"]) + ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") - ax.plot(bin_centers, y_fit, 'r-', label='NLL fit (Minuit)') - dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) # timestamps in rawfile are currently broken # ts = 
lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") - dt = 25.021 #from log file + dt = 25.021 # from log file - result_dic[ch]["dcr"] = {"dcr":{"dcr":dcts/dt,"counts":dcts,"runtime_s":dt}} + result_dic[ch]["dcr"] = {"dcr": {"dcr": dcts / dt, "counts": dcts, "runtime_s": dt}} - ax.set_xlim(-10,2.5e3) - ax.set_ylim(0.5,None) - ax.set_yscale('log') + ax.set_xlim(-10, 2.5e3) + ax.set_ylim(0.5, None) + ax.set_yscale("log") ax.set_ylabel(f"Counts/{bin_size} NNLS units") ax.legend() ax.set_xlabel("NNLS units") From b46e608ce14e8f61f32a0159268eff698e3f804f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 23:01:54 +0000 Subject: [PATCH 03/32] style: pre-commit fixes --- src/mintanalysis/pmt/dsp/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mintanalysis/pmt/dsp/__init__.py b/src/mintanalysis/pmt/dsp/__init__.py index 76c15b0..810ab4c 100644 --- a/src/mintanalysis/pmt/dsp/__init__.py +++ b/src/mintanalysis/pmt/dsp/__init__.py @@ -3,7 +3,6 @@ """ from .build_nnls_matrix import build_nnls_matrix - from .lib_pulse_reco import pulse_analysis from .template_wf import build_template_waveform From c72c2a2ce1edb7469e0dadc9b944c29621bab093 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 24 Nov 2025 15:12:20 -0800 Subject: [PATCH 04/32] update paths --- src/mintanalysis/{ => pmt}/ana/__init__.py | 0 src/mintanalysis/{ => pmt}/ana/plot_pe_spectrum.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/mintanalysis/{ => pmt}/ana/__init__.py (100%) rename src/mintanalysis/{ => pmt}/ana/plot_pe_spectrum.py (100%) diff --git a/src/mintanalysis/ana/__init__.py b/src/mintanalysis/pmt/ana/__init__.py similarity index 100% rename from src/mintanalysis/ana/__init__.py rename to src/mintanalysis/pmt/ana/__init__.py diff --git a/src/mintanalysis/ana/plot_pe_spectrum.py b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py similarity index 100% rename from 
src/mintanalysis/ana/plot_pe_spectrum.py rename to src/mintanalysis/pmt/ana/plot_pe_spectrum.py From 1ce1685b5d40626d3c57977856df4092056cae76 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 24 Nov 2025 16:17:16 -0800 Subject: [PATCH 05/32] add temp dt from raw --- src/mintanalysis/pmt/ana/plot_pe_spectrum.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py index 0a39d4a..62cee3c 100644 --- a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py +++ b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py @@ -51,6 +51,7 @@ def gaussian(x, amp, mu, sigma): if __name__ == "__main__": f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020_2025_10_22_23_43_47.lh5" + # f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r018/r018_2025_09_26_18_44_17.lh5" f_dsp = f_raw.replace("raw", "dsp") bin_size = 20 @@ -115,9 +116,9 @@ def nll(amp, mu, sigma): dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) - # timestamps in rawfile are currently broken - # ts = lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") - dt = 25.021 # from log file + # TODO timestamps in rawfile are currently broken (they are sample numbers currently) + ts = lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") + dt = (ts.max() -ts.min())*4.8e-9 result_dic[ch]["dcr"] = {"dcr": {"dcr": dcts / dt, "counts": dcts, "runtime_s": dt}} From 51977b3b6f754acdc27a194af407555700f6f945 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Nov 2025 00:17:52 +0000 Subject: [PATCH 06/32] style: pre-commit fixes --- src/mintanalysis/pmt/ana/plot_pe_spectrum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py index 62cee3c..d315b35 100644 --- 
a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py +++ b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py @@ -117,8 +117,8 @@ def nll(amp, mu, sigma): dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) # TODO timestamps in rawfile are currently broken (they are sample numbers currently) - ts = lh5.read_as(f"{ch}/raw/timestamp",f_raw,"np") - dt = (ts.max() -ts.min())*4.8e-9 + ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") + dt = (ts.max() - ts.min()) * 4.8e-9 result_dic[ch]["dcr"] = {"dcr": {"dcr": dcts / dt, "counts": dcts, "runtime_s": dt}} From 7cef207b8e408135901541412089c772c64ff672 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 25 Nov 2025 14:50:21 -0800 Subject: [PATCH 07/32] add gain plot generation --- src/mintanalysis/pmt/ana/do_pmt_analysis.py | 247 +++++++++++++++++++ src/mintanalysis/pmt/ana/plot_pe_spectrum.py | 136 ---------- 2 files changed, 247 insertions(+), 136 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/do_pmt_analysis.py delete mode 100644 src/mintanalysis/pmt/ana/plot_pe_spectrum.py diff --git a/src/mintanalysis/pmt/ana/do_pmt_analysis.py b/src/mintanalysis/pmt/ana/do_pmt_analysis.py new file mode 100644 index 0000000..5f22da5 --- /dev/null +++ b/src/mintanalysis/pmt/ana/do_pmt_analysis.py @@ -0,0 +1,247 @@ +import awkward as ak +import matplotlib.pyplot as plt +import numpy as np +import yaml +from iminuit import Minuit +from lgdo import lh5 +from matplotlib.backends.backend_pdf import PdfPages +from scipy.optimize import curve_fit + + +def valley_index_strict(y): + """ + Return the index of the valley between the first strict peak + and the next rise. Returns None if not found. + """ + n = len(y) + if n < 3: + return None + + # 1. Find first strict peak + peak = None + for i in range(1, n - 1): + if y[i] > y[i - 1] and y[i] > y[i + 1]: + peak = i + break + if peak is None: + return None + + # 2. 
Walk downward and track the minimum + valley = peak + i = peak + 1 + + while i < n: + if y[i] > y[i - 1]: # rising again → done + break + if y[i] < y[valley]: + valley = i + i += 1 + + if i == n: # never rose again + return None + + return peak, valley + + +def nll(amp, mu, sigma, bin_centers_fit, n_fit): + """Poisson NLL for binned data""" + expected = gaussian(bin_centers_fit, amp, mu, sigma) + expected[expected <= 0] = 1e-10 # avoid log(0) + return np.sum(expected - n_fit * np.log(expected)) + + +def gaussian(x, amp, mu, sigma): + return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + +def linear_func(x, a, b): + return a * x + b + + +if __name__ == "__main__": + + with open( + "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml", + ) as file: + aux_dict = yaml.safe_load(file) + + raw_path = ( + "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020" + ) + out_folder = "/home/pkrause/software/mint-analysis/debug_out/" + + f_result = "result.yaml" + bin_size = 20 + bins = np.arange(-100, 10000, bin_size) + lim = 20 + A4_LANDSCAPE = (11.69, 8.27) + result_dic = {} + + for run in aux_dict: + result_dic[run] = {} + f_raw = raw_path + aux_dict[run]["daq"].split("/")[-1].replace("daq", "").replace( + "data", "lh5" + ) + f_dsp = f_raw.replace("raw", "dsp") + + with PdfPages(out_folder + f"pe_spectra_{run}.pdf") as pdf: + for ch in lh5.ls(f_dsp): + pmt = int(ch[2:]) + 1 + result_dic[run][pmt] = {} + result_dic[run][pmt]["voltages_in_V"] = aux_dict[run]["voltages_in_V"][int(ch[2:])] + fig, ax = plt.subplots() + fig.set_figwidth(A4_LANDSCAPE[0]) + fig.set_figheight(A4_LANDSCAPE[1]) + + d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") + vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) + pe_vals = np.nansum(np.where(vals > lim, vals, np.nan), axis=1) + + n, bins, patches = ax.hist( + pe_vals, + bins=bins, + histtype="step", + label=f"channel {ch} ON 
({result_dic[run][pmt]['voltages_in_V']:.2f} V)", + ) + + if aux_dict[run]["voltages_in_V"][int(ch[2:])] == 0: + ax.set_xlim(-10, 2.5e3) + ax.set_ylim(0.5, None) + ax.set_yscale("log") + ax.set_ylabel(f"Counts/{bin_size} NNLS units") + ax.legend() + ax.set_xlabel("NNLS units") + ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") + + pdf.savefig() + plt.close() + continue + + ###################### + # Fit p.e. peak # + ###################### + + noise_peak, valley_idx = valley_index_strict(n) + pe_peak, _ = valley_index_strict(n[noise_peak:]) + ax.axvline(bins[valley_idx], color="red", ls="--") + ax.axvline(bins[noise_peak:][pe_peak], color="green", ls="--") + + bin_centers = 0.5 * (bins[1:] + bins[:-1]) + # Restrict range + x_min, x_max = ( + bins[valley_idx], + 2 * bins[noise_peak:][pe_peak], + ) # - bins[valley_idx] + mask = (bin_centers >= x_min) & (bin_centers <= x_max) + + bin_centers_fit = bin_centers[mask] + n_fit = n[mask] + + # Initial guesses + amp0 = n[noise_peak:][pe_peak] + mu0 = bins[noise_peak:][pe_peak] + sigma0 = 100.0 + + m = Minuit( + lambda amp, mu, sigma: nll( + amp, mu, sigma, bin_centers_fit=bin_centers_fit, n_fit=n_fit + ), + amp=amp0, + mu=mu0, + sigma=sigma0, + ) + m.errordef = Minuit.LIKELIHOOD # important for Poisson NLL + m.migrad(iterate=10) # perform minimization + + fit_vals = m.values.to_dict() + fit_errs = m.errors.to_dict() + + result_dic[run][pmt]["pe_peak"] = {"vals": fit_vals, "errs": fit_errs} + + y_fit = gaussian( + bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] + ) + + ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") + + ax.set_xlim(-10, 2.5e3) + ax.set_ylim(0.5, None) + ax.set_yscale("log") + ax.set_ylabel(f"Counts/{bin_size} NNLS units") + ax.legend() + ax.set_xlabel("NNLS units") + ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") + + pdf.savefig() + plt.close() + + ###################### + # DCR calculation # + ###################### + + 
dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) + + # TODO fix timestamps in raw (they are sample numbers currently) + ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") + dt = (ts.max() - ts.min()) * 4.8e-9 + + result_dic[run][pmt]["dcr"] = { + "dcr": {"dcr": float(dcts / dt), "counts": float(dcts), "runtime_s": float(dt)} + } + + ###################### + # Gain calculation # + ###################### + tmp_dic = {} + for _, run in result_dic.items(): + for pmt in run: + if pmt not in tmp_dic: + tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} + v = run[pmt]["voltages_in_V"] + if v == 0: + continue + tmp_dic[pmt]["voltage"].append(v) + tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak"]["vals"]["mu"]) + tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak"]["errs"]["mu"]) + + with PdfPages(out_folder + "gain_plots.pdf") as pdf: + for key, pmt in tmp_dic.items(): + fig, ax = plt.subplots() + fig.set_figwidth(A4_LANDSCAPE[0]) + fig.set_figheight(A4_LANDSCAPE[1]) + ax.errorbar( + pmt["voltage"], + pmt["vals"], + pmt["errs"], + label=f"PMT {key}", + fmt="o", + ) + ax.set_ylabel("PMT position (NNLS units)") + ax.set_xlabel("Voltage (V)") + + params, covariance = curve_fit( + linear_func, + pmt["voltage"], + pmt["vals"], + sigma=pmt["errs"], + absolute_sigma=True, + ) + a_opt, b_opt = params + perr = np.sqrt(np.diag(covariance)) + x = np.linspace(-1 * b_opt / a_opt, 110, 1000) + ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") + + tmp_dic[key]["fit"] = { + "vals": {"a": float(a_opt), "b": float(b_opt)}, + "errs": {"a": float(perr[0]), "b": float(perr[1])}, + "func": "gain_in_NNLS_units = a*voltage_in_V+b", + } + + ax.legend() + pdf.savefig() + plt.close() + + result_dic["gain"] = tmp_dic + +with open(out_folder + "results.yaml", "w") as file: + aux_dict = yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py b/src/mintanalysis/pmt/ana/plot_pe_spectrum.py deleted file mode 
100644 index d315b35..0000000 --- a/src/mintanalysis/pmt/ana/plot_pe_spectrum.py +++ /dev/null @@ -1,136 +0,0 @@ -# Imports -import awkward as ak -import matplotlib.pyplot as plt -import numpy as np -from iminuit import Minuit - -# LEGEND specific imports -from lgdo import lh5 -from matplotlib.backends.backend_pdf import PdfPages - - -def valley_index_strict(y): - """ - Return the index of the valley between the first strict peak - and the next rise. Returns None if not found. - """ - n = len(y) - if n < 3: - return None - - # 1. Find first strict peak - peak = None - for i in range(1, n - 1): - if y[i] > y[i - 1] and y[i] > y[i + 1]: - peak = i - break - if peak is None: - return None - - # 2. Walk downward and track the minimum - valley = peak - i = peak + 1 - - while i < n: - if y[i] > y[i - 1]: # rising again → done - break - if y[i] < y[valley]: - valley = i - i += 1 - - if i == n: # never rose again - return None - - return peak, valley - - -def gaussian(x, amp, mu, sigma): - return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - - -if __name__ == "__main__": - - f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020_2025_10_22_23_43_47.lh5" - # f_raw = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r018/r018_2025_09_26_18_44_17.lh5" - f_dsp = f_raw.replace("raw", "dsp") - - bin_size = 20 - bins = np.arange(-100, 10000, bin_size) - lim = 20 - A4_LANDSCAPE = (11.69, 8.27) - result_dic = {} - - with PdfPages("pe_spectra.pdf") as pdf: - for ch in lh5.ls(f_dsp): - result_dic[ch] = {} - fig, ax = plt.subplots() - fig.set_figwidth(A4_LANDSCAPE[0]) - fig.set_figheight(A4_LANDSCAPE[1]) - - d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") - vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) - pe_vals = np.nansum(np.where(vals > lim, vals, np.nan), axis=1) - - n, bins, patches = ax.hist( - pe_vals, bins=bins, histtype="step", label=f"channel {ch} ON ($V_N$)" - ) - - noise_peak, valley_idx = 
valley_index_strict(n) - pe_peak, _ = valley_index_strict(n[noise_peak:]) - ax.axvline(bins[valley_idx], color="red", ls="--") - ax.axvline(bins[noise_peak:][pe_peak], color="green", ls="--") - - bin_centers = 0.5 * (bins[1:] + bins[:-1]) - # Restrict range - x_min, x_max = bins[valley_idx], 2 * bins[noise_peak:][pe_peak] # - bins[valley_idx] - mask = (bin_centers >= x_min) & (bin_centers <= x_max) - - bin_centers_fit = bin_centers[mask] - n_fit = n[mask] - - def nll(amp, mu, sigma): - """Poisson NLL for binned data""" - expected = gaussian(bin_centers_fit, amp, mu, sigma) - expected[expected <= 0] = 1e-10 # avoid log(0) - return np.sum(expected - n_fit * np.log(expected)) - - # Initial guesses - amp0 = n[noise_peak:][pe_peak] - mu0 = bins[noise_peak:][pe_peak] - sigma0 = 100.0 - - m = Minuit(nll, amp=amp0, mu=mu0, sigma=sigma0) - m.errordef = Minuit.LIKELIHOOD # important for Poisson NLL - m.migrad(iterate=10) # perform minimization - - fit_vals = m.values.to_dict() - fit_errs = m.errors.to_dict() - - result_dic[ch]["pe_peak"] = {"vals": fit_vals, "errs": fit_errs} - - y_fit = gaussian( - bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] - ) - - ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") - - dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) - - # TODO timestamps in rawfile are currently broken (they are sample numbers currently) - ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") - dt = (ts.max() - ts.min()) * 4.8e-9 - - result_dic[ch]["dcr"] = {"dcr": {"dcr": dcts / dt, "counts": dcts, "runtime_s": dt}} - - ax.set_xlim(-10, 2.5e3) - ax.set_ylim(0.5, None) - ax.set_yscale("log") - ax.set_ylabel(f"Counts/{bin_size} NNLS units") - ax.legend() - ax.set_xlabel("NNLS units") - ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") - - pdf.savefig() - plt.close() - - print(result_dic) From 376806878e30ecd79a16c595e87a28d37e0fe0c8 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 26 Nov 2025 
19:29:34 -0800 Subject: [PATCH 08/32] split analysis into multiple scripts --- src/mintanalysis/pmt/ana/dcr_analysis.py | 50 ++++++ .../pmt/ana/linear_gain_analysis.py | 123 +++++++++++++++ ...mt_analysis.py => pe_spectrum_analysis.py} | 145 +++++++++--------- 3 files changed, 243 insertions(+), 75 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/dcr_analysis.py create mode 100644 src/mintanalysis/pmt/ana/linear_gain_analysis.py rename src/mintanalysis/pmt/ana/{do_pmt_analysis.py => pe_spectrum_analysis.py} (59%) diff --git a/src/mintanalysis/pmt/ana/dcr_analysis.py b/src/mintanalysis/pmt/ana/dcr_analysis.py new file mode 100644 index 0000000..f982a23 --- /dev/null +++ b/src/mintanalysis/pmt/ana/dcr_analysis.py @@ -0,0 +1,50 @@ +import os + +import yaml + + +def linear_func(x, a, b): + return a * x + b + + +if __name__ == "__main__": + + f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" + override_results = True + time_mode = "aux" + + if os.path.exists(f_result): + with open(f_result) as file: + result_dic = yaml.safe_load(file) + else: + raise RuntimeError("File does not exist") + + if "pe_spectrum" not in result_dic.keys(): + raise RuntimeError("pe_spectrum info not present!") + + if "dcr" in result_dic and not override_results: + raise RuntimeError("results already exist and override flag not set") + result_dic["dcr"] = {} + for rk, run in result_dic["pe_spectrum"].items(): + result_dic["dcr"][rk] = {} + for key, pmt in run.items(): + if pmt["voltage"]["val"] == 0: + continue + if "dcr" in pmt and not override_results: + print( + f"DCR key exists but override flag is not set! 
--> skipping {pmt} in run {run}" + ) + continue + dcts = ( + pmt["statistics"]["1st_pe_fit_integral_below_valley"]["val"] + + pmt["statistics"]["cts_above_valley"]["val"] + ) + result_dic["dcr"][rk][key] = { + "val": dcts / pmt["runtime"][time_mode], + "err": (dcts**0.5) / pmt["runtime"][time_mode], + "unit": "Hz", + } + + +with open(f_result, "w") as file: + yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/linear_gain_analysis.py b/src/mintanalysis/pmt/ana/linear_gain_analysis.py new file mode 100644 index 0000000..39a101a --- /dev/null +++ b/src/mintanalysis/pmt/ana/linear_gain_analysis.py @@ -0,0 +1,123 @@ +import os + +import matplotlib.pyplot as plt +import numpy as np +import yaml +from matplotlib.backends.backend_pdf import PdfPages +from scipy.optimize import curve_fit + + +def linear_func(x, a, b): + return a * x + b + + +if __name__ == "__main__": + + f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" + with open(f_result) as file: + result_dic = yaml.safe_load(file) + + out_folder = "/home/pkrause/software/mint-analysis/debug_out/gain/" + + bin_size = 20 + bins = np.arange(-100, 10000, bin_size) + lim = 20 + A4_LANDSCAPE = (11.69, 8.27) + + override_results = True + + if os.path.exists(f_result): + with open(f_result) as file: + result_dic = yaml.safe_load(file) + else: + raise RuntimeError("File does not exist") + + if "pe_spectrum" not in result_dic: + raise RuntimeError("pe_spectrum info not present!") + + if "linear_gain" in result_dic and not override_results: + raise RuntimeError("results already exist and override flag not set") + + ###################### + # Gain calculation # + ###################### + tmp_dic = {"used_keys": []} + y_unit = None + x_unit = None + for key, run in result_dic["pe_spectrum"].items(): + tmp_dic["used_keys"].append(key) + for pmt in run: + if pmt not in tmp_dic: + tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} + v = 
run[pmt]["voltage"]["val"] + xu = run[pmt]["voltage"]["unit"] + if x_unit is None: + x_unit = xu + else: + assert x_unit == xu + if v == 0: + continue + + tmp_dic[pmt]["voltage"].append(v) + tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]["val"]) + tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak_fit"]["mean"]["err"]) + + yu = run[pmt]["pe_peak_fit"]["mean"]["unit"] + if y_unit is None: + y_unit = yu + else: + assert y_unit == yu + + with PdfPages(out_folder + "gain_plots.pdf") as pdf: + for key, pmt in tmp_dic.items(): + if key == "used_keys": + continue + fig, ax = plt.subplots() + fig.set_figwidth(A4_LANDSCAPE[0]) + fig.set_figheight(A4_LANDSCAPE[1]) + ax.errorbar( + pmt["voltage"], + pmt["vals"], + pmt["errs"], + label=f"PMT {key}", + fmt="o", + ) + ax.set_ylabel(f"PMT position ({y_unit})") + ax.set_xlabel(f"Voltage ({x_unit})") + + params, covariance = curve_fit( + linear_func, + pmt["voltage"], + pmt["vals"], + sigma=pmt["errs"], + absolute_sigma=True, + ) + a_opt, b_opt = params + perr = np.sqrt(np.diag(covariance)) + x = np.linspace(-1 * b_opt / a_opt, 110, 1000) + ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") + + tmp_dic[key]["a"] = { + "val": float(a_opt), + "err": float(perr[0]), + "unit": f"{y_unit}/{x_unit}", + } + tmp_dic[key]["b"] = { + "val": float(b_opt), + "err": float(perr[1]), + "unit": f"{y_unit}/{x_unit}", + } + tmp_dic[key]["func"] = "G = a*voltage+b" + + pmt.pop("errs") + pmt.pop("vals") + pmt.pop("voltage") + ax.legend() + pdf.savefig() + plt.close() + + result_dic["linear_gain"] = tmp_dic + + +with open(f_result, "w") as file: + yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/do_pmt_analysis.py b/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py similarity index 59% rename from src/mintanalysis/pmt/ana/do_pmt_analysis.py rename to src/mintanalysis/pmt/ana/pe_spectrum_analysis.py index 5f22da5..3a179b2 100644 --- 
a/src/mintanalysis/pmt/ana/do_pmt_analysis.py +++ b/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py @@ -1,3 +1,5 @@ +import os + import awkward as ak import matplotlib.pyplot as plt import numpy as np @@ -5,7 +7,6 @@ from iminuit import Minuit from lgdo import lh5 from matplotlib.backends.backend_pdf import PdfPages -from scipy.optimize import curve_fit def valley_index_strict(y): @@ -60,17 +61,18 @@ def linear_func(x, a, b): if __name__ == "__main__": - with open( - "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml", - ) as file: + f_aux = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml" + f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" + with open(f_aux) as file: aux_dict = yaml.safe_load(file) raw_path = ( "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020" ) - out_folder = "/home/pkrause/software/mint-analysis/debug_out/" + plot_folder = "/home/pkrause/software/mint-analysis/debug_out/pe_spectra/" + + override_results = True - f_result = "result.yaml" bin_size = 20 bins = np.arange(-100, 10000, bin_size) lim = 20 @@ -84,11 +86,14 @@ def linear_func(x, a, b): ) f_dsp = f_raw.replace("raw", "dsp") - with PdfPages(out_folder + f"pe_spectra_{run}.pdf") as pdf: + with PdfPages(plot_folder + f"pe_spectra_{run}.pdf") as pdf: for ch in lh5.ls(f_dsp): pmt = int(ch[2:]) + 1 result_dic[run][pmt] = {} - result_dic[run][pmt]["voltages_in_V"] = aux_dict[run]["voltages_in_V"][int(ch[2:])] + result_dic[run][pmt]["voltage"] = { + "val": aux_dict[run]["voltages_in_V"][int(ch[2:])], + "unit": "V", + } fig, ax = plt.subplots() fig.set_figwidth(A4_LANDSCAPE[0]) fig.set_figheight(A4_LANDSCAPE[1]) @@ -101,7 +106,7 @@ def linear_func(x, a, b): pe_vals, bins=bins, histtype="step", - label=f"channel {ch} ON ({result_dic[run][pmt]['voltages_in_V']:.2f} V)", + label=f"channel {ch} ON 
({result_dic[run][pmt]['voltage']['val']:.2f} {result_dic[run][pmt]['voltage']['unit']})", ) if aux_dict[run]["voltages_in_V"][int(ch[2:])] == 0: @@ -156,7 +161,11 @@ def linear_func(x, a, b): fit_vals = m.values.to_dict() fit_errs = m.errors.to_dict() - result_dic[run][pmt]["pe_peak"] = {"vals": fit_vals, "errs": fit_errs} + result_dic[run][pmt]["pe_peak_fit"] = { + "mean": {"val": fit_vals["mu"], "err": fit_errs["mu"], "unit": "NNLS"}, + "sigma": {"val": fit_vals["sigma"], "err": fit_errs["sigma"], "unit": "NNLS"}, + "amp": {"val": fit_vals["amp"], "err": fit_errs["amp"], "unit": ""}, + } y_fit = gaussian( bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] @@ -176,72 +185,58 @@ def linear_func(x, a, b): plt.close() ###################### - # DCR calculation # + # spectrum values # ###################### - dcts = np.sum(y_fit[:valley_idx]) + np.sum(n[valley_idx:]) - - # TODO fix timestamps in raw (they are sample numbers currently) - ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") - dt = (ts.max() - ts.min()) * 4.8e-9 - - result_dic[run][pmt]["dcr"] = { - "dcr": {"dcr": float(dcts / dt), "counts": float(dcts), "runtime_s": float(dt)} + result_dic[run][pmt]["statistics"] = { + "1st_pe_fit_integral_below_valley": { + "val": float(np.sum(y_fit[:valley_idx])), + "unit": "", + }, + "cts_above_valley": {"val": int(np.sum(n[:valley_idx])), "unit": ""}, + "cts_below_valley": {"val": int(np.sum(n[valley_idx:])), "unit": ""}, + "1st_pe_fit_integral": {"val": int(float(np.sum(y_fit))), "unit": ""}, + "total_cts": {"val": int(np.sum(n)), "unit": ""}, + "valley": { + "pos": {"val": float(bin_centers[valley_idx]), "unit": "NNLS"}, + "amp": int(n[valley_idx]), + }, + "1st_pe_guess": { + "pos": {"val": float(mu0), "unit": "NNLS"}, + "amp": {"val": int(amp0), "unit": ""}, + }, } - ###################### - # Gain calculation # - ###################### - tmp_dic = {} - for _, run in result_dic.items(): - for pmt in run: - if pmt not in tmp_dic: - 
tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} - v = run[pmt]["voltages_in_V"] - if v == 0: - continue - tmp_dic[pmt]["voltage"].append(v) - tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak"]["vals"]["mu"]) - tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak"]["errs"]["mu"]) - - with PdfPages(out_folder + "gain_plots.pdf") as pdf: - for key, pmt in tmp_dic.items(): - fig, ax = plt.subplots() - fig.set_figwidth(A4_LANDSCAPE[0]) - fig.set_figheight(A4_LANDSCAPE[1]) - ax.errorbar( - pmt["voltage"], - pmt["vals"], - pmt["errs"], - label=f"PMT {key}", - fmt="o", - ) - ax.set_ylabel("PMT position (NNLS units)") - ax.set_xlabel("Voltage (V)") - - params, covariance = curve_fit( - linear_func, - pmt["voltage"], - pmt["vals"], - sigma=pmt["errs"], - absolute_sigma=True, - ) - a_opt, b_opt = params - perr = np.sqrt(np.diag(covariance)) - x = np.linspace(-1 * b_opt / a_opt, 110, 1000) - ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") - - tmp_dic[key]["fit"] = { - "vals": {"a": float(a_opt), "b": float(b_opt)}, - "errs": {"a": float(perr[0]), "b": float(perr[1])}, - "func": "gain_in_NNLS_units = a*voltage_in_V+b", - } - - ax.legend() - pdf.savefig() - plt.close() - - result_dic["gain"] = tmp_dic - -with open(out_folder + "results.yaml", "w") as file: - aux_dict = yaml.safe_dump(result_dic, file, default_flow_style=False) + result_dic[run][pmt]["runtime"] = {"unit": "s"} + if "runtime_in_s" in aux_dict[run]: + result_dic[run][pmt]["runtime"]["aux"] = aux_dict[run]["runtime_in_s"] + + if f"{ch}/raw/timestamp_sec" in lh5.ls( + f_raw, f"{ch}/raw/" + ) and f"{ch}/raw/timestamp_ps" in lh5.ls(f_raw, f"{ch}/raw/"): + ts = lh5.read_as(f"{ch}/raw/timestamp_sec", f_raw, "np") + ts_ps = lh5.read_as(f"{ch}/raw/timestamp_ps", f_raw, "np") + result_dic[run][pmt]["runtime"]["raw"] = float( + ts[ts.argmax()] + + ts_ps[ts.argmax()] * 1e-12 + - (ts[ts.argmin()] + ts_ps[ts.argmin()] * 1e-12) + ) + + # support for old raw files + elif f"{ch}/raw/timestamp" in 
lh5.ls(f_raw, f"{ch}/raw/"): + ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") + result_dic[run][pmt]["runtime"]["raw"] = float((ts.max() - ts.min()) * 4.8e-9) + +if os.path.exists(f_result): + with open(f_result) as file: + tmp_dic = yaml.safe_load(file) + if "pe_spectrum" in tmp_dic.keys() and not override_results: + raise RuntimeError("Already exists and override flag is not set") + else: + tmp_dic["pe_spectrum"] = result_dic + with open(f_result, "w") as file: + yaml.safe_dump(tmp_dic, file, default_flow_style=False) + +else: + with open(f_result, "w") as file: + yaml.safe_dump({"pe_spectrum": result_dic}, file, default_flow_style=False) From 261cf332b57d42a1dfaef3392b44efcf416a0675 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 27 Nov 2025 12:50:11 -0800 Subject: [PATCH 09/32] Refactor scripts to analyser class --- src/mintanalysis/pmt/ana/analyser_class.py | 725 ++++++++++++++++++ src/mintanalysis/pmt/ana/dcr_analysis.py | 50 -- .../pmt/ana/linear_gain_analysis.py | 123 --- .../pmt/ana/pe_spectrum_analysis.py | 242 ------ 4 files changed, 725 insertions(+), 415 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/analyser_class.py delete mode 100644 src/mintanalysis/pmt/ana/dcr_analysis.py delete mode 100644 src/mintanalysis/pmt/ana/linear_gain_analysis.py delete mode 100644 src/mintanalysis/pmt/ana/pe_spectrum_analysis.py diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py new file mode 100644 index 0000000..badab7f --- /dev/null +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -0,0 +1,725 @@ +""" +PESpectrumAnalyzer + +- Logging to stdout + file. +- Missing critical files -> hard fail. +- Per-channel fit/peak failures -> skip and continue. +- One PDF per run (written once; pages appended in-memory during run processing). +- Cleaner decomposition and minimal repetition. +- 1st p.e. 
fit: single gauss +- DCR estimate and plot +- linear gain fit +- SNR plot + +Usage: Run via CLI with desired flags +""" + +from __future__ import annotations + +import argparse +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import awkward as ak +import matplotlib.pyplot as plt +import numpy as np +import yaml +from iminuit import Minuit +from lgdo import lh5 +from matplotlib.backends.backend_pdf import PdfPages +from scipy.optimize import curve_fit + +# -------------------------- +# Configuration / Constants +# -------------------------- +AUX_YAML = Path( + "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml" +) +RAW_DIR = Path("/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020") +RESULT_YAML = Path("/home/pkrause/software/mint-analysis/debug_out/results.yaml") +PLOT_FOLDER = Path("/home/pkrause/software/mint-analysis/debug_out/pe_spectra/") +LOG_FILE = Path("/home/pkrause/software/mint-analysis/debug_out/pe_spectrum.log") + +# Analysis constants +BIN_SIZE = 20 +BINS = np.arange(-100, 10000, BIN_SIZE) +LIM = 20 +A4_LANDSCAPE = (11.69, 8.27) +OVERRIDE_RESULTS = True + + +# -------------------------- +# Logging Setup +# -------------------------- + + +def setup_logging(log_file: Path = LOG_FILE, level: int = logging.INFO) -> logging.Logger: + logger = logging.getLogger("PESpectrum") + logger.setLevel(level) + logger.propagate = False + + if not logger.handlers: + fmt = logging.Formatter( + "[%(asctime)s] [%(name)s - %(funcName)s] [%(levelname)s] %(message)s" + ) + + sh = logging.StreamHandler() + sh.setLevel(level) + sh.setFormatter(fmt) + logger.addHandler(sh) + + fh = logging.FileHandler(log_file, mode="w") + fh.setLevel(level) + fh.setFormatter(fmt) + logger.addHandler(fh) + + return logger + + +# -------------------------- +# Small helpers +# -------------------------- + + +@dataclass +class ChannelResult: + status: str # 'ok', 
'skipped', 'error' + data: dict[str, Any] + + +def linear_func(x, a, b): + return a * x + b + + +def gaussian(x: np.ndarray, amp: float, mu: float, sigma: float) -> np.ndarray: + return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + +def poisson_nll(amp: float, mu: float, sigma: float, x: np.ndarray, y: np.ndarray) -> float: + expected = gaussian(x, amp, mu, sigma) + expected = np.clip(expected, 1e-10, None) + return float(np.sum(expected - y * np.log(expected))) + + +def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: + """Return (peak_idx, valley_idx) or None.""" + n = len(y) + if n < 3: + return None + + peak = None + for i in range(1, n - 1): + if y[i] > y[i - 1] and y[i] > y[i + 1]: + peak = i + break + if peak is None: + return None + + valley = peak + i = peak + 1 + while i < n: + if y[i] > y[i - 1]: + return peak, valley + if y[i] < y[valley]: + valley = i + i += 1 + + return None + + +# -------------------------- +# Analyzer class +# -------------------------- +class PESpectrumAnalyzer: + def __init__( + self, + aux_yaml: Path = AUX_YAML, + raw_dir: Path = RAW_DIR, + plot_folder: Path = PLOT_FOLDER, + result_yaml: Path = RESULT_YAML, + bins: np.ndarray = BINS, + bin_size: int = BIN_SIZE, + lim: int = LIM, + override_results: bool = OVERRIDE_RESULTS, + logger: logging.Logger | None = None, + ) -> None: + self.aux_yaml = aux_yaml + self.raw_dir = raw_dir + self.plot_folder = plot_folder + self.result_yaml = result_yaml + self.bins = bins + self.bin_size = bin_size + self.lim = lim + self.override_results = override_results + self.logger = logger or setup_logging() + self.aux = self._load_aux() + self.plot_folder.mkdir(parents=True, exist_ok=True) + + # ---------------------- + # I/O helpers + # ---------------------- + def _load_aux(self) -> dict[str, Any]: + if not self.aux_yaml.exists(): + msg = f"Aux file not found: {self.aux_yaml}" + raise FileNotFoundError(msg) + with open(self.aux_yaml) as f: + aux = yaml.safe_load(f) + 
self.logger.info("Loaded aux YAML: %s", self.aux_yaml) + return aux + + # ---------------------- + # Public entrypoints + # ---------------------- + def run(self) -> None: + results: dict[str, dict[int, dict[str, Any]]] = {} + for run_name, meta in self.aux.items(): + self.logger.info("Starting run: %s", run_name) + try: + results[run_name] = self.analyze_run(run_name, meta) + except FileNotFoundError as e: + self.logger.exception("Critical file missing for run %s: %s", run_name, e) + raise + self._save_results(results) + self.logger.info("All runs processed.") + + # ---------------------- + # Per-run flow + # ---------------------- + def analyze_run(self, run_name: str, meta: dict[str, Any]) -> dict[int, dict[str, Any]]: + # build file paths + fname = meta["daq"].split("/")[-1].replace("daq", "r020").replace("data", "lh5") + f_raw = self.raw_dir / fname + if not f_raw.exists(): + msg = f"Raw file for run {run_name} not found: {f_raw}" + raise FileNotFoundError(msg) + f_dsp = Path(str(f_raw).replace("raw", "dsp")) + if not f_dsp.exists(): + msg = f"DSP file for run {run_name} not found: {f_dsp}" + raise FileNotFoundError(msg) + + run_results: dict[int, dict[str, Any]] = {} + pdf_path = self.plot_folder / f"pe_spectra_{run_name}.pdf" + + # collect figures and write once + with PdfPages(pdf_path) as pdf: + for ch in lh5.ls(f_dsp): + pmt = int(ch[2:]) + 1 + self.logger.info("Run %s - channel %s (PMT %d)", run_name, ch, pmt) + try: + fig, chan_data = self.process_channel(run_name, ch, pmt, meta, f_raw, f_dsp) + # fig may be None if plotting skipped + if fig is not None: + pdf.savefig(fig) + plt.close(fig) + run_results[pmt] = chan_data + except Exception as exc: + self.logger.exception( + "Channel-level error run=%s ch=%s: %s", run_name, ch, exc + ) + run_results[pmt] = {"status": "error", "reason": str(exc)} + + self.logger.info("Wrote PDF for run %s to %s", run_name, pdf_path) + return run_results + + # ---------------------- + # Per-channel processing + # 
---------------------- + def process_channel( + self, + run_name: str, + ch: str, + pmt: int, + meta: dict[str, Any], + f_raw: Path, + f_dsp: Path, + ) -> tuple[plt.Figure | None, dict[str, Any]]: + """Process channel. Returns (figure_or_None, channel_result_dict). + + Non-critical failures return a result dict with status 'skipped' + """ + result: dict[str, Any] = {} + ch_idx = int(ch[2:]) + voltage = float(meta["voltages_in_V"][ch_idx]) + result["voltage"] = {"val": voltage, "unit": "V"} + + # load data + try: + d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") + except Exception as e: + msg = f"Failed to read DSP for {ch} in {f_dsp}: {e}" + self.logger.warning(msg) + return None, {"status": "skipped", "reason": msg} + + # compute pe-values + try: + vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) + pe_vals = np.nansum(np.where(vals > self.lim, vals, np.nan), axis=1) + except Exception as e: + msg = f"Failed to compute pe values for {ch}: {e}" + self.logger.warning(msg) + return None, {"status": "skipped", "reason": msg} + + # histogram + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) + n, bins, _ = ax.hist( + pe_vals, + bins=self.bins, + histtype="step", + label=f"channel {ch} (PMT {pmt}) at {voltage:.2f} V", + ) + + bin_centers = 0.5 * (bins[1:] + bins[:-1]) + + # noise-only + if voltage == 0: + self._decorate_axis(ax) + # minimal result (no fit) + result.update( + {"status": "ok", "statistics": {}, "pe_peak_fit": {}, "runtime": {"unit": "s"}} + ) + if "runtime_in_s" in meta: + result["runtime"]["aux"] = meta["runtime_in_s"] + raw_runtime = self._extract_runtime_if_present(f_raw, ch) + if raw_runtime is not None: + result["runtime"]["raw"] = raw_runtime + return fig, result + + # detect valley & peaks + vi = valley_index_strict(n) + if vi is None: + msg = "Valley detection failed (no strict peak/valley)." 
+ self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": msg} + + noise_peak, valley_idx = vi + + # find first p.e. peak after noise_peak + sub = n[noise_peak:] + pe_vi = valley_index_strict(sub) + if pe_vi is None: + msg = "1st-p.e. detection failed after noise peak." + self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": msg} + + pe_peak_rel, _ = pe_vi + pe_peak_idx = noise_peak + pe_peak_rel + + # annotate choices + ax.axvline(bin_centers[valley_idx], color="red", ls="--", label="valley") + ax.axvline(bin_centers[pe_peak_idx], color="green", ls="--", label="1st pe guess") + + # fit window + x_min = bin_centers[valley_idx] + x_max = 2 * bin_centers[pe_peak_idx] + mask = (bin_centers >= x_min) & (bin_centers <= x_max) + bin_centers_fit = bin_centers[mask] + n_fit = n[mask] + + if len(bin_centers_fit) < 3: + msg = f"Insufficient bins for fitting (n_fit={len(bin_centers_fit)})." 
+ self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": msg} + + amp0 = float(n[pe_peak_idx]) + mu0 = float(bin_centers[pe_peak_idx]) + sigma0 = 100.0 + + try: + m = Minuit( + lambda amp, mu, sigma: poisson_nll(amp, mu, sigma, bin_centers_fit, n_fit), + amp=amp0, + mu=mu0, + sigma=sigma0, + ) + m.errordef = Minuit.LIKELIHOOD + m.migrad(iterate=10) + except Exception as e: + msg = f"Minuit error for {ch}: {e}" + self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": msg} + + # Basic validity check + if not getattr(m, "valid", True): + self.logger.warning("Minuit invalid result for run %s ch %s", run_name, ch) + self._decorate_axis(ax) + return fig, {"status": "skipped", "reason": "Minuit invalid"} + + fit_vals = {k: float(v) for k, v in m.values.to_dict().items()} + fit_errs = {k: float(v) for k, v in m.errors.to_dict().items()} + + result["pe_peak_fit"] = { + "mean": {"val": fit_vals["mu"], "err": fit_errs["mu"], "unit": "NNLS"}, + "sigma": {"val": fit_vals["sigma"], "err": fit_errs["sigma"], "unit": "NNLS"}, + "amp": {"val": fit_vals["amp"], "err": fit_errs["amp"], "unit": ""}, + } + + # full fit curve over bins for plotting & integrals + y_fit = gaussian(bin_centers, fit_vals["amp"], fit_vals["mu"], fit_vals["sigma"]) + ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") + + self._decorate_axis(ax) + + # statistics + try: + result["statistics"] = { + "1st_pe_fit_integral_below_valley": { + "val": float(np.sum(y_fit[:valley_idx])), + "unit": "", + }, + "cts_above_valley": {"val": int(np.sum(n[:valley_idx])), "unit": ""}, + "cts_below_valley": {"val": int(np.sum(n[valley_idx:])), "unit": ""}, + "1st_pe_fit_integral": {"val": int(float(np.sum(y_fit))), "unit": ""}, + "total_cts": {"val": int(np.sum(n)), "unit": ""}, + "valley": { + "pos": {"val": float(bin_centers[valley_idx]), "unit": "NNLS"}, + "amp": 
{"val": int(n[valley_idx]), "unit": ""}, + }, + "1st_pe_guess": { + "pos": {"val": float(mu0), "unit": "NNLS"}, + "amp": {"val": int(amp0), "unit": ""}, + }, + } + except Exception as e: + self.logger.warning( + "Statistics computation failed for run %s ch %s: %s", run_name, ch, e + ) + result.setdefault("statistics", {}) + result["statistics"]["error"] = str(e) + + # runtime + result["runtime"] = {"unit": "s"} + if "runtime_in_s" in meta: + result["runtime"]["aux"] = meta["runtime_in_s"] + raw_runtime = self._extract_runtime_if_present(f_raw, ch) + if raw_runtime is not None: + result["runtime"]["raw"] = raw_runtime + + result["status"] = "ok" + return fig, result + + # ---------------------- + # Utilities + # ---------------------- + def _decorate_axis(self, ax: plt.Axes) -> None: + ax.set_xlim(-10, 2.5e3) + ax.set_ylim(0.5, None) + ax.set_yscale("log") + ax.set_ylabel(f"Counts/{self.bin_size} NNLS units") + ax.set_xlabel("NNLS units") + ax.set_title(f"pygama-NNLS reconstruction ({self.lim} units solution cut-off)") + ax.legend() + + def _extract_runtime_if_present(self, f_raw: Path, ch: str) -> float | None: + try: + entries = set(lh5.ls(f_raw, f"{ch}/raw/")) + except Exception: + entries = set() + + sec_key = f"{ch}/raw/timestamp_sec" + ps_key = f"{ch}/raw/timestamp_ps" + if sec_key in entries and ps_key in entries: + try: + ts = lh5.read_as(sec_key, f_raw, "np") + ts_ps = lh5.read_as(ps_key, f_raw, "np") + idx_max = int(ts.argmax()) + idx_min = int(ts.argmin()) + return float( + ts[idx_max] + ts_ps[idx_max] * 1e-12 - (ts[idx_min] + ts_ps[idx_min] * 1e-12) + ) + except Exception: + self.logger.debug("Failed to extract new-style timestamps for %s in %s", ch, f_raw) + + legacy_key = f"{ch}/raw/timestamp" + if legacy_key in entries: + try: + t = lh5.read_as(legacy_key, f_raw, "np") + return float((t.max() - t.min()) * 4.8e-9) + except Exception: + self.logger.debug("Failed to extract legacy timestamps for %s in %s", ch, f_raw) + + return None + + def 
_save_results(self, results: dict[str, dict[int, dict[str, Any]]]) -> None: + if self.result_yaml.exists(): + with open(self.result_yaml) as f: + existing = yaml.safe_load(f) or {} + if "pe_spectrum" in existing and not self.override_results: + msg = "Results already present and override flag is False." + raise RuntimeError(msg) + existing["pe_spectrum"] = results + with open(self.result_yaml, "w") as f: + yaml.safe_dump(existing, f, default_flow_style=False) + self.logger.info("Updated result YAML at %s", self.result_yaml) + else: + with open(self.result_yaml, "w") as f: + yaml.safe_dump({"pe_spectrum": results}, f, default_flow_style=False) + self.logger.info("Wrote new result YAML at %s", self.result_yaml) + + # ---------------------- + # Signal to Noise Ratio (SNR) + # ---------------------- + def plot_snr(self) -> None: + if not self.result_yaml.exists(): + msg = f"Result YAML not found: {self.result_yaml}" + raise FileNotFoundError(msg) + with open(self.result_yaml) as f: + data = yaml.safe_load(f) or {} + if "pe_spectrum" not in data: + msg = "pe_spectrum info not present in result YAML; run analysis first." 
+ raise RuntimeError(msg) + + snr = {} + for run_name, pmt_dict in data["pe_spectrum"].items(): + run_snr = {} + for pmt, info in pmt_dict.items(): + try: + if info.get("voltage", {}).get("val", 0) == 0: + continue + noise = { + "val": info.get("statistics", {}) + .get("valley", {}) + .get("amp", {}) + .get("val", 0.0) + } + noise["err"] = noise["val"] ** 0.5 + if info.get("status", "skipped") == "ok": + signal = { + "val": info.get("pe_peak_fit").get("amp", {}).get("val", 0.0), + "err": info.get("pe_peak_fit").get("amp", {}).get("err", 0.0), + } + else: + signal = { + "val": info.get("statistics", {}) + .get("1st_pe_guess", {}) + .get("amp", {}) + .get("val", 0) + } + signal["err"] = signal["val"] ** 0.5 + run_snr[pmt] = { + "val": 1 - noise["val"] / signal["val"], + "err": ( + noise["err"] ** 2 / signal["val"] ** 2 + + (noise["val"] ** 2 * signal["err"] ** 2) / signal["val"] ** 4 + ), + "unit": "", + } + except Exception as e: + self.logger.warning( + "Failed to compute SNR for run %s PMT %s: %s", run_name, pmt, e + ) + snr[run_name] = run_snr + + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) + for run_name, pmt_dict in snr.items(): + pmts = sorted(pmt_dict.keys()) + vals = [pmt_dict[p]["val"] for p in pmts] + ax.plot(pmts, vals, marker="o", label=run_name) + ax.set_xlabel("PMT") + ax.set_ylabel("SNR (a.u.)") + ax.set_title("SNR per PMT (= 1 - valley/peak)") + ax.legend() + plt.tight_layout() + plot_path = self.plot_folder / "snr_plot.png" + fig.savefig(plot_path) + plt.close(fig) + self.logger.info("SNR plot saved to %s", plot_path) + + # ---------------------- + # Dark Count Rate (DCR) + # ---------------------- + def compute_dark_count_rate(self, time_mode: str = "aux") -> None: + if not self.result_yaml.exists(): + msg = f"Result YAML not found: {self.result_yaml}" + raise FileNotFoundError(msg) + with open(self.result_yaml) as f: + data = yaml.safe_load(f) or {} + if "pe_spectrum" not in data: + msg = "pe_spectrum info not present in result YAML; run analysis 
first." + raise RuntimeError(msg) + if "dcr" in data and not self.override_results: + msg = "DCR already exists and override flag is False." + raise RuntimeError(msg) + dcr = {} + for run_name, pmt_dict in data["pe_spectrum"].items(): + run_dcr = {} + for pmt, info in pmt_dict.items(): + try: + if info.get("voltage", {}).get("val", 0) == 0: + continue + stats = info.get("statistics", {}) + runtime_info = info.get("runtime", {}) + if time_mode not in runtime_info: + self.logger.warning( + "Run %s PMT %s: missing runtime '%s'; skipping DCR.", + run_name, + pmt, + time_mode, + ) + continue + dcts = stats.get("1st_pe_fit_integral_below_valley", {}).get( + "val", 0.0 + ) + stats.get("cts_above_valley", {}).get("val", 0) + runtime = runtime_info[time_mode] + run_dcr[pmt] = { + "val": float(dcts) / float(runtime), + "err": float(dcts**0.5) / float(runtime), + "unit": "Hz", + } + except Exception as e: + self.logger.warning( + "Failed to compute DCR for run %s PMT %s: %s", run_name, pmt, e + ) + dcr[run_name] = run_dcr + data["dcr"] = dcr + with open(self.result_yaml, "w") as f: + yaml.safe_dump(data, f, default_flow_style=False) + self.logger.info("Wrote DCR results to %s", self.result_yaml) + + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) + for run_name, pmt_dict in dcr.items(): + pmts = sorted(pmt_dict.keys()) + vals = [pmt_dict[p]["val"] for p in pmts] + ax.plot(pmts, vals, marker="o", label=run_name) + ax.set_xlabel("PMT") + ax.set_ylabel("DCR (Hz)") + ax.set_title("Dark Count Rate per PMT") + ax.legend() + plt.tight_layout() + plot_path = self.plot_folder / "dcr_plot.png" + fig.savefig(plot_path) + plt.close(fig) + self.logger.info("DCR plot saved to %s", plot_path) + + # ---------------------- + # Linear Gain Fit computation + # ---------------------- + def compute_linear_gain_fit(self) -> None: + if not self.result_yaml.exists(): + msg = f"Result YAML not found: {self.result_yaml}" + raise FileNotFoundError(msg) + with open(self.result_yaml) as f: + data = 
yaml.safe_load(f) or {} + if "pe_spectrum" not in data: + msg = "pe_spectrum info not present in result YAML; run analysis first." + raise RuntimeError(msg) + + tmp_dic = {"used_keys": []} + y_unit = None + x_unit = None + for key, run in data["pe_spectrum"].items(): + tmp_dic["used_keys"].append(key) + for pmt in run: + if pmt not in tmp_dic: + tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} + v = run[pmt]["voltage"]["val"] + xu = run[pmt]["voltage"]["unit"] + if x_unit is None: + x_unit = xu + else: + assert x_unit == xu + if v == 0: + continue + tmp_dic[pmt]["voltage"].append(v) + tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]["val"]) + tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak_fit"]["mean"]["err"]) + yu = run[pmt]["pe_peak_fit"]["mean"]["unit"] + if y_unit is None: + y_unit = yu + else: + assert y_unit == yu + + pdf_path = self.plot_folder / "gain_plots.pdf" + with PdfPages(pdf_path) as pdf: + for key, pmt in tmp_dic.items(): + if key == "used_keys": + continue + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) + ax.errorbar(pmt["voltage"], pmt["vals"], pmt["errs"], fmt="o", label=f"PMT {key}") + ax.set_xlabel(f"Voltage ({x_unit})") + ax.set_ylabel(f"PMT position ({y_unit})") + params, covariance = curve_fit( + linear_func, + pmt["voltage"], + pmt["vals"], + sigma=pmt["errs"], + absolute_sigma=True, + ) + a_opt, b_opt = params + perr = np.sqrt(np.diag(covariance)) + x = np.linspace(-1 * b_opt / a_opt, max(pmt["voltage"]) + 10, 1000) + ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") + tmp_dic[key]["a"] = { + "val": float(a_opt), + "err": float(perr[0]), + "unit": f"{y_unit}/{x_unit}", + } + tmp_dic[key]["b"] = { + "val": float(b_opt), + "err": float(perr[1]), + "unit": f"{y_unit}/{x_unit}", + } + tmp_dic[key]["func"] = "G = a*voltage+b" + pmt.pop("voltage") + pmt.pop("vals") + pmt.pop("errs") + ax.legend() + pdf.savefig(fig) + plt.close(fig) + data["linear_gain"] = tmp_dic + with open(self.result_yaml, "w") as f: + 
yaml.safe_dump(data, f, default_flow_style=False) + self.logger.info("Linear gain fit results saved to %s", self.result_yaml) + + +# -------------------------- +# CLI entrypoint +# -------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser( + description="PE Spectrum Analyzer with DCR and linear gain fit" + ) + parser.add_argument( + "-p", "--compute-pe", action="store_true", help="Do p.e. spectrum analysis" + ) + parser.add_argument( + "-d", "--compute-dcr", action="store_true", help="Compute DCR after analysis" + ) + parser.add_argument( + "-g", "--compute-gain", action="store_true", help="Compute linear gain fit after analysis" + ) + parser.add_argument( + "-s", "--compute-snr", action="store_true", help="Compute SNR after analysis" + ) + args = parser.parse_args() + + logger = setup_logging(LOG_FILE, level=logging.INFO) + try: + analyzer = PESpectrumAnalyzer(logger=logger) + if args.compute_pe: + analyzer.run() + if args.compute_dcr: + analyzer.compute_dark_count_rate(time_mode="aux") + if args.compute_gain: + analyzer.compute_linear_gain_fit() + if args.compute_snr: + analyzer.plot_snr() + + logger.info("Processing complete.") + except Exception as e: + logger.exception("Fatal error: %s", e) + raise + + +# -------------------------- +# CLI entrypoint +# -------------------------- +if __name__ == "__main__": + main() diff --git a/src/mintanalysis/pmt/ana/dcr_analysis.py b/src/mintanalysis/pmt/ana/dcr_analysis.py deleted file mode 100644 index f982a23..0000000 --- a/src/mintanalysis/pmt/ana/dcr_analysis.py +++ /dev/null @@ -1,50 +0,0 @@ -import os - -import yaml - - -def linear_func(x, a, b): - return a * x + b - - -if __name__ == "__main__": - - f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" - override_results = True - time_mode = "aux" - - if os.path.exists(f_result): - with open(f_result) as file: - result_dic = yaml.safe_load(file) - else: - raise RuntimeError("File does not exist") - - if "pe_spectrum" 
not in result_dic.keys(): - raise RuntimeError("pe_spectrum info not present!") - - if "dcr" in result_dic and not override_results: - raise RuntimeError("results already exist and override flag not set") - result_dic["dcr"] = {} - for rk, run in result_dic["pe_spectrum"].items(): - result_dic["dcr"][rk] = {} - for key, pmt in run.items(): - if pmt["voltage"]["val"] == 0: - continue - if "dcr" in pmt and not override_results: - print( - f"DCR key exists but override flag is not set! --> skipping {pmt} in run {run}" - ) - continue - dcts = ( - pmt["statistics"]["1st_pe_fit_integral_below_valley"]["val"] - + pmt["statistics"]["cts_above_valley"]["val"] - ) - result_dic["dcr"][rk][key] = { - "val": dcts / pmt["runtime"][time_mode], - "err": (dcts**0.5) / pmt["runtime"][time_mode], - "unit": "Hz", - } - - -with open(f_result, "w") as file: - yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/linear_gain_analysis.py b/src/mintanalysis/pmt/ana/linear_gain_analysis.py deleted file mode 100644 index 39a101a..0000000 --- a/src/mintanalysis/pmt/ana/linear_gain_analysis.py +++ /dev/null @@ -1,123 +0,0 @@ -import os - -import matplotlib.pyplot as plt -import numpy as np -import yaml -from matplotlib.backends.backend_pdf import PdfPages -from scipy.optimize import curve_fit - - -def linear_func(x, a, b): - return a * x + b - - -if __name__ == "__main__": - - f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" - with open(f_result) as file: - result_dic = yaml.safe_load(file) - - out_folder = "/home/pkrause/software/mint-analysis/debug_out/gain/" - - bin_size = 20 - bins = np.arange(-100, 10000, bin_size) - lim = 20 - A4_LANDSCAPE = (11.69, 8.27) - - override_results = True - - if os.path.exists(f_result): - with open(f_result) as file: - result_dic = yaml.safe_load(file) - else: - raise RuntimeError("File does not exist") - - if "pe_spectrum" not in result_dic: - raise RuntimeError("pe_spectrum info not 
present!") - - if "linear_gain" in result_dic and not override_results: - raise RuntimeError("results already exist and override flag not set") - - ###################### - # Gain calculation # - ###################### - tmp_dic = {"used_keys": []} - y_unit = None - x_unit = None - for key, run in result_dic["pe_spectrum"].items(): - tmp_dic["used_keys"].append(key) - for pmt in run: - if pmt not in tmp_dic: - tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} - v = run[pmt]["voltage"]["val"] - xu = run[pmt]["voltage"]["unit"] - if x_unit is None: - x_unit = xu - else: - assert x_unit == xu - if v == 0: - continue - - tmp_dic[pmt]["voltage"].append(v) - tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]["val"]) - tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak_fit"]["mean"]["err"]) - - yu = run[pmt]["pe_peak_fit"]["mean"]["unit"] - if y_unit is None: - y_unit = yu - else: - assert y_unit == yu - - with PdfPages(out_folder + "gain_plots.pdf") as pdf: - for key, pmt in tmp_dic.items(): - if key == "used_keys": - continue - fig, ax = plt.subplots() - fig.set_figwidth(A4_LANDSCAPE[0]) - fig.set_figheight(A4_LANDSCAPE[1]) - ax.errorbar( - pmt["voltage"], - pmt["vals"], - pmt["errs"], - label=f"PMT {key}", - fmt="o", - ) - ax.set_ylabel(f"PMT position ({y_unit})") - ax.set_xlabel(f"Voltage ({x_unit})") - - params, covariance = curve_fit( - linear_func, - pmt["voltage"], - pmt["vals"], - sigma=pmt["errs"], - absolute_sigma=True, - ) - a_opt, b_opt = params - perr = np.sqrt(np.diag(covariance)) - x = np.linspace(-1 * b_opt / a_opt, 110, 1000) - ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") - - tmp_dic[key]["a"] = { - "val": float(a_opt), - "err": float(perr[0]), - "unit": f"{y_unit}/{x_unit}", - } - tmp_dic[key]["b"] = { - "val": float(b_opt), - "err": float(perr[1]), - "unit": f"{y_unit}/{x_unit}", - } - tmp_dic[key]["func"] = "G = a*voltage+b" - - pmt.pop("errs") - pmt.pop("vals") - pmt.pop("voltage") - ax.legend() - pdf.savefig() 
- plt.close() - - result_dic["linear_gain"] = tmp_dic - - -with open(f_result, "w") as file: - yaml.safe_dump(result_dic, file, default_flow_style=False) diff --git a/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py b/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py deleted file mode 100644 index 3a179b2..0000000 --- a/src/mintanalysis/pmt/ana/pe_spectrum_analysis.py +++ /dev/null @@ -1,242 +0,0 @@ -import os - -import awkward as ak -import matplotlib.pyplot as plt -import numpy as np -import yaml -from iminuit import Minuit -from lgdo import lh5 -from matplotlib.backends.backend_pdf import PdfPages - - -def valley_index_strict(y): - """ - Return the index of the valley between the first strict peak - and the next rise. Returns None if not found. - """ - n = len(y) - if n < 3: - return None - - # 1. Find first strict peak - peak = None - for i in range(1, n - 1): - if y[i] > y[i - 1] and y[i] > y[i + 1]: - peak = i - break - if peak is None: - return None - - # 2. Walk downward and track the minimum - valley = peak - i = peak + 1 - - while i < n: - if y[i] > y[i - 1]: # rising again → done - break - if y[i] < y[valley]: - valley = i - i += 1 - - if i == n: # never rose again - return None - - return peak, valley - - -def nll(amp, mu, sigma, bin_centers_fit, n_fit): - """Poisson NLL for binned data""" - expected = gaussian(bin_centers_fit, amp, mu, sigma) - expected[expected <= 0] = 1e-10 # avoid log(0) - return np.sum(expected - n_fit * np.log(expected)) - - -def gaussian(x, amp, mu, sigma): - return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - - -def linear_func(x, a, b): - return a * x + b - - -if __name__ == "__main__": - - f_aux = "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml" - f_result = "/home/pkrause/software/mint-analysis/debug_out/results.yaml" - with open(f_aux) as file: - aux_dict = yaml.safe_load(file) - - raw_path = ( - 
"/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020/r020" - ) - plot_folder = "/home/pkrause/software/mint-analysis/debug_out/pe_spectra/" - - override_results = True - - bin_size = 20 - bins = np.arange(-100, 10000, bin_size) - lim = 20 - A4_LANDSCAPE = (11.69, 8.27) - result_dic = {} - - for run in aux_dict: - result_dic[run] = {} - f_raw = raw_path + aux_dict[run]["daq"].split("/")[-1].replace("daq", "").replace( - "data", "lh5" - ) - f_dsp = f_raw.replace("raw", "dsp") - - with PdfPages(plot_folder + f"pe_spectra_{run}.pdf") as pdf: - for ch in lh5.ls(f_dsp): - pmt = int(ch[2:]) + 1 - result_dic[run][pmt] = {} - result_dic[run][pmt]["voltage"] = { - "val": aux_dict[run]["voltages_in_V"][int(ch[2:])], - "unit": "V", - } - fig, ax = plt.subplots() - fig.set_figwidth(A4_LANDSCAPE[0]) - fig.set_figheight(A4_LANDSCAPE[1]) - - d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") - vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) - pe_vals = np.nansum(np.where(vals > lim, vals, np.nan), axis=1) - - n, bins, patches = ax.hist( - pe_vals, - bins=bins, - histtype="step", - label=f"channel {ch} ON ({result_dic[run][pmt]['voltage']['val']:.2f} {result_dic[run][pmt]['voltage']['unit']})", - ) - - if aux_dict[run]["voltages_in_V"][int(ch[2:])] == 0: - ax.set_xlim(-10, 2.5e3) - ax.set_ylim(0.5, None) - ax.set_yscale("log") - ax.set_ylabel(f"Counts/{bin_size} NNLS units") - ax.legend() - ax.set_xlabel("NNLS units") - ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") - - pdf.savefig() - plt.close() - continue - - ###################### - # Fit p.e. 
peak # - ###################### - - noise_peak, valley_idx = valley_index_strict(n) - pe_peak, _ = valley_index_strict(n[noise_peak:]) - ax.axvline(bins[valley_idx], color="red", ls="--") - ax.axvline(bins[noise_peak:][pe_peak], color="green", ls="--") - - bin_centers = 0.5 * (bins[1:] + bins[:-1]) - # Restrict range - x_min, x_max = ( - bins[valley_idx], - 2 * bins[noise_peak:][pe_peak], - ) # - bins[valley_idx] - mask = (bin_centers >= x_min) & (bin_centers <= x_max) - - bin_centers_fit = bin_centers[mask] - n_fit = n[mask] - - # Initial guesses - amp0 = n[noise_peak:][pe_peak] - mu0 = bins[noise_peak:][pe_peak] - sigma0 = 100.0 - - m = Minuit( - lambda amp, mu, sigma: nll( - amp, mu, sigma, bin_centers_fit=bin_centers_fit, n_fit=n_fit - ), - amp=amp0, - mu=mu0, - sigma=sigma0, - ) - m.errordef = Minuit.LIKELIHOOD # important for Poisson NLL - m.migrad(iterate=10) # perform minimization - - fit_vals = m.values.to_dict() - fit_errs = m.errors.to_dict() - - result_dic[run][pmt]["pe_peak_fit"] = { - "mean": {"val": fit_vals["mu"], "err": fit_errs["mu"], "unit": "NNLS"}, - "sigma": {"val": fit_vals["sigma"], "err": fit_errs["sigma"], "unit": "NNLS"}, - "amp": {"val": fit_vals["amp"], "err": fit_errs["amp"], "unit": ""}, - } - - y_fit = gaussian( - bin_centers, amp=m.values["amp"], mu=m.values["mu"], sigma=m.values["sigma"] - ) - - ax.plot(bin_centers, y_fit, "r-", label="NLL fit (Minuit)") - - ax.set_xlim(-10, 2.5e3) - ax.set_ylim(0.5, None) - ax.set_yscale("log") - ax.set_ylabel(f"Counts/{bin_size} NNLS units") - ax.legend() - ax.set_xlabel("NNLS units") - ax.set_title("pygama-NNLS reconstruction (20 units solution cut-off)") - - pdf.savefig() - plt.close() - - ###################### - # spectrum values # - ###################### - - result_dic[run][pmt]["statistics"] = { - "1st_pe_fit_integral_below_valley": { - "val": float(np.sum(y_fit[:valley_idx])), - "unit": "", - }, - "cts_above_valley": {"val": int(np.sum(n[:valley_idx])), "unit": ""}, - "cts_below_valley": 
{"val": int(np.sum(n[valley_idx:])), "unit": ""}, - "1st_pe_fit_integral": {"val": int(float(np.sum(y_fit))), "unit": ""}, - "total_cts": {"val": int(np.sum(n)), "unit": ""}, - "valley": { - "pos": {"val": float(bin_centers[valley_idx]), "unit": "NNLS"}, - "amp": int(n[valley_idx]), - }, - "1st_pe_guess": { - "pos": {"val": float(mu0), "unit": "NNLS"}, - "amp": {"val": int(amp0), "unit": ""}, - }, - } - - result_dic[run][pmt]["runtime"] = {"unit": "s"} - if "runtime_in_s" in aux_dict[run]: - result_dic[run][pmt]["runtime"]["aux"] = aux_dict[run]["runtime_in_s"] - - if f"{ch}/raw/timestamp_sec" in lh5.ls( - f_raw, f"{ch}/raw/" - ) and f"{ch}/raw/timestamp_ps" in lh5.ls(f_raw, f"{ch}/raw/"): - ts = lh5.read_as(f"{ch}/raw/timestamp_sec", f_raw, "np") - ts_ps = lh5.read_as(f"{ch}/raw/timestamp_ps", f_raw, "np") - result_dic[run][pmt]["runtime"]["raw"] = float( - ts[ts.argmax()] - + ts_ps[ts.argmax()] * 1e-12 - - (ts[ts.argmin()] + ts_ps[ts.argmin()] * 1e-12) - ) - - # support for old raw files - elif f"{ch}/raw/timestamp" in lh5.ls(f_raw, f"{ch}/raw/"): - ts = lh5.read_as(f"{ch}/raw/timestamp", f_raw, "np") - result_dic[run][pmt]["runtime"]["raw"] = float((ts.max() - ts.min()) * 4.8e-9) - -if os.path.exists(f_result): - with open(f_result) as file: - tmp_dic = yaml.safe_load(file) - if "pe_spectrum" in tmp_dic.keys() and not override_results: - raise RuntimeError("Already exists and override flag is not set") - else: - tmp_dic["pe_spectrum"] = result_dic - with open(f_result, "w") as file: - yaml.safe_dump(tmp_dic, file, default_flow_style=False) - -else: - with open(f_result, "w") as file: - yaml.safe_dump({"pe_spectrum": result_dic}, file, default_flow_style=False) From 0727907801ff714f82aedc92dd6c78504a6b800a Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 27 Nov 2025 18:05:49 -0800 Subject: [PATCH 10/32] Add functionality to convert NNLS units to other units --- src/mintanalysis/pmt/ana/analyser_class.py | 187 ++++++++++++++++++++- 1 file changed, 186 
insertions(+), 1 deletion(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index badab7f..f4cae76 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -144,6 +144,11 @@ def __init__( lim: int = LIM, override_results: bool = OVERRIDE_RESULTS, logger: logging.Logger | None = None, + up_sampling_ratio: float = 24 / 240, + v_per_adc: float = 0.25e-3, + adc_impedance: float = 75.0, + sampling_time: float = 4.8e-9, + calib: str = "None", ) -> None: self.aux_yaml = aux_yaml self.raw_dir = raw_dir @@ -156,6 +161,11 @@ def __init__( self.logger = logger or setup_logging() self.aux = self._load_aux() self.plot_folder.mkdir(parents=True, exist_ok=True) + self.up_sampling_ratio = up_sampling_ratio + self.v_per_adc = v_per_adc + self.adc_impedance = adc_impedance + self.sampling_time = sampling_time + self.calib = calib # ---------------------- # I/O helpers @@ -270,6 +280,9 @@ def process_channel( label=f"channel {ch} (PMT {pmt}) at {voltage:.2f} V", ) + if self.calib != "None": + self._add_charge_axis(ax, False) + bin_centers = 0.5 * (bins[1:] + bins[:-1]) # noise-only @@ -461,6 +474,71 @@ def _save_results(self, results: dict[str, dict[int, dict[str, Any]]]) -> None: yaml.safe_dump({"pe_spectrum": results}, f, default_flow_style=False) self.logger.info("Wrote new result YAML at %s", self.result_yaml) + def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: + """ + This function add a axis in new units to a given axis plot. + + Parameters + ---------- + ax : plt.Axes + The plot to which to add the pC axis. + is_y : bool + If True add a new y-axis. Else an x-axis. 
+ """ + if self.calib not in ["pC", "adc", "gain"]: + self.logger.warning("Invalid calibration unit, not calibrating.") + return + + label = "Charge " + if self.calib == "pC": + func = ( + lambda x: x + * ( + (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) + / self.adc_impedance + ) + * 1e12, + lambda y: y + / ( + ( + (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) + / self.adc_impedance + ) + * 1e12 + ), + ) + label += "(pC)" + + elif self.calib == "gain": + label = "Gain (a.u.)" + elem = 1.602e-19 # C (elementary charge) + func = ( + lambda x: x + * ( + (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) + / self.adc_impedance + ) + / elem, + lambda y: y + / ( + ( + (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) + / self.adc_impedance + ) + / elem + ), + ) + + elif self.calib == "adc": + label += f"(ADC x {self.sampling_time*1e9:.1f} ns)" + func = (lambda x: x * self.up_sampling_ratio, lambda y: y / self.up_sampling_ratio) + if is_y: + secax_y = ax.secondary_yaxis("right", functions=func) + secax_y.set_ylabel(label) + else: + secax_x = ax.secondary_xaxis("top", functions=func) + secax_x.set_xlabel(label) + # ---------------------- # Signal to Noise Ratio (SNR) # ---------------------- @@ -643,6 +721,10 @@ def compute_linear_gain_fit(self) -> None: ax.errorbar(pmt["voltage"], pmt["vals"], pmt["errs"], fmt="o", label=f"PMT {key}") ax.set_xlabel(f"Voltage ({x_unit})") ax.set_ylabel(f"PMT position ({y_unit})") + + if self.calib != "None": + self._add_charge_axis(ax, True) + params, covariance = curve_fit( linear_func, pmt["voltage"], @@ -676,6 +758,72 @@ def compute_linear_gain_fit(self) -> None: yaml.safe_dump(data, f, default_flow_style=False) self.logger.info("Linear gain fit results saved to %s", self.result_yaml) + def calibrate_nnls_values(self, calibration_func, output_file, new_unit): + """ + Reads the current results.yaml, finds all entries that contain a dict with + keys {"val": , "unit": "NNLS"}, 
applies calibration_func(val) + and writes a calibrated result file. + """ + factor = calibration_func(1) + try: + with open(self.result_yaml) as f: + data = yaml.safe_load(f) + except Exception as e: + msg = f"Failed to load file: {e}" + self.logger.error(msg) + raise + + def classify(unit: str): + unit = str(unit) + if unit == "NNLS" or unit.startswith("NNLS/"): + return "nnls" + if "/NNLS" in unit: + return "per_nnls" + if "NNLS" in unit: + return "nnls" + return None + + def recurse(obj): + if isinstance(obj, dict): + if set(obj.keys()) >= {"val", "unit"}: + kind = classify(obj.get("unit")) + if kind == "nnls": + try: + obj["val"] = obj["val"] * factor + if "err" in obj: + obj["err"] = obj["err"] * factor + obj["unit"] = obj["unit"].replace("NNLS", new_unit) + except Exception as e: + msg = f"NNLS calibration failed for value {obj}: {e}" + self.logger.error(msg) + elif kind == "per_nnls": + try: + obj["val"] = obj["val"] / factor + if "err" in obj: + obj["err"] = obj["err"] / factor + obj["unit"] = obj["unit"].replace("NNLS", new_unit) + except Exception as e: + msg = f"per-NNLS calibration failed for value {obj}: {e}" + self.logger.error(msg) + for _, v in obj.items(): + recurse(v) + elif isinstance(obj, list): + for item in obj: + recurse(item) + + recurse(data) + + try: + with open(output_file, "w") as f: + yaml.safe_dump(data, f, default_flow_style=False) + except Exception as e: + msg = f"Failed to write calibrated file: {e}" + self.logger.error(msg) + raise + + msg = f"Calibrated results written to {output_file}" + self.logger.info(msg) + # -------------------------- # CLI entrypoint @@ -698,11 +846,18 @@ def main() -> None: parser.add_argument( "-s", "--compute-snr", action="store_true", help="Compute SNR after analysis" ) + parser.add_argument( + "-c", + "--calibrate", + default="None", + choices=["pC", "adc", "gain", "None"], + help="Choose a charge calibration value (pC, adc, gain, or None)", + ) args = parser.parse_args() logger = 
setup_logging(LOG_FILE, level=logging.INFO) try: - analyzer = PESpectrumAnalyzer(logger=logger) + analyzer = PESpectrumAnalyzer(logger=logger, calib=args.calibrate) if args.compute_pe: analyzer.run() if args.compute_dcr: @@ -711,6 +866,36 @@ def main() -> None: analyzer.compute_linear_gain_fit() if args.compute_snr: analyzer.plot_snr() + if args.calibrate != "None": + if args.calibrate == "pC": + analyzer.calibrate_nnls_values( + lambda x: x + * ( + (analyzer.v_per_adc * analyzer.up_sampling_ratio * analyzer.sampling_time) + / analyzer.adc_impedance + ) + * 1e12, + str(analyzer.result_yaml).replace(".yaml", "_pC_calibrated.yaml"), + "pC", + ) + elif args.calibrate == "adc": + analyzer.calibrate_nnls_values( + lambda x: x * analyzer.up_sampling_ratio, + str(analyzer.result_yaml).replace(".yaml", "_adc_calibrated.yaml"), + "ADC", + ) + elif args.calibrate == "gain": + elem = 1.602e-19 # C (elementary charge) + analyzer.calibrate_nnls_values( + lambda x: x + * ( + (analyzer.v_per_adc * analyzer.up_sampling_ratio * analyzer.sampling_time) + / analyzer.adc_impedance + ) + / elem, + str(analyzer.result_yaml).replace(".yaml", "_gain_calibrated.yaml"), + "a.u.", + ) logger.info("Processing complete.") except Exception as e: From 947a2d95ede4062dd9b0e5fa7f504d74ee9681d8 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 27 Nov 2025 18:38:28 -0800 Subject: [PATCH 11/32] Adding more CLI options --- src/mintanalysis/pmt/ana/analyser_class.py | 77 ++++++++++++++-------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index f4cae76..f687288 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -32,22 +32,15 @@ from scipy.optimize import curve_fit # -------------------------- -# Configuration / Constants +# TODO For Debugging only! 
# -------------------------- -AUX_YAML = Path( - "/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/aux/r020/p-1-1-om-hs-31.yaml" -) RAW_DIR = Path("/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020") -RESULT_YAML = Path("/home/pkrause/software/mint-analysis/debug_out/results.yaml") -PLOT_FOLDER = Path("/home/pkrause/software/mint-analysis/debug_out/pe_spectra/") -LOG_FILE = Path("/home/pkrause/software/mint-analysis/debug_out/pe_spectrum.log") - -# Analysis constants -BIN_SIZE = 20 -BINS = np.arange(-100, 10000, BIN_SIZE) -LIM = 20 +RESULT_DIR = Path("/home/pkrause/software/mint-analysis/debug_out") + +# -------------------------- +# Constants +# -------------------------- A4_LANDSCAPE = (11.69, 8.27) -OVERRIDE_RESULTS = True # -------------------------- @@ -55,7 +48,9 @@ # -------------------------- -def setup_logging(log_file: Path = LOG_FILE, level: int = logging.INFO) -> logging.Logger: +def setup_logging( + log_file: Path = RESULT_DIR / "analysis.log", level: int = logging.INFO +) -> logging.Logger: logger = logging.getLogger("PESpectrum") logger.setLevel(level) logger.propagate = False @@ -135,14 +130,10 @@ def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: class PESpectrumAnalyzer: def __init__( self, - aux_yaml: Path = AUX_YAML, - raw_dir: Path = RAW_DIR, - plot_folder: Path = PLOT_FOLDER, - result_yaml: Path = RESULT_YAML, - bins: np.ndarray = BINS, - bin_size: int = BIN_SIZE, - lim: int = LIM, - override_results: bool = OVERRIDE_RESULTS, + aux_yaml: Path, + bin_size: int = 20, + lim: float = 20, + override_results: bool = False, logger: logging.Logger | None = None, up_sampling_ratio: float = 24 / 240, v_per_adc: float = 0.25e-3, @@ -151,11 +142,11 @@ def __init__( calib: str = "None", ) -> None: self.aux_yaml = aux_yaml - self.raw_dir = raw_dir - self.plot_folder = plot_folder - self.result_yaml = result_yaml - self.bins = bins + self.raw_dir = RAW_DIR + self.plot_folder = RESULT_DIR / 
"plots" + self.result_yaml = RESULT_DIR / "results.yaml" self.bin_size = bin_size + self.bins = np.arange(-100, 10000, bin_size) self.lim = lim self.override_results = override_results self.logger = logger or setup_logging() @@ -853,11 +844,41 @@ def main() -> None: choices=["pC", "adc", "gain", "None"], help="Choose a charge calibration value (pC, adc, gain, or None)", ) + parser.add_argument( + "-b", + "--bin_size", + type=int, + default=20, + help="Number of bins used for analysis", + ) + parser.add_argument( + "-l", + "--nnls_limit", + type=float, + default=20, + help="Lower limit for solutions in the NNLS solution vector to be accepted.", + ) + parser.add_argument( + "-a", + "--aux_file", + help="Path to auxiliary file", + ) + parser.add_argument( + "-o", "--override", action="store_true", help="Override results if existing" + ) + args = parser.parse_args() - logger = setup_logging(LOG_FILE, level=logging.INFO) + logger = setup_logging(level=logging.INFO) try: - analyzer = PESpectrumAnalyzer(logger=logger, calib=args.calibrate) + analyzer = PESpectrumAnalyzer( + logger=logger, + aux_yaml=Path(args.aux_file), + bin_size=args.bin_size, + lim=args.nnls_limit, + override_results=args.override, + calib=args.calibrate, + ) if args.compute_pe: analyzer.run() if args.compute_dcr: From 24e5c984c10b5e83553c46e97feccd76a27ccef7 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Thu, 27 Nov 2025 19:44:53 -0800 Subject: [PATCH 12/32] Remove awkward data loading --- src/mintanalysis/pmt/ana/analyser_class.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index f687288..993ec3b 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -22,7 +22,6 @@ from pathlib import Path from typing import Any -import awkward as ak import matplotlib.pyplot as plt import numpy as np import yaml @@ -247,7 +246,7 @@ def 
process_channel( # load data try: - d = lh5.read_as(f"{ch}/dsp", f_dsp, "ak") + vals = lh5.read_as(f"{ch}/dsp/nnls_solution/values", f_dsp, "np") except Exception as e: msg = f"Failed to read DSP for {ch} in {f_dsp}: {e}" self.logger.warning(msg) @@ -255,7 +254,6 @@ def process_channel( # compute pe-values try: - vals = ak.to_numpy(d.nnls_solution.values, allow_missing=False) pe_vals = np.nansum(np.where(vals > self.lim, vals, np.nan), axis=1) except Exception as e: msg = f"Failed to compute pe values for {ch}: {e}" From fc057db5689fc1fb437d597928a34c84970c5d15 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Fri, 28 Nov 2025 15:05:16 -0800 Subject: [PATCH 13/32] Add key selection --- src/mintanalysis/pmt/ana/analyser_class.py | 101 +++++++++++++-------- 1 file changed, 64 insertions(+), 37 deletions(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index 993ec3b..1864d36 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -18,6 +18,7 @@ import argparse import logging +from collections.abc import Callable from dataclasses import dataclass from pathlib import Path from typing import Any @@ -130,6 +131,7 @@ class PESpectrumAnalyzer: def __init__( self, aux_yaml: Path, + keys: list | None = None, bin_size: int = 20, lim: float = 20, override_results: bool = False, @@ -141,6 +143,7 @@ def __init__( calib: str = "None", ) -> None: self.aux_yaml = aux_yaml + self.keys = keys self.raw_dir = RAW_DIR self.plot_folder = RESULT_DIR / "plots" self.result_yaml = RESULT_DIR / "results.yaml" @@ -167,7 +170,36 @@ def _load_aux(self) -> dict[str, Any]: with open(self.aux_yaml) as f: aux = yaml.safe_load(f) self.logger.info("Loaded aux YAML: %s", self.aux_yaml) - return aux + + if self.keys is None: + return aux + ret = {} + for k in self.keys: + if k in aux: + ret[k] = aux[k] + else: + msg = f"Key {k} not in aux file, skipping." 
+ self.logger.warning(msg) + return ret + + def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) -> None: + if self.result_yaml.exists(): + with open(self.result_yaml) as f: + existing = yaml.safe_load(f) or {} + if key in existing and not self.override_results: + msg = key + " results already present and override flag is False." + self.logger.error(msg) + raise RuntimeError(msg) + + existing[key] = results + with open(self.result_yaml, "w") as f: + yaml.safe_dump(existing, f, default_flow_style=False) + self.logger.info("Updated result YAML at %s", self.result_yaml) + + else: + with open(self.result_yaml, "w") as f: + yaml.safe_dump({key: results}, f, default_flow_style=False) + self.logger.info("Wrote new result YAML at %s", self.result_yaml) # ---------------------- # Public entrypoints @@ -181,7 +213,7 @@ def run(self) -> None: except FileNotFoundError as e: self.logger.exception("Critical file missing for run %s: %s", run_name, e) raise - self._save_results(results) + self._save_results(results, "pe_spectrum") self.logger.info("All runs processed.") # ---------------------- @@ -274,18 +306,27 @@ def process_channel( bin_centers = 0.5 * (bins[1:] + bins[:-1]) + # total counts + result["statistics"] = { + "total_cts": {"val": int(np.sum(n)), "unit": ""}, + } + + # runtime + result["runtime"] = {"unit": "s"} + if "runtime_in_s" in meta: + result["runtime"]["aux"] = meta["runtime_in_s"] + raw_runtime = self._extract_runtime_if_present(f_raw, ch) + if raw_runtime is not None: + result["runtime"]["raw"] = raw_runtime + # noise-only if voltage == 0: self._decorate_axis(ax) # minimal result (no fit) - result.update( - {"status": "ok", "statistics": {}, "pe_peak_fit": {}, "runtime": {"unit": "s"}} - ) - if "runtime_in_s" in meta: - result["runtime"]["aux"] = meta["runtime_in_s"] - raw_runtime = self._extract_runtime_if_present(f_raw, ch) - if raw_runtime is not None: - result["runtime"]["raw"] = raw_runtime + msg = "Voltage a 0 --> Noise 
run" + result["status"] = ("skipped",) + result["reason"] = (msg,) + return fig, result # detect valley & peaks @@ -294,7 +335,9 @@ def process_channel( msg = "Valley detection failed (no strict peak/valley)." self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) self._decorate_axis(ax) - return fig, {"status": "skipped", "reason": msg} + result["status"] = ("skipped",) + result["reason"] = (msg,) + return fig, result noise_peak, valley_idx = vi @@ -394,14 +437,6 @@ def process_channel( result.setdefault("statistics", {}) result["statistics"]["error"] = str(e) - # runtime - result["runtime"] = {"unit": "s"} - if "runtime_in_s" in meta: - result["runtime"]["aux"] = meta["runtime_in_s"] - raw_runtime = self._extract_runtime_if_present(f_raw, ch) - if raw_runtime is not None: - result["runtime"]["raw"] = raw_runtime - result["status"] = "ok" return fig, result @@ -447,25 +482,9 @@ def _extract_runtime_if_present(self, f_raw: Path, ch: str) -> float | None: return None - def _save_results(self, results: dict[str, dict[int, dict[str, Any]]]) -> None: - if self.result_yaml.exists(): - with open(self.result_yaml) as f: - existing = yaml.safe_load(f) or {} - if "pe_spectrum" in existing and not self.override_results: - msg = "Results already present and override flag is False." - raise RuntimeError(msg) - existing["pe_spectrum"] = results - with open(self.result_yaml, "w") as f: - yaml.safe_dump(existing, f, default_flow_style=False) - self.logger.info("Updated result YAML at %s", self.result_yaml) - else: - with open(self.result_yaml, "w") as f: - yaml.safe_dump({"pe_spectrum": results}, f, default_flow_style=False) - self.logger.info("Wrote new result YAML at %s", self.result_yaml) - def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: """ - This function add a axis in new units to a given axis plot. + This function adds a axis in new units to a given axis plot. 
Parameters ---------- @@ -747,7 +766,7 @@ def compute_linear_gain_fit(self) -> None: yaml.safe_dump(data, f, default_flow_style=False) self.logger.info("Linear gain fit results saved to %s", self.result_yaml) - def calibrate_nnls_values(self, calibration_func, output_file, new_unit): + def calibrate_nnls_values(self, calibration_func: Callable, output_file: str, new_unit: str): """ Reads the current results.yaml, finds all entries that contain a dict with keys {"val": , "unit": "NNLS"}, applies calibration_func(val) @@ -864,6 +883,13 @@ def main() -> None: parser.add_argument( "-o", "--override", action="store_true", help="Override results if existing" ) + parser.add_argument( + "-k", + "--keys", + nargs="+", + default=None, + help="Only analyse this keys, ignore all other keys in aux file", + ) args = parser.parse_args() @@ -872,6 +898,7 @@ def main() -> None: analyzer = PESpectrumAnalyzer( logger=logger, aux_yaml=Path(args.aux_file), + keys=args.keys, bin_size=args.bin_size, lim=args.nnls_limit, override_results=args.override, From ef43d01bf53e7f0d2ff7a7cc8d223aa634f06614 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 2 Dec 2025 16:31:56 -0800 Subject: [PATCH 14/32] adapted to current aux file layout --- src/mintanalysis/pmt/ana/analyser_class.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/analyser_class.py index 1864d36..163d3f1 100644 --- a/src/mintanalysis/pmt/ana/analyser_class.py +++ b/src/mintanalysis/pmt/ana/analyser_class.py @@ -273,8 +273,7 @@ def process_channel( """ result: dict[str, Any] = {} ch_idx = int(ch[2:]) - voltage = float(meta["voltages_in_V"][ch_idx]) - result["voltage"] = {"val": voltage, "unit": "V"} + result["voltage"] = meta[ch_idx + 1]["v10"] # load data try: @@ -298,7 +297,8 @@ def process_channel( pe_vals, bins=self.bins, histtype="step", - label=f"channel {ch} (PMT {pmt}) at {voltage:.2f} V", + label=f"channel {ch} 
(PMT {pmt}) at {result['voltage']['val']:.2f}" + f" {result['voltage']['unit']}", ) if self.calib != "None": @@ -312,15 +312,15 @@ def process_channel( } # runtime - result["runtime"] = {"unit": "s"} - if "runtime_in_s" in meta: - result["runtime"]["aux"] = meta["runtime_in_s"] + result["runtime"] = {} + if "runtime" in meta: + result["runtime"]["aux"] = meta["runtime"] raw_runtime = self._extract_runtime_if_present(f_raw, ch) if raw_runtime is not None: - result["runtime"]["raw"] = raw_runtime + result["runtime"]["raw"] = {"val": raw_runtime, "unit": "s"} # noise-only - if voltage == 0: + if result["voltage"].get("val", 0) <= 10: self._decorate_axis(ax) # minimal result (no fit) msg = "Voltage a 0 --> Noise run" @@ -651,7 +651,7 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: dcts = stats.get("1st_pe_fit_integral_below_valley", {}).get( "val", 0.0 ) + stats.get("cts_above_valley", {}).get("val", 0) - runtime = runtime_info[time_mode] + runtime = runtime_info[time_mode].get("val", 0) run_dcr[pmt] = { "val": float(dcts) / float(runtime), "err": float(dcts**0.5) / float(runtime), From 1da1544ba2d71e1f7abcdf1c9e29499e9793fab5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 2 Dec 2025 17:07:11 -0800 Subject: [PATCH 15/32] make a callable cli command --- pyproject.toml | 15 +++++++++++---- src/mintanalysis/pmt/ana/__init__.py | 4 +++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bd9891b..daaf459 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,10 +53,15 @@ requires-python = ">=3.10, <3.13" dynamic = ["version"] dependencies = [ +"dbetto", +"iminuit", +"pymongo", +"PyYAML", +"sshtunnel", "pygama>=2.0.0", "legend-daq2lh5>=1.2.1", "legend-pydataobj>=1.7", -"numpy==1.26.2", +"numpy", "numba>0.52", "pandas", "matplotlib", @@ -65,11 +70,9 @@ dependencies = [ "ipython", "ipympl", "boost_histogram<1.5", -#"dspeed==1.6.2", +"dspeed==2.0.2", ] -#"dspeed @ file:///${PROJECT_ROOT}/dspeed", 
- [project.optional-dependencies] test = [ "pytest >=6", @@ -85,6 +88,10 @@ docs = [ "furo>=2023.08.17", ] +[project.scripts] +# Format: = ":" +build_ana = "mintanalysis.pmt.ana.analyser_class:main" + #[[tool.uv.index]] # Optional name for the index. #name = "dspeed-pone" diff --git a/src/mintanalysis/pmt/ana/__init__.py b/src/mintanalysis/pmt/ana/__init__.py index 714298a..a83ddd9 100644 --- a/src/mintanalysis/pmt/ana/__init__.py +++ b/src/mintanalysis/pmt/ana/__init__.py @@ -2,4 +2,6 @@ Routines for the final analysis of PMT data """ -__all__ = [] +from .analyser_class import PESpectrumAnalyzer + +__all__ = ["PESpectrumAnalyzer"] From 0a7e385dd16026656ed4888edb82984f2ee1fbf3 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 3 Dec 2025 11:49:33 -0800 Subject: [PATCH 16/32] Analyzer class renamed --- pyproject.toml | 2 +- .../pmt/ana/{analyser_class.py => peSpectrumAnalyzer.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/mintanalysis/pmt/ana/{analyser_class.py => peSpectrumAnalyzer.py} (100%) diff --git a/pyproject.toml b/pyproject.toml index daaf459..b6caa17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,7 +90,7 @@ docs = [ [project.scripts] # Format: = ":" -build_ana = "mintanalysis.pmt.ana.analyser_class:main" +build_ana = "mintanalysis.pmt.ana.peSpectrumAnalyzer:main" #[[tool.uv.index]] # Optional name for the index. 
diff --git a/src/mintanalysis/pmt/ana/analyser_class.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py similarity index 100% rename from src/mintanalysis/pmt/ana/analyser_class.py rename to src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py From ffbb7310c304c9ec265fc66a9efb3d89775ae49d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 10 Dec 2025 11:44:19 -0800 Subject: [PATCH 17/32] Implement robust unit system with pint --- src/mintanalysis/pmt/ana/__init__.py | 2 +- .../pmt/ana/peSpectrumAnalyzer.py | 533 +++++++++--------- 2 files changed, 256 insertions(+), 279 deletions(-) diff --git a/src/mintanalysis/pmt/ana/__init__.py b/src/mintanalysis/pmt/ana/__init__.py index a83ddd9..037a007 100644 --- a/src/mintanalysis/pmt/ana/__init__.py +++ b/src/mintanalysis/pmt/ana/__init__.py @@ -2,6 +2,6 @@ Routines for the final analysis of PMT data """ -from .analyser_class import PESpectrumAnalyzer +from .peSpectrumAnalyzer import PESpectrumAnalyzer __all__ = ["PESpectrumAnalyzer"] diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index 163d3f1..41a467c 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -31,6 +31,10 @@ from matplotlib.backends.backend_pdf import PdfPages from scipy.optimize import curve_fit +from pint import UnitRegistry +from uncertainties import ufloat, UFloat + + # -------------------------- # TODO For Debugging only! 
# -------------------------- @@ -76,14 +80,78 @@ def setup_logging( # -------------------------- # Small helpers # -------------------------- - +@dataclass(frozen=True) +class Calibration: + up_sampling_ratio: float + v_per_adc: float + adc_impedance: float + sampling_time: float + renormalization_factor: float @dataclass class ChannelResult: status: str # 'ok', 'skipped', 'error' data: dict[str, Any] +def get_physics_object(obj, ureg): + """ + Convert a given object recursevly + to a object with all {val,(err),unit} occurences replaced with a pint (and ufloat) object + """ + if isinstance(obj, dict) and {"val", "unit"}.issubset(obj): + val = obj["val"] + if "err" in obj: + err = obj["err"] + return ureg.Quantity(ufloat(val, err),ureg(obj["unit"])) + else: + return ureg.Quantity(val,ureg(obj["unit"])) + + elif isinstance(obj, dict): + return {k: get_physics_object(v, ureg) for k, v in obj.items()} + + elif isinstance(obj, (list, tuple)): + return type(obj)(get_physics_object(v, ureg) for v in obj) + + else: + return obj + +def quantity_to_dict(obj): + """ + Recursively replace a pint (and ufloat) object with a dict + {val, (err), unit} + """ + # The pint internal function is broken (see issue 2121) + preferred_units = ['Hz'] + if hasattr(obj, "magnitude") and hasattr(obj, "units"): + if not preferred_units is None: + for u in preferred_units: + if obj.is_compatible_with(u): + obj = obj.to(u) + mag = obj.magnitude + + + if isinstance(mag, UFloat): + val = mag.n + err = mag.s + else: + val = mag + err = None + + return { + "val": float(val), + **({ "err": float(err) } if err is not None else {}), + "unit": format(obj.units,"~") + } + + elif isinstance(obj, dict): + return {k: quantity_to_dict(v) for k, v in obj.items()} + + elif isinstance(obj, (list, tuple)): + return type(obj)(quantity_to_dict(v) for v in obj) + else: + return obj + def linear_func(x, a, b): return a * x + b @@ -136,10 +204,7 @@ def __init__( lim: float = 20, override_results: bool = False, logger: 
logging.Logger | None = None, - up_sampling_ratio: float = 24 / 240, - v_per_adc: float = 0.25e-3, - adc_impedance: float = 75.0, - sampling_time: float = 4.8e-9, + calibrator: Calibration = Calibration(24/240,0.25e-3,75.,4.8e-9,1), calib: str = "None", ) -> None: self.aux_yaml = aux_yaml @@ -152,13 +217,26 @@ def __init__( self.lim = lim self.override_results = override_results self.logger = logger or setup_logging() - self.aux = self._load_aux() self.plot_folder.mkdir(parents=True, exist_ok=True) - self.up_sampling_ratio = up_sampling_ratio - self.v_per_adc = v_per_adc - self.adc_impedance = adc_impedance - self.sampling_time = sampling_time + self.calibrator = calibrator self.calib = calib + self.ureg = UnitRegistry() + self.aux = self._load_aux() + + # unit handling + vadc = self.ureg.Quantity(calibrator.v_per_adc,self.ureg.volt) + usr = self.ureg.Quantity(calibrator.up_sampling_ratio, self.ureg.dimensionless) + rf = self.ureg.Quantity(calibrator.renormalization_factor, self.ureg.dimensionless) + st = self.ureg.Quantity(calibrator.sampling_time,self.ureg.seconds) + imp = self.ureg.Quantity(calibrator.adc_impedance,self.ureg.ohm) + + nnls_coloumb_factor = ((vadc*usr*st*rf)/imp) + self.ureg.define(f"NNLS = {nnls_coloumb_factor.to('coulomb').magnitude} * coulomb") + self.ureg.define(f"ADC = {usr.magnitude}*NNLS") + + + + # ---------------------- # I/O helpers @@ -171,6 +249,9 @@ def _load_aux(self) -> dict[str, Any]: aux = yaml.safe_load(f) self.logger.info("Loaded aux YAML: %s", self.aux_yaml) + # convert to physics units + aux = get_physics_object(aux,self.ureg) + if self.keys is None: return aux ret = {} @@ -181,6 +262,19 @@ def _load_aux(self) -> dict[str, Any]: msg = f"Key {k} not in aux file, skipping." 
self.logger.warning(msg) return ret + + def _load_results(self) ->dict: + if not self.result_yaml.exists(): + msg = f"Result YAML not found: {self.result_yaml}" + raise FileNotFoundError(msg) + with open(self.result_yaml) as f: + data = yaml.safe_load(f) or {} + if "pe_spectrum" not in data: + msg = "pe_spectrum info not present in result YAML; run analysis first." + raise RuntimeError(msg) + + return get_physics_object(data,self.ureg) + def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) -> None: if self.result_yaml.exists(): @@ -191,14 +285,14 @@ def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) self.logger.error(msg) raise RuntimeError(msg) - existing[key] = results + existing[key] = quantity_to_dict(results) with open(self.result_yaml, "w") as f: yaml.safe_dump(existing, f, default_flow_style=False) self.logger.info("Updated result YAML at %s", self.result_yaml) else: with open(self.result_yaml, "w") as f: - yaml.safe_dump({key: results}, f, default_flow_style=False) + yaml.safe_dump({key: quantity_to_dict(results)}, f, default_flow_style=False) self.logger.info("Wrote new result YAML at %s", self.result_yaml) # ---------------------- @@ -297,8 +391,8 @@ def process_channel( pe_vals, bins=self.bins, histtype="step", - label=f"channel {ch} (PMT {pmt}) at {result['voltage']['val']:.2f}" - f" {result['voltage']['unit']}", + label=f"channel {ch} (PMT {pmt}) at {result['voltage'].magnitude:.2f}" + f" {format(result['voltage'].units,'~')}", ) if self.calib != "None": @@ -308,7 +402,7 @@ def process_channel( # total counts result["statistics"] = { - "total_cts": {"val": int(np.sum(n)), "unit": ""}, + "total_cts": self.ureg.Quantity(ufloat(np.sum(n),np.sum(n)**0.5), 'dimensionless'), } # runtime @@ -317,10 +411,10 @@ def process_channel( result["runtime"]["aux"] = meta["runtime"] raw_runtime = self._extract_runtime_if_present(f_raw, ch) if raw_runtime is not None: - result["runtime"]["raw"] = {"val": 
raw_runtime, "unit": "s"} + result["runtime"]["raw"] = raw_runtime * self.ureg.seconds # noise-only - if result["voltage"].get("val", 0) <= 10: + if result["voltage"] <= 10 * self.ureg.volt: self._decorate_axis(ax) # minimal result (no fit) msg = "Voltage a 0 --> Noise run" @@ -399,9 +493,9 @@ def process_channel( fit_errs = {k: float(v) for k, v in m.errors.to_dict().items()} result["pe_peak_fit"] = { - "mean": {"val": fit_vals["mu"], "err": fit_errs["mu"], "unit": "NNLS"}, - "sigma": {"val": fit_vals["sigma"], "err": fit_errs["sigma"], "unit": "NNLS"}, - "amp": {"val": fit_vals["amp"], "err": fit_errs["amp"], "unit": ""}, + "mean": self.ureg.Quantity(ufloat(fit_vals["mu"], fit_errs["mu"]),self.ureg.NNLS), + "sigma": self.ureg.Quantity(ufloat(fit_vals["sigma"], fit_errs["sigma"]),self.ureg.NNLS), + "amp": self.ureg.Quantity(ufloat(fit_vals["amp"], fit_errs["amp"]),self.ureg.dimensionless) } # full fit curve over bins for plotting & integrals @@ -413,21 +507,19 @@ def process_channel( # statistics try: result["statistics"] = { - "1st_pe_fit_integral_below_valley": { - "val": float(np.sum(y_fit[:valley_idx])), - "unit": "", - }, - "cts_above_valley": {"val": int(np.sum(n[:valley_idx])), "unit": ""}, - "cts_below_valley": {"val": int(np.sum(n[valley_idx:])), "unit": ""}, - "1st_pe_fit_integral": {"val": int(float(np.sum(y_fit))), "unit": ""}, - "total_cts": {"val": int(np.sum(n)), "unit": ""}, + "1st_pe_fit_integral_below_valley": self.ureg.Quantity(ufloat(np.sum(y_fit[:valley_idx]), + np.sum(y_fit[:valley_idx])**0.5), 'dimensionless'), + "cts_above_valley": self.ureg.Quantity(ufloat(np.sum(n[:valley_idx]), + np.sum(n[:valley_idx])**0.5), 'dimensionless'), + "1st_pe_fit_integral": self.ureg.Quantity(ufloat(np.sum(y_fit), np.sum(y_fit)**0.5), 'dimensionless'), + "total_cts": self.ureg.Quantity(ufloat(np.sum(n), np.sum(n)**0.5), 'dimensionless'), "valley": { - "pos": {"val": float(bin_centers[valley_idx]), "unit": "NNLS"}, - "amp": {"val": int(n[valley_idx]), "unit": 
""}, + "pos": float(bin_centers[valley_idx]) * self.ureg.NNLS, + "amp": self.ureg.Quantity(ufloat(n[valley_idx],n[valley_idx]**0.5), 'dimensionless') }, "1st_pe_guess": { - "pos": {"val": float(mu0), "unit": "NNLS"}, - "amp": {"val": int(amp0), "unit": ""}, + "pos": float(mu0) *self.ureg.NNLS, + "amp": self.ureg.Quantity(ufloat(amp0,amp0**0.5), 'dimensionless'), }, } except Exception as e: @@ -447,8 +539,8 @@ def _decorate_axis(self, ax: plt.Axes) -> None: ax.set_xlim(-10, 2.5e3) ax.set_ylim(0.5, None) ax.set_yscale("log") - ax.set_ylabel(f"Counts/{self.bin_size} NNLS units") - ax.set_xlabel("NNLS units") + ax.set_ylabel(f"Counts/{self.bin_size} NNLS") + ax.set_xlabel("Charge (NNLS)") ax.set_title(f"pygama-NNLS reconstruction ({self.lim} units solution cut-off)") ax.legend() @@ -484,62 +576,32 @@ def _extract_runtime_if_present(self, f_raw: Path, ch: str) -> float | None: def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: """ - This function adds a axis in new units to a given axis plot. + This function adds a axis in new Charge units to a given axis plot. + if the unit is not charge convertible (or is gain) will log a warning + and fall gracefully. Parameters ---------- ax : plt.Axes - The plot to which to add the pC axis. + The plot to which to add the new axis. is_y : bool If True add a new y-axis. Else an x-axis. 
""" - if self.calib not in ["pC", "adc", "gain"]: - self.logger.warning("Invalid calibration unit, not calibrating.") - return - - label = "Charge " - if self.calib == "pC": - func = ( - lambda x: x - * ( - (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) - / self.adc_impedance - ) - * 1e12, - lambda y: y - / ( - ( - (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) - / self.adc_impedance - ) - * 1e12 - ), - ) - label += "(pC)" - - elif self.calib == "gain": + + if self.calib == "gain": label = "Gain (a.u.)" - elem = 1.602e-19 # C (elementary charge) - func = ( - lambda x: x - * ( - (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) - / self.adc_impedance - ) - / elem, - lambda y: y - / ( - ( - (self.v_per_adc * self.up_sampling_ratio * self.sampling_time) - / self.adc_impedance - ) - / elem - ), + func = (lambda x: ((x*self.ureg.NNLS).to("C")/self.ureg.elementary_charge).m, + lambda y: ((y*self.ureg.elementary_charge).to("NNLS")).m ) - - elif self.calib == "adc": - label += f"(ADC x {self.sampling_time*1e9:.1f} ns)" - func = (lambda x: x * self.up_sampling_ratio, lambda y: y / self.up_sampling_ratio) + else: + if not self.ureg.NNLS.is_compatible_with(self.calib): + msg = f"Unit [{self.calib}] not compatible with charge" + self.logger.warning(msg) + label = f"Charge ({self.calib})" + func = (lambda x: (x*self.ureg.NNLS).to(self.calib).m, + lambda y: (y*self.ureg(self.calib)).to("NNLS").m + ) + if is_y: secax_y = ax.secondary_yaxis("right", functions=func) secax_y.set_ylabel(label) @@ -547,65 +609,71 @@ def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: secax_x = ax.secondary_xaxis("top", functions=func) secax_x.set_xlabel(label) + def _unit_converter(self, v,target_unit, constant = 1): + """ + Take a value v and if its a Quantity + apply conversion of the targeted unit. + A constant can be multiplied. 
+ E.g.: + target unit = pC + v = 2.5e-24 C**2/V*m + returns 2.5 pC**2/V/m + """ + if not isinstance(v, self.ureg.Quantity): + return v + dims = dict(v._units) + # Decompose units + dims = dict(v._units) + target_units = {k: d for k, d in dims.items() if self.ureg(k).is_compatible_with(target_unit)} + other_units = {k: d for k, d in dims.items() if not self.ureg(k).is_compatible_with(target_unit)} + + # Start with dimensionless + v_base = v / v.units + + # Multiply back non-target units + for k, d in other_units.items(): + v_base *= self.ureg(k)**d + + # Convert target units to target unit + for k, d in target_units.items(): + v_base *= (self.ureg(k).to(target_unit))**d + + return v_base * constant + + # ---------------------- # Signal to Noise Ratio (SNR) # ---------------------- - def plot_snr(self) -> None: - if not self.result_yaml.exists(): - msg = f"Result YAML not found: {self.result_yaml}" - raise FileNotFoundError(msg) - with open(self.result_yaml) as f: - data = yaml.safe_load(f) or {} - if "pe_spectrum" not in data: - msg = "pe_spectrum info not present in result YAML; run analysis first." 
- raise RuntimeError(msg) - + def compute_snr(self) -> None: + data = self._load_results() snr = {} for run_name, pmt_dict in data["pe_spectrum"].items(): run_snr = {} for pmt, info in pmt_dict.items(): try: - if info.get("voltage", {}).get("val", 0) == 0: + if info.get("voltage") == 0 * self.ureg.V: continue - noise = { - "val": info.get("statistics", {}) - .get("valley", {}) - .get("amp", {}) - .get("val", 0.0) - } - noise["err"] = noise["val"] ** 0.5 + noise =info.get("statistics").get("valley").get("amp") + if info.get("status", "skipped") == "ok": - signal = { - "val": info.get("pe_peak_fit").get("amp", {}).get("val", 0.0), - "err": info.get("pe_peak_fit").get("amp", {}).get("err", 0.0), - } + signal = info.get("pe_peak_fit").get("amp") else: - signal = { - "val": info.get("statistics", {}) - .get("1st_pe_guess", {}) - .get("amp", {}) - .get("val", 0) - } - signal["err"] = signal["val"] ** 0.5 - run_snr[pmt] = { - "val": 1 - noise["val"] / signal["val"], - "err": ( - noise["err"] ** 2 / signal["val"] ** 2 - + (noise["val"] ** 2 * signal["err"] ** 2) / signal["val"] ** 4 - ), - "unit": "", - } + signal =info.get("statistics").get("1st_pe_guess").get("amp") + run_snr[pmt] = 1- noise/signal except Exception as e: self.logger.warning( "Failed to compute SNR for run %s PMT %s: %s", run_name, pmt, e ) snr[run_name] = run_snr + self._save_results(snr,"snr") + fig, ax = plt.subplots(figsize=A4_LANDSCAPE) for run_name, pmt_dict in snr.items(): pmts = sorted(pmt_dict.keys()) - vals = [pmt_dict[p]["val"] for p in pmts] - ax.plot(pmts, vals, marker="o", label=run_name) + vals = [pmt_dict[p].n for p in pmts] + errs = [pmt_dict[p].s for p in pmts] + ax.errorbar(pmts, vals, errs, label=run_name, fmt='o') ax.set_xlabel("PMT") ax.set_ylabel("SNR (a.u.)") ax.set_title("SNR per PMT (= 1 - valley/peak)") @@ -620,23 +688,13 @@ def plot_snr(self) -> None: # Dark Count Rate (DCR) # ---------------------- def compute_dark_count_rate(self, time_mode: str = "aux") -> None: - if not 
self.result_yaml.exists(): - msg = f"Result YAML not found: {self.result_yaml}" - raise FileNotFoundError(msg) - with open(self.result_yaml) as f: - data = yaml.safe_load(f) or {} - if "pe_spectrum" not in data: - msg = "pe_spectrum info not present in result YAML; run analysis first." - raise RuntimeError(msg) - if "dcr" in data and not self.override_results: - msg = "DCR already exists and override flag is False." - raise RuntimeError(msg) + data = self._load_results() dcr = {} for run_name, pmt_dict in data["pe_spectrum"].items(): run_dcr = {} for pmt, info in pmt_dict.items(): try: - if info.get("voltage", {}).get("val", 0) == 0: + if info.get("voltage") == 0*self.ureg.volt: continue stats = info.get("statistics", {}) runtime_info = info.get("runtime", {}) @@ -648,30 +706,23 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: time_mode, ) continue - dcts = stats.get("1st_pe_fit_integral_below_valley", {}).get( - "val", 0.0 - ) + stats.get("cts_above_valley", {}).get("val", 0) - runtime = runtime_info[time_mode].get("val", 0) - run_dcr[pmt] = { - "val": float(dcts) / float(runtime), - "err": float(dcts**0.5) / float(runtime), - "unit": "Hz", - } + dcts = stats.get("1st_pe_fit_integral_below_valley")+ stats.get("cts_above_valley") + runtime = runtime_info[time_mode] + run_dcr[pmt] = dcts/runtime + except Exception as e: self.logger.warning( "Failed to compute DCR for run %s PMT %s: %s", run_name, pmt, e ) dcr[run_name] = run_dcr - data["dcr"] = dcr - with open(self.result_yaml, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False) - self.logger.info("Wrote DCR results to %s", self.result_yaml) + self._save_results(dcr,"dcr") fig, ax = plt.subplots(figsize=A4_LANDSCAPE) for run_name, pmt_dict in dcr.items(): pmts = sorted(pmt_dict.keys()) - vals = [pmt_dict[p]["val"] for p in pmts] - ax.plot(pmts, vals, marker="o", label=run_name) + vals = [pmt_dict[p].n for p in pmts] + errs = [pmt_dict[p].s for p in pmts] + ax.errorbar(pmts, vals, 
errs, label=run_name, fmt="o") ax.set_xlabel("PMT") ax.set_ylabel("DCR (Hz)") ax.set_title("Dark Count Rate per PMT") @@ -686,39 +737,21 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: # Linear Gain Fit computation # ---------------------- def compute_linear_gain_fit(self) -> None: - if not self.result_yaml.exists(): - msg = f"Result YAML not found: {self.result_yaml}" - raise FileNotFoundError(msg) - with open(self.result_yaml) as f: - data = yaml.safe_load(f) or {} - if "pe_spectrum" not in data: - msg = "pe_spectrum info not present in result YAML; run analysis first." - raise RuntimeError(msg) - + data = self._load_results() tmp_dic = {"used_keys": []} - y_unit = None - x_unit = None + for key, run in data["pe_spectrum"].items(): tmp_dic["used_keys"].append(key) for pmt in run: if pmt not in tmp_dic: - tmp_dic[pmt] = {"voltage": [], "vals": [], "errs": []} - v = run[pmt]["voltage"]["val"] - xu = run[pmt]["voltage"]["unit"] - if x_unit is None: - x_unit = xu - else: - assert x_unit == xu - if v == 0: + tmp_dic[pmt] = {"voltage": [], "vals": []} + v = run[pmt]["voltage"] + + if v == 0 * self.ureg.volts: continue + tmp_dic[pmt]["voltage"].append(v) - tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]["val"]) - tmp_dic[pmt]["errs"].append(run[pmt]["pe_peak_fit"]["mean"]["err"]) - yu = run[pmt]["pe_peak_fit"]["mean"]["unit"] - if y_unit is None: - y_unit = yu - else: - assert y_unit == yu + tmp_dic[pmt]["vals"].append(run[pmt]["pe_peak_fit"]["mean"]) pdf_path = self.plot_folder / "gain_plots.pdf" with PdfPages(pdf_path) as pdf: @@ -726,104 +759,77 @@ def compute_linear_gain_fit(self) -> None: if key == "used_keys": continue fig, ax = plt.subplots(figsize=A4_LANDSCAPE) - ax.errorbar(pmt["voltage"], pmt["vals"], pmt["errs"], fmt="o", label=f"PMT {key}") - ax.set_xlabel(f"Voltage ({x_unit})") - ax.set_ylabel(f"PMT position ({y_unit})") + xunit = pmt["voltage"][0].units + pmt["voltage"] = [i.to(xunit) for i in pmt["voltage"]] + yunit = 
pmt["vals"][0].units + pmt["vals"] =[i.to(yunit) for i in pmt["vals"]] + ax.errorbar([i.m for i in pmt["voltage"]], + [i.n for i in pmt["vals"]], + [i.s for i in pmt["vals"]], + label=f"PMT {key}", + fmt="o" + ) + ax.set_xlabel(f"Voltage ({format(xunit,"~")})") + ax.set_ylabel(f"Gain ({format(yunit,"~")})") if self.calib != "None": self._add_charge_axis(ax, True) params, covariance = curve_fit( linear_func, - pmt["voltage"], - pmt["vals"], - sigma=pmt["errs"], + [i.m for i in pmt["voltage"]], + [i.n for i in pmt["vals"]], + [i.s for i in pmt["vals"]], absolute_sigma=True, ) a_opt, b_opt = params perr = np.sqrt(np.diag(covariance)) - x = np.linspace(-1 * b_opt / a_opt, max(pmt["voltage"]) + 10, 1000) + x = np.linspace(-1 * b_opt / a_opt, max([i.m for i in pmt["voltage"]]) + 10, 1000) ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") - tmp_dic[key]["a"] = { - "val": float(a_opt), - "err": float(perr[0]), - "unit": f"{y_unit}/{x_unit}", - } - tmp_dic[key]["b"] = { - "val": float(b_opt), - "err": float(perr[1]), - "unit": f"{y_unit}/{x_unit}", - } + tmp_dic[key]["a"] = self.ureg.Quantity(ufloat(a_opt,perr[0]),yunit/xunit) + tmp_dic[key]["b"] = self.ureg.Quantity(ufloat(b_opt,perr[1]),yunit) + tmp_dic[key]["func"] = "G = a*voltage+b" pmt.pop("voltage") pmt.pop("vals") - pmt.pop("errs") ax.legend() pdf.savefig(fig) plt.close(fig) - data["linear_gain"] = tmp_dic - with open(self.result_yaml, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False) - self.logger.info("Linear gain fit results saved to %s", self.result_yaml) + + self._save_results(tmp_dic,"linear_gain") - def calibrate_nnls_values(self, calibration_func: Callable, output_file: str, new_unit: str): + def calibrate_nnls_values(self, output_file: str): """ - Reads the current results.yaml, finds all entries that contain a dict with - keys {"val": , "unit": "NNLS"}, applies calibration_func(val) - and writes a calibrated result file. 
+ Reads the current results.yaml, + finds all entries that contain a Charge compatible quantities + Applies charge conversion as per self.calib + writes out calibrated file to output_file. """ - factor = calibration_func(1) - try: - with open(self.result_yaml) as f: - data = yaml.safe_load(f) - except Exception as e: - msg = f"Failed to load file: {e}" + if self.calib != "gain" and not self.ureg.NNLS.is_compatible_with(self.calib): + msg = f"Unit [{self.calib}] not compatible with charge" self.logger.error(msg) - raise + return + + data = self._load_results() + + def convert_charge_units(d): + """Recursively convert all Quantities compatible with charge to new_unit.""" + for k, v in d.items(): + if isinstance(v, dict): + convert_charge_units(v) # recurse + elif isinstance(v, self.ureg.Quantity): + if self.calib == "gain": + self._unit_converter(v,'C',self.ureg.elementary_charge) + else: + self._unit_converter(v,self.calib) + - def classify(unit: str): - unit = str(unit) - if unit == "NNLS" or unit.startswith("NNLS/"): - return "nnls" - if "/NNLS" in unit: - return "per_nnls" - if "NNLS" in unit: - return "nnls" - return None - - def recurse(obj): - if isinstance(obj, dict): - if set(obj.keys()) >= {"val", "unit"}: - kind = classify(obj.get("unit")) - if kind == "nnls": - try: - obj["val"] = obj["val"] * factor - if "err" in obj: - obj["err"] = obj["err"] * factor - obj["unit"] = obj["unit"].replace("NNLS", new_unit) - except Exception as e: - msg = f"NNLS calibration failed for value {obj}: {e}" - self.logger.error(msg) - elif kind == "per_nnls": - try: - obj["val"] = obj["val"] / factor - if "err" in obj: - obj["err"] = obj["err"] / factor - obj["unit"] = obj["unit"].replace("NNLS", new_unit) - except Exception as e: - msg = f"per-NNLS calibration failed for value {obj}: {e}" - self.logger.error(msg) - for _, v in obj.items(): - recurse(v) - elif isinstance(obj, list): - for item in obj: - recurse(item) - - recurse(data) + # Convert all charge-compatible 
quantities to nanocoulombs + convert_charge_units(data) try: with open(output_file, "w") as f: - yaml.safe_dump(data, f, default_flow_style=False) + yaml.safe_dump(quantity_to_dict(data), f, default_flow_style=False) except Exception as e: msg = f"Failed to write calibrated file: {e}" self.logger.error(msg) @@ -858,8 +864,7 @@ def main() -> None: "-c", "--calibrate", default="None", - choices=["pC", "adc", "gain", "None"], - help="Choose a charge calibration value (pC, adc, gain, or None)", + help="Choose a charge calibration unit", ) parser.add_argument( "-b", @@ -911,37 +916,9 @@ def main() -> None: if args.compute_gain: analyzer.compute_linear_gain_fit() if args.compute_snr: - analyzer.plot_snr() + analyzer.compute_snr() if args.calibrate != "None": - if args.calibrate == "pC": - analyzer.calibrate_nnls_values( - lambda x: x - * ( - (analyzer.v_per_adc * analyzer.up_sampling_ratio * analyzer.sampling_time) - / analyzer.adc_impedance - ) - * 1e12, - str(analyzer.result_yaml).replace(".yaml", "_pC_calibrated.yaml"), - "pC", - ) - elif args.calibrate == "adc": - analyzer.calibrate_nnls_values( - lambda x: x * analyzer.up_sampling_ratio, - str(analyzer.result_yaml).replace(".yaml", "_adc_calibrated.yaml"), - "ADC", - ) - elif args.calibrate == "gain": - elem = 1.602e-19 # C (elementary charge) - analyzer.calibrate_nnls_values( - lambda x: x - * ( - (analyzer.v_per_adc * analyzer.up_sampling_ratio * analyzer.sampling_time) - / analyzer.adc_impedance - ) - / elem, - str(analyzer.result_yaml).replace(".yaml", "_gain_calibrated.yaml"), - "a.u.", - ) + analyzer.calibrate_nnls_values(output_file=str(analyzer.result_yaml).replace(".yaml", "_calibrated.yaml")) logger.info("Processing complete.") except Exception as e: From f171e489c971140c13fbacb6c5b03c57ec78e5fb Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 10 Dec 2025 12:07:13 -0800 Subject: [PATCH 18/32] moved helper functions to util file --- pyproject.toml | 2 + .../pmt/ana/peSpectrumAnalyzer.py | 318 
++++++------------ src/mintanalysis/pmt/ana/utils.py | 128 +++++++ 3 files changed, 237 insertions(+), 211 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/utils.py diff --git a/pyproject.toml b/pyproject.toml index b6caa17..c70749a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ dependencies = [ "iminuit", "pymongo", "PyYAML", +"pint", "sshtunnel", "pygama>=2.0.0", "legend-daq2lh5>=1.2.1", @@ -65,6 +66,7 @@ dependencies = [ "numba>0.52", "pandas", "matplotlib", +"uncertainties", "scipy>=1.14", "jupyter", "ipython", diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index 41a467c..6b87d28 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -18,7 +18,6 @@ import argparse import logging -from collections.abc import Callable from dataclasses import dataclass from pathlib import Path from typing import Any @@ -29,11 +28,19 @@ from iminuit import Minuit from lgdo import lh5 from matplotlib.backends.backend_pdf import PdfPages -from scipy.optimize import curve_fit - from pint import UnitRegistry -from uncertainties import ufloat, UFloat - +from scipy.optimize import curve_fit +from uncertainties import ufloat + +from .utils import ( + gaussian, + get_physics_object, + linear_func, + poisson_nll, + quantity_to_dict, + setup_logging, + valley_index_strict, +) # -------------------------- # TODO For Debugging only! 
@@ -47,39 +54,6 @@ A4_LANDSCAPE = (11.69, 8.27) -# -------------------------- -# Logging Setup -# -------------------------- - - -def setup_logging( - log_file: Path = RESULT_DIR / "analysis.log", level: int = logging.INFO -) -> logging.Logger: - logger = logging.getLogger("PESpectrum") - logger.setLevel(level) - logger.propagate = False - - if not logger.handlers: - fmt = logging.Formatter( - "[%(asctime)s] [%(name)s - %(funcName)s] [%(levelname)s] %(message)s" - ) - - sh = logging.StreamHandler() - sh.setLevel(level) - sh.setFormatter(fmt) - logger.addHandler(sh) - - fh = logging.FileHandler(log_file, mode="w") - fh.setLevel(level) - fh.setFormatter(fmt) - logger.addHandler(fh) - - return logger - - -# -------------------------- -# Small helpers -# -------------------------- @dataclass(frozen=True) class Calibration: up_sampling_ratio: float @@ -88,109 +62,12 @@ class Calibration: sampling_time: float renormalization_factor: float + @dataclass class ChannelResult: status: str # 'ok', 'skipped', 'error' data: dict[str, Any] -def get_physics_object(obj, ureg): - """ - Convert a given object recursevly - to a object with all {val,(err),unit} occurences replaced with a pint (and ufloat) object - """ - if isinstance(obj, dict) and {"val", "unit"}.issubset(obj): - val = obj["val"] - if "err" in obj: - err = obj["err"] - return ureg.Quantity(ufloat(val, err),ureg(obj["unit"])) - else: - return ureg.Quantity(val,ureg(obj["unit"])) - - elif isinstance(obj, dict): - return {k: get_physics_object(v, ureg) for k, v in obj.items()} - - elif isinstance(obj, (list, tuple)): - return type(obj)(get_physics_object(v, ureg) for v in obj) - - else: - return obj - -def quantity_to_dict(obj): - """ - Recursively replace a pint (and ufloat) object with a dict - {val, (err), unit} - """ - # The pint internal function is broken (see issue 2121) - preferred_units = ['Hz'] - if hasattr(obj, "magnitude") and hasattr(obj, "units"): - if not preferred_units is None: - for u in 
preferred_units: - if obj.is_compatible_with(u): - obj = obj.to(u) - mag = obj.magnitude - - - if isinstance(mag, UFloat): - val = mag.n - err = mag.s - else: - val = mag - err = None - - return { - "val": float(val), - **({ "err": float(err) } if err is not None else {}), - "unit": format(obj.units,"~") - } - - elif isinstance(obj, dict): - return {k: quantity_to_dict(v) for k, v in obj.items()} - - elif isinstance(obj, (list, tuple)): - return type(obj)(quantity_to_dict(v) for v in obj) - - else: - return obj - -def linear_func(x, a, b): - return a * x + b - - -def gaussian(x: np.ndarray, amp: float, mu: float, sigma: float) -> np.ndarray: - return amp * np.exp(-0.5 * ((x - mu) / sigma) ** 2) - - -def poisson_nll(amp: float, mu: float, sigma: float, x: np.ndarray, y: np.ndarray) -> float: - expected = gaussian(x, amp, mu, sigma) - expected = np.clip(expected, 1e-10, None) - return float(np.sum(expected - y * np.log(expected))) - - -def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: - """Return (peak_idx, valley_idx) or None.""" - n = len(y) - if n < 3: - return None - - peak = None - for i in range(1, n - 1): - if y[i] > y[i - 1] and y[i] > y[i + 1]: - peak = i - break - if peak is None: - return None - - valley = peak - i = peak + 1 - while i < n: - if y[i] > y[i - 1]: - return peak, valley - if y[i] < y[valley]: - valley = i - i += 1 - - return None - # -------------------------- # Analyzer class @@ -204,7 +81,7 @@ def __init__( lim: float = 20, override_results: bool = False, logger: logging.Logger | None = None, - calibrator: Calibration = Calibration(24/240,0.25e-3,75.,4.8e-9,1), + calibrator: Calibration | None = None, calib: str = "None", ) -> None: self.aux_yaml = aux_yaml @@ -219,25 +96,23 @@ def __init__( self.logger = logger or setup_logging() self.plot_folder.mkdir(parents=True, exist_ok=True) self.calibrator = calibrator + if calibrator is None: + self.calibrator = Calibration(24 / 240, 0.25e-3, 75.0, 4.8e-9, 1) self.calib = calib 
self.ureg = UnitRegistry() self.aux = self._load_aux() # unit handling - vadc = self.ureg.Quantity(calibrator.v_per_adc,self.ureg.volt) - usr = self.ureg.Quantity(calibrator.up_sampling_ratio, self.ureg.dimensionless) - rf = self.ureg.Quantity(calibrator.renormalization_factor, self.ureg.dimensionless) - st = self.ureg.Quantity(calibrator.sampling_time,self.ureg.seconds) - imp = self.ureg.Quantity(calibrator.adc_impedance,self.ureg.ohm) + vadc = self.ureg.Quantity(self.calibrator.v_per_adc, self.ureg.volt) + usr = self.ureg.Quantity(self.calibrator.up_sampling_ratio, self.ureg.dimensionless) + rf = self.ureg.Quantity(self.calibrator.renormalization_factor, self.ureg.dimensionless) + st = self.ureg.Quantity(self.calibrator.sampling_time, self.ureg.seconds) + imp = self.ureg.Quantity(self.calibrator.adc_impedance, self.ureg.ohm) - nnls_coloumb_factor = ((vadc*usr*st*rf)/imp) + nnls_coloumb_factor = (vadc * usr * st * rf) / imp self.ureg.define(f"NNLS = {nnls_coloumb_factor.to('coulomb').magnitude} * coulomb") self.ureg.define(f"ADC = {usr.magnitude}*NNLS") - - - - # ---------------------- # I/O helpers # ---------------------- @@ -250,7 +125,7 @@ def _load_aux(self) -> dict[str, Any]: self.logger.info("Loaded aux YAML: %s", self.aux_yaml) # convert to physics units - aux = get_physics_object(aux,self.ureg) + aux = get_physics_object(aux, self.ureg) if self.keys is None: return aux @@ -262,8 +137,8 @@ def _load_aux(self) -> dict[str, Any]: msg = f"Key {k} not in aux file, skipping." self.logger.warning(msg) return ret - - def _load_results(self) ->dict: + + def _load_results(self) -> dict: if not self.result_yaml.exists(): msg = f"Result YAML not found: {self.result_yaml}" raise FileNotFoundError(msg) @@ -272,9 +147,8 @@ def _load_results(self) ->dict: if "pe_spectrum" not in data: msg = "pe_spectrum info not present in result YAML; run analysis first." 
raise RuntimeError(msg) - - return get_physics_object(data,self.ureg) + return get_physics_object(data, self.ureg) def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) -> None: if self.result_yaml.exists(): @@ -402,7 +276,7 @@ def process_channel( # total counts result["statistics"] = { - "total_cts": self.ureg.Quantity(ufloat(np.sum(n),np.sum(n)**0.5), 'dimensionless'), + "total_cts": self.ureg.Quantity(ufloat(np.sum(n), np.sum(n) ** 0.5), "dimensionless"), } # runtime @@ -493,9 +367,13 @@ def process_channel( fit_errs = {k: float(v) for k, v in m.errors.to_dict().items()} result["pe_peak_fit"] = { - "mean": self.ureg.Quantity(ufloat(fit_vals["mu"], fit_errs["mu"]),self.ureg.NNLS), - "sigma": self.ureg.Quantity(ufloat(fit_vals["sigma"], fit_errs["sigma"]),self.ureg.NNLS), - "amp": self.ureg.Quantity(ufloat(fit_vals["amp"], fit_errs["amp"]),self.ureg.dimensionless) + "mean": self.ureg.Quantity(ufloat(fit_vals["mu"], fit_errs["mu"]), self.ureg.NNLS), + "sigma": self.ureg.Quantity( + ufloat(fit_vals["sigma"], fit_errs["sigma"]), self.ureg.NNLS + ), + "amp": self.ureg.Quantity( + ufloat(fit_vals["amp"], fit_errs["amp"]), self.ureg.dimensionless + ), } # full fit curve over bins for plotting & integrals @@ -507,19 +385,28 @@ def process_channel( # statistics try: result["statistics"] = { - "1st_pe_fit_integral_below_valley": self.ureg.Quantity(ufloat(np.sum(y_fit[:valley_idx]), - np.sum(y_fit[:valley_idx])**0.5), 'dimensionless'), - "cts_above_valley": self.ureg.Quantity(ufloat(np.sum(n[:valley_idx]), - np.sum(n[:valley_idx])**0.5), 'dimensionless'), - "1st_pe_fit_integral": self.ureg.Quantity(ufloat(np.sum(y_fit), np.sum(y_fit)**0.5), 'dimensionless'), - "total_cts": self.ureg.Quantity(ufloat(np.sum(n), np.sum(n)**0.5), 'dimensionless'), + "1st_pe_fit_integral_below_valley": self.ureg.Quantity( + ufloat(np.sum(y_fit[:valley_idx]), np.sum(y_fit[:valley_idx]) ** 0.5), + "dimensionless", + ), + "cts_above_valley": self.ureg.Quantity( + 
ufloat(np.sum(n[:valley_idx]), np.sum(n[:valley_idx]) ** 0.5), "dimensionless" + ), + "1st_pe_fit_integral": self.ureg.Quantity( + ufloat(np.sum(y_fit), np.sum(y_fit) ** 0.5), "dimensionless" + ), + "total_cts": self.ureg.Quantity( + ufloat(np.sum(n), np.sum(n) ** 0.5), "dimensionless" + ), "valley": { "pos": float(bin_centers[valley_idx]) * self.ureg.NNLS, - "amp": self.ureg.Quantity(ufloat(n[valley_idx],n[valley_idx]**0.5), 'dimensionless') + "amp": self.ureg.Quantity( + ufloat(n[valley_idx], n[valley_idx] ** 0.5), "dimensionless" + ), }, "1st_pe_guess": { - "pos": float(mu0) *self.ureg.NNLS, - "amp": self.ureg.Quantity(ufloat(amp0,amp0**0.5), 'dimensionless'), + "pos": float(mu0) * self.ureg.NNLS, + "amp": self.ureg.Quantity(ufloat(amp0, amp0**0.5), "dimensionless"), }, } except Exception as e: @@ -587,21 +474,23 @@ def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: is_y : bool If True add a new y-axis. Else an x-axis. """ - + if self.calib == "gain": label = "Gain (a.u.)" - func = (lambda x: ((x*self.ureg.NNLS).to("C")/self.ureg.elementary_charge).m, - lambda y: ((y*self.ureg.elementary_charge).to("NNLS")).m + func = ( + lambda x: ((x * self.ureg.NNLS).to("C") / self.ureg.elementary_charge).m, + lambda y: ((y * self.ureg.elementary_charge).to("NNLS")).m, ) else: if not self.ureg.NNLS.is_compatible_with(self.calib): msg = f"Unit [{self.calib}] not compatible with charge" self.logger.warning(msg) label = f"Charge ({self.calib})" - func = (lambda x: (x*self.ureg.NNLS).to(self.calib).m, - lambda y: (y*self.ureg(self.calib)).to("NNLS").m + func = ( + lambda x: (x * self.ureg.NNLS).to(self.calib).m, + lambda y: (y * self.ureg(self.calib)).to("NNLS").m, ) - + if is_y: secax_y = ax.secondary_yaxis("right", functions=func) secax_y.set_ylabel(label) @@ -609,7 +498,7 @@ def _add_charge_axis(self, ax: plt.Axes, is_y) -> None: secax_x = ax.secondary_xaxis("top", functions=func) secax_x.set_xlabel(label) - def _unit_converter(self, v,target_unit, constant = 1): + def 
_unit_converter(self, v, target_unit, constant=1): """ Take a value v and if its a Quantity apply conversion of the targeted unit. @@ -624,23 +513,26 @@ def _unit_converter(self, v,target_unit, constant = 1): dims = dict(v._units) # Decompose units dims = dict(v._units) - target_units = {k: d for k, d in dims.items() if self.ureg(k).is_compatible_with(target_unit)} - other_units = {k: d for k, d in dims.items() if not self.ureg(k).is_compatible_with(target_unit)} + target_units = { + k: d for k, d in dims.items() if self.ureg(k).is_compatible_with(target_unit) + } + other_units = { + k: d for k, d in dims.items() if not self.ureg(k).is_compatible_with(target_unit) + } # Start with dimensionless v_base = v / v.units # Multiply back non-target units for k, d in other_units.items(): - v_base *= self.ureg(k)**d + v_base *= self.ureg(k) ** d # Convert target units to target unit for k, d in target_units.items(): - v_base *= (self.ureg(k).to(target_unit))**d + v_base *= (self.ureg(k).to(target_unit)) ** d return v_base * constant - # ---------------------- # Signal to Noise Ratio (SNR) # ---------------------- @@ -653,27 +545,27 @@ def compute_snr(self) -> None: try: if info.get("voltage") == 0 * self.ureg.V: continue - noise =info.get("statistics").get("valley").get("amp") - + noise = info.get("statistics").get("valley").get("amp") + if info.get("status", "skipped") == "ok": signal = info.get("pe_peak_fit").get("amp") else: - signal =info.get("statistics").get("1st_pe_guess").get("amp") - run_snr[pmt] = 1- noise/signal + signal = info.get("statistics").get("1st_pe_guess").get("amp") + run_snr[pmt] = 1 - noise / signal except Exception as e: self.logger.warning( "Failed to compute SNR for run %s PMT %s: %s", run_name, pmt, e ) snr[run_name] = run_snr - self._save_results(snr,"snr") + self._save_results(snr, "snr") fig, ax = plt.subplots(figsize=A4_LANDSCAPE) for run_name, pmt_dict in snr.items(): pmts = sorted(pmt_dict.keys()) vals = [pmt_dict[p].n for p in pmts] errs = 
[pmt_dict[p].s for p in pmts] - ax.errorbar(pmts, vals, errs, label=run_name, fmt='o') + ax.errorbar(pmts, vals, errs, label=run_name, fmt="o") ax.set_xlabel("PMT") ax.set_ylabel("SNR (a.u.)") ax.set_title("SNR per PMT (= 1 - valley/peak)") @@ -694,7 +586,7 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: run_dcr = {} for pmt, info in pmt_dict.items(): try: - if info.get("voltage") == 0*self.ureg.volt: + if info.get("voltage") == 0 * self.ureg.volt: continue stats = info.get("statistics", {}) runtime_info = info.get("runtime", {}) @@ -706,16 +598,18 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: time_mode, ) continue - dcts = stats.get("1st_pe_fit_integral_below_valley")+ stats.get("cts_above_valley") + dcts = stats.get("1st_pe_fit_integral_below_valley") + stats.get( + "cts_above_valley" + ) runtime = runtime_info[time_mode] - run_dcr[pmt] = dcts/runtime - + run_dcr[pmt] = dcts / runtime + except Exception as e: self.logger.warning( "Failed to compute DCR for run %s PMT %s: %s", run_name, pmt, e ) dcr[run_name] = run_dcr - self._save_results(dcr,"dcr") + self._save_results(dcr, "dcr") fig, ax = plt.subplots(figsize=A4_LANDSCAPE) for run_name, pmt_dict in dcr.items(): @@ -762,15 +656,16 @@ def compute_linear_gain_fit(self) -> None: xunit = pmt["voltage"][0].units pmt["voltage"] = [i.to(xunit) for i in pmt["voltage"]] yunit = pmt["vals"][0].units - pmt["vals"] =[i.to(yunit) for i in pmt["vals"]] - ax.errorbar([i.m for i in pmt["voltage"]], - [i.n for i in pmt["vals"]], - [i.s for i in pmt["vals"]], - label=f"PMT {key}", - fmt="o" - ) - ax.set_xlabel(f"Voltage ({format(xunit,"~")})") - ax.set_ylabel(f"Gain ({format(yunit,"~")})") + pmt["vals"] = [i.to(yunit) for i in pmt["vals"]] + ax.errorbar( + [i.m for i in pmt["voltage"]], + [i.n for i in pmt["vals"]], + [i.s for i in pmt["vals"]], + label=f"PMT {key}", + fmt="o", + ) + ax.set_xlabel(f"Voltage ({format(xunit,'~')})") + ax.set_ylabel(f"Gain ({format(yunit,'~')})") if 
self.calib != "None": self._add_charge_axis(ax, True) @@ -778,7 +673,7 @@ def compute_linear_gain_fit(self) -> None: params, covariance = curve_fit( linear_func, [i.m for i in pmt["voltage"]], - [i.n for i in pmt["vals"]], + [i.n for i in pmt["vals"]], [i.s for i in pmt["vals"]], absolute_sigma=True, ) @@ -786,8 +681,8 @@ def compute_linear_gain_fit(self) -> None: perr = np.sqrt(np.diag(covariance)) x = np.linspace(-1 * b_opt / a_opt, max([i.m for i in pmt["voltage"]]) + 10, 1000) ax.plot(x, linear_func(x, a_opt, b_opt), ls="--", color="red", label="Fit") - tmp_dic[key]["a"] = self.ureg.Quantity(ufloat(a_opt,perr[0]),yunit/xunit) - tmp_dic[key]["b"] = self.ureg.Quantity(ufloat(b_opt,perr[1]),yunit) + tmp_dic[key]["a"] = self.ureg.Quantity(ufloat(a_opt, perr[0]), yunit / xunit) + tmp_dic[key]["b"] = self.ureg.Quantity(ufloat(b_opt, perr[1]), yunit) tmp_dic[key]["func"] = "G = a*voltage+b" pmt.pop("voltage") @@ -795,12 +690,12 @@ def compute_linear_gain_fit(self) -> None: ax.legend() pdf.savefig(fig) plt.close(fig) - - self._save_results(tmp_dic,"linear_gain") + + self._save_results(tmp_dic, "linear_gain") def calibrate_nnls_values(self, output_file: str): """ - Reads the current results.yaml, + Reads the current results.yaml, finds all entries that contain a Charge compatible quantities Applies charge conversion as per self.calib writes out calibrated file to output_file. 
@@ -809,23 +704,22 @@ def calibrate_nnls_values(self, output_file: str): msg = f"Unit [{self.calib}] not compatible with charge" self.logger.error(msg) return - + data = self._load_results() def convert_charge_units(d): """Recursively convert all Quantities compatible with charge to new_unit.""" - for k, v in d.items(): + for _, v in d.items(): if isinstance(v, dict): convert_charge_units(v) # recurse elif isinstance(v, self.ureg.Quantity): if self.calib == "gain": - self._unit_converter(v,'C',self.ureg.elementary_charge) + self._unit_converter(v, "C", self.ureg.elementary_charge) else: - self._unit_converter(v,self.calib) - + self._unit_converter(v, self.calib) # Convert all charge-compatible quantities to nanocoulombs - convert_charge_units(data) + convert_charge_units(data) try: with open(output_file, "w") as f: @@ -898,7 +792,7 @@ def main() -> None: args = parser.parse_args() - logger = setup_logging(level=logging.INFO) + logger = setup_logging(log_file=RESULT_DIR / "analysis.log", level=logging.INFO) try: analyzer = PESpectrumAnalyzer( logger=logger, @@ -918,7 +812,9 @@ def main() -> None: if args.compute_snr: analyzer.compute_snr() if args.calibrate != "None": - analyzer.calibrate_nnls_values(output_file=str(analyzer.result_yaml).replace(".yaml", "_calibrated.yaml")) + analyzer.calibrate_nnls_values( + output_file=str(analyzer.result_yaml).replace(".yaml", "_calibrated.yaml") + ) logger.info("Processing complete.") except Exception as e: diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py new file mode 100644 index 0000000..2298d7c --- /dev/null +++ b/src/mintanalysis/pmt/ana/utils.py @@ -0,0 +1,128 @@ +''' +Helper functions for ana tier +''' +from __future__ import annotations +import logging +import numpy as np +from pathlib import Path +from uncertainties import UFloat, ufloat + +def setup_logging( + log_file: Path = "analysis.log", level: int = logging.INFO +) -> logging.Logger: + logger = logging.getLogger("PESpectrum") + 
logger.setLevel(level) + logger.propagate = False + + if not logger.handlers: + fmt = logging.Formatter( + "[%(asctime)s] [%(name)s - %(funcName)s] [%(levelname)s] %(message)s" + ) + + sh = logging.StreamHandler() + sh.setLevel(level) + sh.setFormatter(fmt) + logger.addHandler(sh) + + fh = logging.FileHandler(log_file, mode="w") + fh.setLevel(level) + fh.setFormatter(fmt) + logger.addHandler(fh) + + return logger + +def get_physics_object(obj, ureg): + """ + Convert a given object recursevly + to a object with all {val,(err),unit} occurrences replaced with a pint (and ufloat) object + """ + if isinstance(obj, dict) and {"val", "unit"}.issubset(obj): + val = obj["val"] + if "err" in obj: + err = obj["err"] + return ureg.Quantity(ufloat(val, err), ureg(obj["unit"])) + return ureg.Quantity(val, ureg(obj["unit"])) + + if isinstance(obj, dict): + return {k: get_physics_object(v, ureg) for k, v in obj.items()} + + if isinstance(obj, (list, tuple)): + return type(obj)(get_physics_object(v, ureg) for v in obj) + + return obj + + +def quantity_to_dict(obj): + """ + Recursively replace a pint (and ufloat) object with a dict + {val, (err), unit} + """ + # The pint internal function is broken (see issue 2121) + preferred_units = ["Hz"] + if hasattr(obj, "magnitude") and hasattr(obj, "units"): + if preferred_units is not None: + for u in preferred_units: + if obj.is_compatible_with(u): + obj = obj.to(u) + mag = obj.magnitude + + if isinstance(mag, UFloat): + val = mag.n + err = mag.s + else: + val = mag + err = None + + return { + "val": float(val), + **({"err": float(err)} if err is not None else {}), + "unit": format(obj.units, "~"), + } + + if isinstance(obj, dict): + return {k: quantity_to_dict(v) for k, v in obj.items()} + + if isinstance(obj, (list, tuple)): + return type(obj)(quantity_to_dict(v) for v in obj) + + return obj + + +def linear_func(x, a, b): + return a * x + b + + +def gaussian(x: np.ndarray, amp: float, mu: float, sigma: float) -> np.ndarray: + return amp * 
np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + +def poisson_nll(amp: float, mu: float, sigma: float, x: np.ndarray, y: np.ndarray) -> float: + expected = gaussian(x, amp, mu, sigma) + expected = np.clip(expected, 1e-10, None) + return float(np.sum(expected - y * np.log(expected))) + + +def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: + """Return (peak_idx, valley_idx) or None.""" + n = len(y) + if n < 3: + return None + + peak = None + for i in range(1, n - 1): + if y[i] > y[i - 1] and y[i] > y[i + 1]: + peak = i + break + if peak is None: + return None + + valley = peak + i = peak + 1 + while i < n: + if y[i] > y[i - 1]: + return peak, valley + if y[i] < y[valley]: + valley = i + i += 1 + + return None \ No newline at end of file From d7af52f759618917a02445619ddd5f8aded776e2 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Wed, 10 Dec 2025 16:46:23 -0800 Subject: [PATCH 19/32] First commit on uploader script --- pyproject.toml | 1 + src/mintanalysis/pmt/ana/__init__.py | 3 +- src/mintanalysis/pmt/ana/uploadToDB.py | 354 +++++++++++++++++++++++++ src/mintanalysis/pmt/ana/utils.py | 17 +- 4 files changed, 367 insertions(+), 8 deletions(-) create mode 100644 src/mintanalysis/pmt/ana/uploadToDB.py diff --git a/pyproject.toml b/pyproject.toml index c70749a..0d80565 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ docs = [ [project.scripts] # Format: = ":" build_ana = "mintanalysis.pmt.ana.peSpectrumAnalyzer:main" +upload_ana = "mintanalysis.pmt.ana.uploadToDB:main" #[[tool.uv.index]] # Optional name for the index. diff --git a/src/mintanalysis/pmt/ana/__init__.py b/src/mintanalysis/pmt/ana/__init__.py index 037a007..3d5417f 100644 --- a/src/mintanalysis/pmt/ana/__init__.py +++ b/src/mintanalysis/pmt/ana/__init__.py @@ -2,6 +2,7 @@ Routines for the final analysis of PMT data """ +from . 
import uploadToDB from .peSpectrumAnalyzer import PESpectrumAnalyzer -__all__ = ["PESpectrumAnalyzer"] +__all__ = ["PESpectrumAnalyzer", "uploadToDB"] diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py new file mode 100644 index 0000000..a3a2fd6 --- /dev/null +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -0,0 +1,354 @@ +""" +Takes selective data from the ANA tier and uploads it to the mongoDB +mongoDB structure has the shape: +""" + +from __future__ import annotations + +import argparse +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import Any, List + +import yaml +from pint import UnitRegistry + +from .utils import get_physics_object, quantity_to_dict, setup_logging + +ROOT_DIR = Path("/home/pkrause/software/mint-analysis/debug_out") + +# binary channel mask to upload +# 0x0001 is PMT 1 only +# 0x0002 is PMT 2 only +# 0x0003 is PMT 1,2 +# 0x0004 is PMT 3 only + + +NumberOrList = float | List[float] +StrOrList = str | List[str] + + +@dataclass +class Mapping: + parameters: list[str] + func: str + + +@dataclass +class PMTInfo: + v10_in_volt: NumberOrList + di10_in_mA: NumberOrList + frac: NumberOrList + + +@dataclass +class EnvironmentInfo: + temperature_in_celsius: NumberOrList + pressure_in_hpa: NumberOrList + humidity_in_percent: NumberOrList + measurement_duration_in_s: NumberOrList + + +@dataclass +class SoftwareInfo: + framework: str + pe_reconstruction: str # e.g. "NNLS" | "Integral" + sftp_path: str + run_tags: StrOrList + + +@dataclass +class MeasurementResult: + measurement_type: str + measurement_location: str + devices_used: Any # TODO: Loosely typed right now, can be tightened with mongoDB object? 
+ + result: NumberOrList + result_unc: NumberOrList + units: StrOrList + + pmt_info: PMTInfo + env_info: EnvironmentInfo + software_info: SoftwareInfo + + mapping: Mapping | None = None + + def __post_init__(self): + # ---- Type consistency: list or scalar ---- + is_list_result = isinstance(self.result, list) + is_list_unc = isinstance(self.result_unc, list) + is_list_units = isinstance(self.units, list) + + if len({is_list_result, is_list_unc, is_list_units}) != 1: + msg = "result, result_unc, and units must all be either lists or scalars" + raise ValueError(msg) + + # ---- Mapping existence rule ---- + if self.mapping is not None and not is_list_result: + msg = "mapping may only be provided when result, result_unc, and units are lists" + raise ValueError() + + # ---- Length consistency (only for list case) ---- + if is_list_result: + lengths = { + len(self.result), + len(self.result_unc), + len(self.units), + } + + if self.mapping is not None: + lengths.add(len(self.mapping.parameters)) + + if len(lengths) != 1: + msg = "result, result_unc, units, and mapping.parameters must all have same length" + raise ValueError(msg) + + +def get_values( + obj: Any, +) -> tuple[NumberOrList, NumberOrList, StrOrList, Mapping | None, list | None]: + if hasattr(obj, "magnitude") and hasattr(obj, "units"): + ret = quantity_to_dict(obj) + if "err" not in ret: + return ret["val"], 0, ret["unit"], None, None + return ret["val"], ret["err"], ret["unit"], None, None + # else we have multiple results (e.g. 
gain fit) + if isinstance(obj, dict): + ret = quantity_to_dict(obj) + vals = [] + errs = [] + units = [] + params = [] + for k, v in ret.items(): + if not isinstance(v, dict): + continue + vals.append(v["val"]) + errs.append(v.get("err", 0)) + units.append(v["unit"]) + params.append(k) + + return ( + vals, + errs, + units, + Mapping(params, ret.get("func", "No mapping function provided")), + ret.get("used_keys", []), + ) + + msg = f"{type(obj)} not supported" + raise NotImplementedError(msg) + + +def get_pmt_info(channel: int, aux_dict: dict, keys: str | list) -> PMTInfo: + + if isinstance(keys, str): + v10 = aux_dict[keys][channel]["v10"] + di10 = aux_dict[keys][channel]["di10"] + frac = aux_dict[keys][channel]["frac"] + return PMTInfo(v10.to("V").m, di10.to("mA").m, frac.m) + + if isinstance(keys, list): + v10 = [] + di10 = [] + frac = [] + for k in keys: + v10.append(aux_dict[k][channel]["v10"].to("V").m) + di10.append(aux_dict[k][channel]["di10"].to("mA").m) + frac.append(aux_dict[k][channel]["frac"].m) + return PMTInfo(v10, di10, frac) + + msg = f"{type(keys)} not supported" + raise NotImplementedError(msg) + + +def get_env_info(aux_dict: dict, keys: str | list) -> EnvironmentInfo: + """ + Return average env info across runs + """ + if isinstance(keys, str): + pth_start = aux_dict[keys]["pth_start"] + pth_end = aux_dict[keys]["pth_end"] + t = (pth_end["temperature"].to("degC").m + pth_start["temperature"].to("degC").m) / 2 + p = (pth_end["pressure"].to("hPa").m + pth_start["pressure"].to("hPa").m) / 2 + h = (pth_end["humidity"].to("percent").m + pth_start["humidity"].to("percent").m) / 2 + m = aux_dict[keys]["runtime"].to("seconds").m + return EnvironmentInfo(t, p, h, m) + + if isinstance(keys, list): + t, p, h, m = [], [], [], [] + for key in keys: + pth_start = aux_dict[key]["pth_start"] + pth_end = aux_dict[key]["pth_end"] + t.append( + (pth_end["temperature"].to("degC").m + pth_start["temperature"].to("degC").m) / 2 + ) + 
p.append((pth_end["pressure"].to("hPa").m + pth_start["pressure"].to("hPa").m) / 2) + h.append( + (pth_end["humidity"].to("percent").m + pth_start["humidity"].to("percent").m) / 2 + ) + m.append(aux_dict[key]["runtime"].to("seconds").m) + + return EnvironmentInfo(t, p, h, m) + + msg = f"{type(keys)} not supported" + raise NotImplementedError(msg) + + +def auto_int(x): + """ + Custom type converter for argparse that attempts to convert a string + to an integer, supporting both decimal and hexadecimal (prefixed with '0x'). + """ + try: + # Attempt to convert as a decimal integer + return int(x) + except ValueError: + try: + # If decimal conversion fails, attempt to convert as hexadecimal + # The base 0 in int(x, 0) handles '0x' prefix automatically + return int(x, 0) + except ValueError as e: + msg = f"Invalid integer or hexadecimal value: '{x}'" + raise argparse.ArgumentTypeError(msg) from e + + +def main(): + logger = setup_logging(level=logging.INFO) + ureg = UnitRegistry() + + parser = argparse.ArgumentParser(description="Upload analysis results to mongoDB.") + parser.add_argument("-x", "--f_aux", help="Path to aux file", required=True) + parser.add_argument("-a", "--f_ana", help="Path to ana file", required=True) + parser.add_argument( + "-c", + "--ch_mask", + type=auto_int, + help="Channel mask (can be decimal or hexadecimal, e.g., 10 or 0xA)", + required=True, + ) + parser.add_argument( + "-m", + "--measurement", + choices=["dcr", "linear_gain", "snr"], + help="Measurement to upload", + required=True, + ) + parser.add_argument( + "-k", + "--key", + default=None, + help="key to upload (will be ignored for linear_gain measurement)", + ) + parser.add_argument( + "-r", "--reco", default="NNLS", help="Reconstruction algorithm used in DSP" + ) + + args = parser.parse_args() + result_yaml = args.f_ana + aux_file = args.f_aux + measurement = args.measurement + reco = args.reco + ch_mask = args.ch_mask + tags = args.key + + # TODO For debug only. 
By definition keys need to be identical in aux and result files + tag_aux = "2025_12_02_22_46_06" + if measurement == "linear_gain": + tag_aux = ["2025_12_02_22_46_06", "2025_12_02_22_46_58"] + + sftp_pmt_dir = "PMT" + sftp_base_dir = Path("/mint/mint-data/") + p = Path(aux_file) + try: + index = p.parts.index(sftp_pmt_dir) + sftp_root_dir = Path(p.parts[index - 1]) + sftp_dir = str(sftp_base_dir / sftp_root_dir / sftp_pmt_dir) + except ValueError: + msg = f"'{sftp_pmt_dir}' not found in the aux path." + logger.error(msg) + + try: + with open(result_yaml) as f: + data = yaml.safe_load(f) + data = get_physics_object(data, ureg) + except Exception as e: + msg = f"Failed to load file: {e}" + logger.error(msg) + raise + try: + with open(aux_file) as f: + aux = yaml.safe_load(f) + aux = get_physics_object(aux, ureg) + except Exception as e: + msg = f"Failed to load file: {e}" + logger.error(msg) + raise + + if measurement not in data: + msg = f"{measurement} measurement not in result file" + logger.error(msg) + return + + if measurement == "linear_gain": + if tags is not None: + logger.warning( + "Tags are defined redundandly. Using tags defined in linear gain results" + ) + tags = data[measurement].get("used_keys") + if len(tags) < 3: + logger.warning("Less than three measurements found. 
Linear fit result will be bogus") + + while ch_mask: + ch = (ch_mask & -ch_mask).bit_length() - 1 + pmt_no = ch + 1 + + # collect value(s) + vals, errs, units, mapping, _keys = get_values( + data[measurement][pmt_no] + if measurement == "linear_gain" + else data[measurement][tags][pmt_no] + ) + logger.info("collected measurement results") + + # collect pmt info + pmt_info = get_pmt_info(pmt_no, aux, tag_aux) + logger.info("collected PMT info") + + # get environment info + env_info = get_env_info(aux, tag_aux) + logger.info("collected environment info") + + sw_info = SoftwareInfo( + framework="mint-xyz", pe_reconstruction=reco, sftp_path=sftp_dir, run_tags=tags + ) + logger.info("collected software info") + + # get used devices + pmt_devices = f"pmt_{pmt_no}" + + # Build measurement result + res = MeasurementResult( + measurement_type=measurement, + measurement_location="MINT", + devices_used=pmt_devices, + result=vals, + result_unc=errs, + units=units, + mapping=mapping, + pmt_info=pmt_info, + env_info=env_info, + software_info=sw_info, + ) + logger.info("Build data object") + + # TODO Upload to database + + ch_mask &= ch_mask - 1 # clear lowest set bit + + +# -------------------------- +# CLI entrypoint +# -------------------------- +if __name__ == "__main__": + main() diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py index 2298d7c..8db3c87 100644 --- a/src/mintanalysis/pmt/ana/utils.py +++ b/src/mintanalysis/pmt/ana/utils.py @@ -1,15 +1,17 @@ -''' +""" Helper functions for ana tier -''' +""" + from __future__ import annotations + import logging -import numpy as np from pathlib import Path + +import numpy as np from uncertainties import UFloat, ufloat -def setup_logging( - log_file: Path = "analysis.log", level: int = logging.INFO -) -> logging.Logger: + +def setup_logging(log_file: Path = "analysis.log", level: int = logging.INFO) -> logging.Logger: logger = logging.getLogger("PESpectrum") logger.setLevel(level) logger.propagate = 
False @@ -31,6 +33,7 @@ def setup_logging( return logger + def get_physics_object(obj, ureg): """ Convert a given object recursevly @@ -125,4 +128,4 @@ def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: valley = i i += 1 - return None \ No newline at end of file + return None From 79d2827b8b6087891149eba5271774b49c7aa1bf Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 13 Dec 2025 01:59:36 -0800 Subject: [PATCH 20/32] Move to Paths relative to aux file --- .../pmt/ana/peSpectrumAnalyzer.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index 6b87d28..90180cf 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -42,12 +42,6 @@ valley_index_strict, ) -# -------------------------- -# TODO For Debugging only! -# -------------------------- -RAW_DIR = Path("/home/pkrause/noise_hunt/data/p-1-1-om-hs-31/ref-v0.0.0/generated/tier/raw/r020") -RESULT_DIR = Path("/home/pkrause/software/mint-analysis/debug_out") - # -------------------------- # Constants # -------------------------- @@ -86,9 +80,9 @@ def __init__( ) -> None: self.aux_yaml = aux_yaml self.keys = keys - self.raw_dir = RAW_DIR - self.plot_folder = RESULT_DIR / "plots" - self.result_yaml = RESULT_DIR / "results.yaml" + + self.plot_folder = self.aux_yaml.parent / "../ana/plots" + self.result_yaml = self.aux_yaml.parent / "../ana/results.yaml" self.bin_size = bin_size self.bins = np.arange(-100, 10000, bin_size) self.lim = lim @@ -189,8 +183,9 @@ def run(self) -> None: # ---------------------- def analyze_run(self, run_name: str, meta: dict[str, Any]) -> dict[int, dict[str, Any]]: # build file paths - fname = meta["daq"].split("/")[-1].replace("daq", "r020").replace("data", "lh5") - f_raw = self.raw_dir / fname + f_raw = self.aux_yaml.parent / Path( + meta["daq"].replace("daq", 
"raw").replace("data", "lh5") + ) if not f_raw.exists(): msg = f"Raw file for run {run_name} not found: {f_raw}" raise FileNotFoundError(msg) @@ -792,7 +787,9 @@ def main() -> None: args = parser.parse_args() - logger = setup_logging(log_file=RESULT_DIR / "analysis.log", level=logging.INFO) + logger = setup_logging( + log_file=Path(args.aux_file).parent / "../ana/analysis.log", level=logging.INFO + ) try: analyzer = PESpectrumAnalyzer( logger=logger, From 523e00295ba6f96e71848512ca8ecad901e0a94c Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Sat, 13 Dec 2025 02:13:12 -0800 Subject: [PATCH 21/32] Switch pe peak initial guess algorithm, add noise peak info to stats --- src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index 90180cf..e7713fc 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -305,16 +305,15 @@ def process_channel( noise_peak, valley_idx = vi # find first p.e. peak after noise_peak - sub = n[noise_peak:] - pe_vi = valley_index_strict(sub) + sub = n[valley_idx:] + pe_vi = np.argmax(sub) if pe_vi is None: msg = "1st-p.e. detection failed after noise peak." 
self.logger.warning("Run %s ch %s: %s", run_name, ch, msg) self._decorate_axis(ax) return fig, {"status": "skipped", "reason": msg} - pe_peak_rel, _ = pe_vi - pe_peak_idx = noise_peak + pe_peak_rel + pe_peak_idx = valley_idx + pe_vi # annotate choices ax.axvline(bin_centers[valley_idx], color="red", ls="--", label="valley") @@ -393,6 +392,12 @@ def process_channel( "total_cts": self.ureg.Quantity( ufloat(np.sum(n), np.sum(n) ** 0.5), "dimensionless" ), + "noise_peak": { + "pos": float(bin_centers[noise_peak]) * self.ureg.NNLS, + "amp": self.ureg.Quantity( + ufloat(n[noise_peak], n[noise_peak] ** 0.5), "dimensionless" + ), + }, "valley": { "pos": float(bin_centers[valley_idx]) * self.ureg.NNLS, "amp": self.ureg.Quantity( From 0a860df65561139aa70c45bbb3f15098959ca16d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Dec 2025 10:39:55 -0800 Subject: [PATCH 22/32] Split out utility functions --- analysis.log | 7 +++++++ src/mintanalysis/pmt/ana/utils.py | 25 +++++++++++-------------- 2 files changed, 18 insertions(+), 14 deletions(-) create mode 100644 analysis.log diff --git a/analysis.log b/analysis.log new file mode 100644 index 0000000..dd93656 --- /dev/null +++ b/analysis.log @@ -0,0 +1,7 @@ +[2025-12-10 16:29:52,739] [PESpectrum - main] [WARNING] Tags are defined redundandly. Using tags defined in linear gain results +[2025-12-10 16:29:52,739] [PESpectrum - main] [WARNING] Less than three measurments found. 
Linear fit result will be bogous +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected measurement results +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected PMT info +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected environment info +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected software info +[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] Build data object diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py index 8db3c87..5d02f87 100644 --- a/src/mintanalysis/pmt/ana/utils.py +++ b/src/mintanalysis/pmt/ana/utils.py @@ -1,17 +1,15 @@ -""" +''' Helper functions for ana tier -""" - +''' from __future__ import annotations - import logging -from pathlib import Path - import numpy as np +from pathlib import Path from uncertainties import UFloat, ufloat - -def setup_logging(log_file: Path = "analysis.log", level: int = logging.INFO) -> logging.Logger: +def setup_logging( + log_file: Path = "analysis.log", level: int = logging.INFO +) -> logging.Logger: logger = logging.getLogger("PESpectrum") logger.setLevel(level) logger.propagate = False @@ -33,7 +31,6 @@ def setup_logging(log_file: Path = "analysis.log", level: int = logging.INFO) -> return logger - def get_physics_object(obj, ureg): """ Convert a given object recursevly @@ -55,7 +52,7 @@ def get_physics_object(obj, ureg): return obj -def quantity_to_dict(obj): +def quantity_to_dict(obj,val_key="val", err_key="err", unit_key="unit"): """ Recursively replace a pint (and ufloat) object with a dict {val, (err), unit} @@ -77,9 +74,9 @@ def quantity_to_dict(obj): err = None return { - "val": float(val), - **({"err": float(err)} if err is not None else {}), - "unit": format(obj.units, "~"), + val_key: float(val), + **({err_key: float(err)} if err is not None else {}), + unit_key: format(obj.units, "~"), } if isinstance(obj, dict): @@ -128,4 +125,4 @@ def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: valley = i i += 1 
- return None + return None \ No newline at end of file From 9df71d42f1b58158a150e3a832c37a3c5579b776 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 18:40:39 +0000 Subject: [PATCH 23/32] style: pre-commit fixes --- src/mintanalysis/pmt/ana/utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py index 5d02f87..f7cc532 100644 --- a/src/mintanalysis/pmt/ana/utils.py +++ b/src/mintanalysis/pmt/ana/utils.py @@ -1,15 +1,17 @@ -''' +""" Helper functions for ana tier -''' +""" + from __future__ import annotations + import logging -import numpy as np from pathlib import Path + +import numpy as np from uncertainties import UFloat, ufloat -def setup_logging( - log_file: Path = "analysis.log", level: int = logging.INFO -) -> logging.Logger: + +def setup_logging(log_file: Path = "analysis.log", level: int = logging.INFO) -> logging.Logger: logger = logging.getLogger("PESpectrum") logger.setLevel(level) logger.propagate = False @@ -31,6 +33,7 @@ def setup_logging( return logger + def get_physics_object(obj, ureg): """ Convert a given object recursevly @@ -52,7 +55,7 @@ def get_physics_object(obj, ureg): return obj -def quantity_to_dict(obj,val_key="val", err_key="err", unit_key="unit"): +def quantity_to_dict(obj, val_key="val", err_key="err", unit_key="unit"): """ Recursively replace a pint (and ufloat) object with a dict {val, (err), unit} @@ -125,4 +128,4 @@ def valley_index_strict(y: np.ndarray) -> tuple[int, int] | None: valley = i i += 1 - return None \ No newline at end of file + return None From 50be2d5f086f8149a67c16c87ddec2353d7ff589 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Dec 2025 10:55:43 -0800 Subject: [PATCH 24/32] Create dir if not existing --- src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff 
--git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index e7713fc..fd2e6b2 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -792,9 +792,10 @@ def main() -> None: args = parser.parse_args() - logger = setup_logging( - log_file=Path(args.aux_file).parent / "../ana/analysis.log", level=logging.INFO - ) + f_log = Path(args.aux_file).parent / "../ana/analysis.log" + f_log.parent.mkdir(parents=True, exist_ok=True) + + logger = setup_logging(log_file=f_log, level=logging.INFO) try: analyzer = PESpectrumAnalyzer( logger=logger, From 4efe039ecf12f241155664481eb9e445d70798b5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Mon, 15 Dec 2025 15:06:35 -0800 Subject: [PATCH 25/32] edited uploader --- analysis.log | 23 ++-- .../pmt/ana/peSpectrumAnalyzer.py | 9 ++ src/mintanalysis/pmt/ana/uploadToDB.py | 102 ++++++++++++++---- src/mintanalysis/pmt/ana/utils.py | 4 +- 4 files changed, 111 insertions(+), 27 deletions(-) diff --git a/analysis.log b/analysis.log index dd93656..2041109 100644 --- a/analysis.log +++ b/analysis.log @@ -1,7 +1,16 @@ -[2025-12-10 16:29:52,739] [PESpectrum - main] [WARNING] Tags are defined redundandly. Using tags defined in linear gain results -[2025-12-10 16:29:52,739] [PESpectrum - main] [WARNING] Less than three measurments found. 
Linear fit result will be bogous -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected measurement results -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected PMT info -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected environment info -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] collected software info -[2025-12-10 16:29:52,739] [PESpectrum - main] [INFO] Build data object +[2025-12-15 15:05:54,438] [PESpectrum - main] [INFO] collected measurement results +[2025-12-15 15:05:54,438] [PESpectrum - main] [INFO] collected PMT info +[2025-12-15 15:05:54,439] [PESpectrum - main] [INFO] collected environment info +[2025-12-15 15:05:54,439] [PESpectrum - main] [INFO] collected software info +[2025-12-15 15:05:58,512] [PESpectrum - _connect_tunnel] [INFO] [info] SSH tunnel established: 127.0.0.1:43307 -> 127.0.0.1:27017 via production.pacific-neutrino.org +[2025-12-15 15:05:58,620] [PESpectrum - _connect_client] [INFO] [info] Connected to MongoDB at 127.0.0.1:43307 with user p-one +[2025-12-15 15:05:58,753] [PESpectrum - main] [INFO] Build data object +[2025-12-15 15:05:58,753] [PESpectrum - main] [INFO] The following dict would be uploaded to the DB: {'measurement_type': 'dcr', 'measurement_location': 'MINT', 'devices_used': ['p-1-1-pmt-unit-KM56674_275', 'p-1-1-om-hs-01'], 'result': 26463.90079865678, 'result_unc': 30.15089782382669, 'units': 'Hz', 'pmt_info': {'v10_in_volt': 85.09310150146484, 'di10_in_mA': 2.9245901107788086, 'frac': 0.877996027469635}, 'env_info': {'temperature_in_celsius': 29.11357307688087, 'pressure_in_hpa': 1011.6844068085361, 'humidity_in_percent': 23.183785204174583, 'measurement_duration_in_s': 29.110747814178467}, 'software_info': {'framework': 'mint-xyz', 'pe_reconstruction': 'NNLS', 'sftp_path': '/mint/mint-data/p-1-1-om-hs-01/PMT', 'run_tags': '2025_12_13_03_31_52'}, 'mapping': None} +[2025-12-15 15:05:58,753] [PESpectrum - main] [INFO] collected measurement results +[2025-12-15 15:05:58,754] 
[PESpectrum - main] [INFO] collected PMT info +[2025-12-15 15:05:58,754] [PESpectrum - main] [INFO] collected environment info +[2025-12-15 15:05:58,754] [PESpectrum - main] [INFO] collected software info +[2025-12-15 15:05:59,167] [PESpectrum - _connect_tunnel] [INFO] [info] SSH tunnel established: 127.0.0.1:38145 -> 127.0.0.1:27017 via production.pacific-neutrino.org +[2025-12-15 15:05:59,242] [PESpectrum - _connect_client] [INFO] [info] Connected to MongoDB at 127.0.0.1:38145 with user p-one +[2025-12-15 15:05:59,379] [PESpectrum - main] [INFO] Build data object +[2025-12-15 15:05:59,380] [PESpectrum - main] [INFO] The following dict would be uploaded to the DB: {'measurement_type': 'dcr', 'measurement_location': 'MINT', 'devices_used': ['p-1-1-pmt-unit-KM54356_23', 'p-1-1-om-hs-01'], 'result': 26326.463667634333, 'result_unc': 30.07250335177778, 'units': 'Hz', 'pmt_info': {'v10_in_volt': 87.58090209960938, 'di10_in_mA': 1.8282999992370605, 'frac': 0.8174149990081787}, 'env_info': {'temperature_in_celsius': 29.11357307688087, 'pressure_in_hpa': 1011.6844068085361, 'humidity_in_percent': 23.183785204174583, 'measurement_duration_in_s': 29.110747814178467}, 'software_info': {'framework': 'mint-xyz', 'pe_reconstruction': 'NNLS', 'sftp_path': '/mint/mint-data/p-1-1-om-hs-01/PMT', 'run_tags': '2025_12_13_03_31_52'}, 'mapping': None} diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index fd2e6b2..b98e19c 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -107,6 +107,15 @@ def __init__( self.ureg.define(f"NNLS = {nnls_coloumb_factor.to('coulomb').magnitude} * coulomb") self.ureg.define(f"ADC = {usr.magnitude}*NNLS") + cal_dict = { + "vadc": vadc, + "upsampling_ratio": usr, + "renormalization_factor": rf, + "sampling_time": st, + "adc_impedance": imp, + } + self._save_results(cal_dict, "calibration_constants") + # ---------------------- # I/O 
helpers # ---------------------- diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py index a3a2fd6..3ea8d5d 100644 --- a/src/mintanalysis/pmt/ana/uploadToDB.py +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -7,12 +7,14 @@ import argparse import logging -from dataclasses import dataclass +from dataclasses import asdict, dataclass from pathlib import Path from typing import Any, List import yaml +from andromeda.testing.devices.production_database import ProductionDatabase from pint import UnitRegistry +from pint.errors import UndefinedUnitError from .utils import get_physics_object, quantity_to_dict, setup_logging @@ -220,6 +222,9 @@ def main(): parser = argparse.ArgumentParser(description="Upload analysis results to mongoDB.") parser.add_argument("-x", "--f_aux", help="Path to aux file", required=True) parser.add_argument("-a", "--f_ana", help="Path to ana file", required=True) + parser.add_argument( + "-d", "--dry", action="store_true", help="Perform a dry run only (i.e do not upload to DB)" + ) parser.add_argument( "-c", "--ch_mask", @@ -250,13 +255,15 @@ def main(): measurement = args.measurement reco = args.reco ch_mask = args.ch_mask - tags = args.key - - # TODO For debug only. By definition keys need to be identical in aux and result files - tag_aux = "2025_12_02_22_46_06" - if measurement == "linear_gain": - tag_aux = ["2025_12_02_22_46_06", "2025_12_02_22_46_58"] - + tag = args.key + db_opts = { + "use_tunnel": True, + "ssh_keyring_service": "mongo-prod", + "mongo_keyring_service": "mongo-prod", + "logger": logger.info, + } + + # Build SFTP directory sftp_pmt_dir = "PMT" sftp_base_dir = Path("/mint/mint-data/") p = Path(aux_file) @@ -268,14 +275,40 @@ def main(): msg = f"'{sftp_pmt_dir}' not found in the aux path." 
logger.error(msg) + # Load measurement data try: with open(result_yaml) as f: data = yaml.safe_load(f) - data = get_physics_object(data, ureg) except Exception as e: msg = f"Failed to load file: {e}" logger.error(msg) raise + + # 'calibration_constants' in aux file define NNLS + if "calibration_constants" in data: + calib_data = get_physics_object(data["calibration_constants"], ureg) + nnls_coloumb_factor = ( + calib_data.get("vadc") + * calib_data.get("upsampling_ratio") + * calib_data.get("sampling_time") + * calib_data.get("renormalization_factor") + ) / calib_data.get("adc_impedance") + ureg.define(f"NNLS = {nnls_coloumb_factor.to('coulomb').magnitude} * coulomb") + ureg.define(f"ADC = {calib_data.get('upsampling_ratio').magnitude}*NNLS") + + # if no calibration values passed, try to load data without NNLS parameters + # if NNLS is in the tagged measurement, raise error. + try: + data = get_physics_object(data, ureg) + except UndefinedUnitError as e: + if "NNLS" in str(e): + msg = "Unit NNLS not defined. Trying to load reduced data set" + logger.warning(msg) + data = {measurement: get_physics_object(data[measurement], ureg)} + else: + raise e + + # Load aux file try: with open(aux_file) as f: aux = yaml.safe_load(f) @@ -285,20 +318,39 @@ def main(): logger.error(msg) raise + # check measurement sanity if measurement not in data: msg = f"{measurement} measurement not in result file" logger.error(msg) return + tags_aux = list(aux.keys()) if measurement == "linear_gain": - if tags is not None: + if tag is not None: logger.warning( "Tags are defined redundandly. Using tags defined in linear gain results" ) - tags = data[measurement].get("used_keys") - if len(tags) < 3: + tag = data[measurement].get("used_keys") + if len(tag) < 3: logger.warning("Less than three measurements found. 
Linear fit result will be bogus") + if not set(tag).issubset(set(tags_aux)): + msg = f"Tag {tag} not found in the aux file" + logger.error(msg) + raise ValueError + + else: + # Check key sanity + tags_data = list(data[measurement].keys()) + if tag not in tags_aux: + msg = f"Tag {tag} not found in the aux file" + logger.error(msg) + raise ValueError + if tag not in tags_data: + msg = f"Tag {tag} not found in the result file" + logger.error(msg) + raise ValueError + while ch_mask: ch = (ch_mask & -ch_mask).bit_length() - 1 pmt_no = ch + 1 @@ -307,31 +359,36 @@ def main(): vals, errs, units, mapping, _keys = get_values( data[measurement][pmt_no] if measurement == "linear_gain" - else data[measurement][tags][pmt_no] + else data[measurement][tag][pmt_no] ) logger.info("collected measurement results") # collect pmt info - pmt_info = get_pmt_info(pmt_no, aux, tag_aux) + pmt_info = get_pmt_info(pmt_no, aux, tag) logger.info("collected PMT info") # get environment info - env_info = get_env_info(aux, tag_aux) + env_info = get_env_info(aux, tag) logger.info("collected environment info") sw_info = SoftwareInfo( - framework="mint-xyz", pe_reconstruction=reco, sftp_path=sftp_dir, run_tags=tags + framework="mint-xyz", pe_reconstruction=reco, sftp_path=sftp_dir, run_tags=tag ) logger.info("collected software info") # get used devices - pmt_devices = f"pmt_{pmt_no}" + # Get settings from DB + # TODO replace with module level logic + with ProductionDatabase(**db_opts) as db: + overview = db.get_hemisphere_overview(str(sftp_root_dir), "tum") + pmt_name = overview.get("pmt-unit").get(str(pmt_no)).get("uid") + hemisphere = str(sftp_root_dir) # Build measurement result res = MeasurementResult( measurement_type=measurement, measurement_location="MINT", - devices_used=pmt_devices, + devices_used=[pmt_name, hemisphere], result=vals, result_unc=errs, units=units, @@ -342,7 +399,14 @@ def main(): ) logger.info("Build data object") - # TODO Upload to database + # Upload to database + if 
args.dry: + msg = f"The following dict would be uploaded to the DB: {asdict(res)}" + logger.info(msg) + else: + with ProductionDatabase(**db_opts) as db: + db.client["mint"]["Measurements_Pmt"].insert_one(asdict(res)) + msg = f"Uploaded document {asdict(res)} to mint/Measurements_Pmt" ch_mask &= ch_mask - 1 # clear lowest set bit diff --git a/src/mintanalysis/pmt/ana/utils.py b/src/mintanalysis/pmt/ana/utils.py index f7cc532..550621f 100644 --- a/src/mintanalysis/pmt/ana/utils.py +++ b/src/mintanalysis/pmt/ana/utils.py @@ -79,7 +79,9 @@ def quantity_to_dict(obj, val_key="val", err_key="err", unit_key="unit"): return { val_key: float(val), **({err_key: float(err)} if err is not None else {}), - unit_key: format(obj.units, "~"), + unit_key: ( + str(obj.units) if obj.units.is_compatible_with("ohm") else format(obj.units, "~") + ), } if isinstance(obj, dict): From b5e0c25b5592f9f8b97c3a19936bd8b8a3c9155d Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 11:21:17 -0800 Subject: [PATCH 26/32] Switch from 1 indexed to 0 indexed aux files --- .../pmt/ana/peSpectrumAnalyzer.py | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index b98e19c..bfc50da 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -209,20 +209,20 @@ def analyze_run(self, run_name: str, meta: dict[str, Any]) -> dict[int, dict[str # collect figures and write once with PdfPages(pdf_path) as pdf: for ch in lh5.ls(f_dsp): - pmt = int(ch[2:]) + 1 - self.logger.info("Run %s - channel %s (PMT %d)", run_name, ch, pmt) + ch_idx = int(ch[2:]) + self.logger.info("Run %s - channel %s (PMT %d)", run_name, ch, ch_idx + 1) try: - fig, chan_data = self.process_channel(run_name, ch, pmt, meta, f_raw, f_dsp) + fig, chan_data = self.process_channel(run_name, ch, meta, f_raw, f_dsp) # fig may be None if plotting 
skipped if fig is not None: pdf.savefig(fig) plt.close(fig) - run_results[pmt] = chan_data + run_results[ch_idx] = chan_data except Exception as exc: self.logger.exception( "Channel-level error run=%s ch=%s: %s", run_name, ch, exc ) - run_results[pmt] = {"status": "error", "reason": str(exc)} + run_results[ch_idx] = {"status": "error", "reason": str(exc)} self.logger.info("Wrote PDF for run %s to %s", run_name, pdf_path) return run_results @@ -234,7 +234,6 @@ def process_channel( self, run_name: str, ch: str, - pmt: int, meta: dict[str, Any], f_raw: Path, f_dsp: Path, @@ -245,7 +244,7 @@ def process_channel( """ result: dict[str, Any] = {} ch_idx = int(ch[2:]) - result["voltage"] = meta[ch_idx + 1]["v10"] + result["voltage"] = meta[ch_idx]["v10"] # load data try: @@ -269,7 +268,7 @@ def process_channel( pe_vals, bins=self.bins, histtype="step", - label=f"channel {ch} (PMT {pmt}) at {result['voltage'].magnitude:.2f}" + label=f"channel {ch} (PMT {ch_idx+1}) at {result['voltage'].magnitude:.2f}" f" {format(result['voltage'].units,'~')}", ) @@ -563,7 +562,7 @@ def compute_snr(self) -> None: run_snr[pmt] = 1 - noise / signal except Exception as e: self.logger.warning( - "Failed to compute SNR for run %s PMT %s: %s", run_name, pmt, e + "Failed to compute SNR for run %s channel %s: %s", run_name, pmt, e ) snr[run_name] = run_snr @@ -575,9 +574,9 @@ def compute_snr(self) -> None: vals = [pmt_dict[p].n for p in pmts] errs = [pmt_dict[p].s for p in pmts] ax.errorbar(pmts, vals, errs, label=run_name, fmt="o") - ax.set_xlabel("PMT") + ax.set_xlabel("Channel") ax.set_ylabel("SNR (a.u.)") - ax.set_title("SNR per PMT (= 1 - valley/peak)") + ax.set_title("SNR per Channel (= 1 - valley/peak)") ax.legend() plt.tight_layout() plot_path = self.plot_folder / "snr_plot.png" @@ -601,7 +600,7 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: runtime_info = info.get("runtime", {}) if time_mode not in runtime_info: self.logger.warning( - "Run %s PMT %s: missing runtime 
'%s'; skipping DCR.", + "Run %s channel %s: missing runtime '%s'; skipping DCR.", run_name, pmt, time_mode, @@ -615,7 +614,7 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: except Exception as e: self.logger.warning( - "Failed to compute DCR for run %s PMT %s: %s", run_name, pmt, e + "Failed to compute DCR for run %s channel %s: %s", run_name, pmt, e ) dcr[run_name] = run_dcr self._save_results(dcr, "dcr") @@ -626,9 +625,9 @@ def compute_dark_count_rate(self, time_mode: str = "aux") -> None: vals = [pmt_dict[p].n for p in pmts] errs = [pmt_dict[p].s for p in pmts] ax.errorbar(pmts, vals, errs, label=run_name, fmt="o") - ax.set_xlabel("PMT") + ax.set_xlabel("Channel") ax.set_ylabel("DCR (Hz)") - ax.set_title("Dark Count Rate per PMT") + ax.set_title("Dark Count Rate per Channel") ax.legend() plt.tight_layout() plot_path = self.plot_folder / "dcr_plot.png" @@ -670,7 +669,7 @@ def compute_linear_gain_fit(self) -> None: [i.m for i in pmt["voltage"]], [i.n for i in pmt["vals"]], [i.s for i in pmt["vals"]], - label=f"PMT {key}", + label=f"Channel {key}", fmt="o", ) ax.set_xlabel(f"Voltage ({format(xunit,'~')})") From 01a9ce7a4bc0fd1fb12cdb1d63e2474d9abc090a Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 12:59:08 -0800 Subject: [PATCH 27/32] save hemisphere info to result file --- src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py index bfc50da..93647c5 100644 --- a/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py +++ b/src/mintanalysis/pmt/ana/peSpectrumAnalyzer.py @@ -87,6 +87,7 @@ def __init__( self.bins = np.arange(-100, 10000, bin_size) self.lim = lim self.override_results = override_results + self.hemispheres = {} self.logger = logger or setup_logging() self.plot_folder.mkdir(parents=True, exist_ok=True) self.calibrator = calibrator @@ -116,6 +117,8 @@ def 
__init__( } self._save_results(cal_dict, "calibration_constants") + self._save_results(self.hemispheres, "hemispheres") + # ---------------------- # I/O helpers # ---------------------- @@ -139,6 +142,7 @@ def _load_aux(self) -> dict[str, Any]: else: msg = f"Key {k} not in aux file, skipping." self.logger.warning(msg) + self.hemispheres = {"A": aux.get("hemisphere_a"), "B": aux.get("hemisphere_b")} return ret def _load_results(self) -> dict: @@ -165,7 +169,8 @@ def _save_results(self, results: dict[str, dict[int, dict[str, Any]]], key: str) existing[key] = quantity_to_dict(results) with open(self.result_yaml, "w") as f: yaml.safe_dump(existing, f, default_flow_style=False) - self.logger.info("Updated result YAML at %s", self.result_yaml) + msg = f"Updated {key} YAML at {self.result_yaml}" + self.logger.info(msg) else: with open(self.result_yaml, "w") as f: From 174c0961022caddbaee7deee4b7890a3ce2f6902 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 17:59:19 -0800 Subject: [PATCH 28/32] added insert time to document --- src/mintanalysis/pmt/ana/uploadToDB.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py index 3ea8d5d..ae934bc 100644 --- a/src/mintanalysis/pmt/ana/uploadToDB.py +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -7,7 +7,8 @@ import argparse import logging -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone from pathlib import Path from typing import Any, List @@ -69,6 +70,7 @@ class MeasurementResult: result: NumberOrList result_unc: NumberOrList units: StrOrList + insert_time: str = field(init=False) pmt_info: PMTInfo env_info: EnvironmentInfo @@ -106,6 +108,8 @@ def __post_init__(self): msg = "result, result_unc, units, and mapping.parameters must all have same length" raise ValueError(msg) + self.insert_time = 
datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S.%f") + def get_values( obj: Any, @@ -257,9 +261,10 @@ def main(): ch_mask = args.ch_mask tag = args.key db_opts = { + "mongo_user": "mint", "use_tunnel": True, "ssh_keyring_service": "mongo-prod", - "mongo_keyring_service": "mongo-prod", + "mongo_keyring_service": "mongo-mint", "logger": logger.info, } @@ -381,14 +386,17 @@ def main(): # TODO replace with module level logic with ProductionDatabase(**db_opts) as db: overview = db.get_hemisphere_overview(str(sftp_root_dir), "tum") - pmt_name = overview.get("pmt-unit").get(str(pmt_no)).get("uid") - hemisphere = str(sftp_root_dir) + pmt_obj = { + "device_type": "pmt-unit", + "uid": overview.get("pmt-unit").get(str(pmt_no)).get("uid"), + "_id": overview.get("pmt-unit").get(str(pmt_no)).get("_id"), + } # Build measurement result res = MeasurementResult( measurement_type=measurement, measurement_location="MINT", - devices_used=[pmt_name, hemisphere], + devices_used=pmt_obj, result=vals, result_unc=errs, units=units, @@ -402,12 +410,12 @@ def main(): # Upload to database if args.dry: msg = f"The following dict would be uploaded to the DB: {asdict(res)}" - logger.info(msg) else: with ProductionDatabase(**db_opts) as db: db.client["mint"]["Measurements_Pmt"].insert_one(asdict(res)) msg = f"Uploaded document {asdict(res)} to mint/Measurements_Pmt" + logger.info(msg) ch_mask &= ch_mask - 1 # clear lowest set bit From 6853011d9151f550774183c9393be710c39ccda5 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 18:12:31 -0800 Subject: [PATCH 29/32] Add DB connection settings to arg parser --- src/mintanalysis/pmt/ana/uploadToDB.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py index ae934bc..58fdc9d 100644 --- a/src/mintanalysis/pmt/ana/uploadToDB.py +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -253,6 +253,19 @@ def main(): "-r", 
"--reco", default="NNLS", help="Reconstruction algorithm used in DSP" ) + parser.add_argument("--mongo_user", default="mint", help="Specify user for mongoDB login") + parser.add_argument( + "--mongo_keyring", + default=None, + help="Specify keyring for mongoDB login (will prompt password if None)", + ) + parser.add_argument( + "--ssh_keyring", + default=None, + help="Specify keyring for ssh tunnel login (will prompt password if None)", + ) + parser.add_argument("--ssh_user", default="mint", help="Specify user for ssh tunnel login") + args = parser.parse_args() result_yaml = args.f_ana aux_file = args.f_aux @@ -261,10 +274,11 @@ def main(): ch_mask = args.ch_mask tag = args.key db_opts = { - "mongo_user": "mint", + "mongo_user": args.mongo_user, "use_tunnel": True, - "ssh_keyring_service": "mongo-prod", - "mongo_keyring_service": "mongo-mint", + "ssh_keyring_service": args.ssh_keyring, + "ssh_user": args.ssh_user, + "mongo_keyring_service": args.mongo_keyring, "logger": logger.info, } From a470796f435653c0e0d29cc9bf9b72249a15f245 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 18:30:47 -0800 Subject: [PATCH 30/32] define version automatically --- pyproject.toml | 15 ++++++++------- src/mintanalysis/pmt/ana/uploadToDB.py | 21 +++++++++++---------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0d80565..53fe75d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,14 @@ [build-system] -requires = [ - "setuptools>=61.2", - "setuptools_scm[toml]>=7" -] +requires = ["setuptools>=64", "setuptools_scm>=8"] build-backend = "setuptools.build_meta" +[tool.setuptools_scm] +write_to = "src/mintanalysis/_version.py" +# 'node-and-date' or 'node-and-timestamp' appends the short hash +local_scheme = "node-and-date" +# 'guess-next-dev' ensures versions like 1.0.1.dev1+gabc123 +version_scheme = "guess-next-dev" + [tool.setuptools] include-package-data = true zip-safe = false @@ -20,9 +24,6 @@ 
py-modules = [] where = ["src"] -[tool.setuptools_scm] -write_to = "src/mintanalysis/_version.py" - [project.urls] Homepage = "https://github.com/pone-software/mint-analysis" Issues = "https://github.com/pone-software/mint-analysis/issues" diff --git a/src/mintanalysis/pmt/ana/uploadToDB.py b/src/mintanalysis/pmt/ana/uploadToDB.py index 58fdc9d..dac62b7 100644 --- a/src/mintanalysis/pmt/ana/uploadToDB.py +++ b/src/mintanalysis/pmt/ana/uploadToDB.py @@ -9,6 +9,7 @@ import logging from dataclasses import asdict, dataclass, field from datetime import datetime, timezone +from importlib.metadata import PackageNotFoundError, version from pathlib import Path from typing import Any, List @@ -19,15 +20,6 @@ from .utils import get_physics_object, quantity_to_dict, setup_logging -ROOT_DIR = Path("/home/pkrause/software/mint-analysis/debug_out") - -# binary channel mask to upload -# 0x0001 is PMT 1 only -# 0x0002 is PMT 2 only -# 0x0003 is PMT 1,2 -# 0x0004 is PMT 3 only - - NumberOrList = float | List[float] StrOrList = str | List[str] @@ -390,8 +382,17 @@ def main(): env_info = get_env_info(aux, tag) logger.info("collected environment info") + # get software info + try: + framework = version("mint-analysis") + except PackageNotFoundError: + framework = "unknown" + sw_info = SoftwareInfo( - framework="mint-xyz", pe_reconstruction=reco, sftp_path=sftp_dir, run_tags=tag + framework="mint_analyis_" + framework, + pe_reconstruction=reco, + sftp_path=sftp_dir, + run_tags=tag, ) logger.info("collected software info") From 158584cc65b14602c757945949facde5df4c7402 Mon Sep 17 00:00:00 2001 From: Patrick Krause Date: Tue, 16 Dec 2025 18:34:31 -0800 Subject: [PATCH 31/32] Remove uv dependencies --- pyproject.toml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 53fe75d..dd2e136 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,15 +96,6 @@ docs = [ build_ana = "mintanalysis.pmt.ana.peSpectrumAnalyzer:main" upload_ana = 
"mintanalysis.pmt.ana.uploadToDB:main"
 
-#[[tool.uv.index]]
-# Optional name for the index.
-#name = "dspeed-pone"
-# Required URL for the index.
-#url = "github://pone-software/dspeed-pone"
-
-[tool.uv.workspace]
-exclude = ["sim_data", "data", "dspeed"]
-
 [tool.pytest.ini_options]
 minversion = "6.0"
 addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]

From 3eb3472b61b2ca9d88b0b7d79af1730d8818226f Mon Sep 17 00:00:00 2001
From: Patrick Krause
Date: Tue, 16 Dec 2025 18:43:23 -0800
Subject: [PATCH 32/32] Revise README for MINT analysis framework details

Updated README to clarify MINT analysis framework and installation
instructions.
---
 README.md | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 1be341c..4e91519 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,18 @@
 # mint-analysis
-Collection of scripts for MINT tests
+A framework to analyze P-ONE PMT data.
+The MINT analysis package processes data recorded by the Andromeda package (e.g., PMT or acoustic data) and applies advanced analysis routines that lie outside Andromeda’s scope.
 
-## Installation
-
-Clone the repo
-
-To install packages I recommend using uv [https://docs.astral.sh/uv/], which
-is a fast modern python package handler that also handles virtual environments.
-
-You'll need to install this first which is shown here [https://docs.astral.sh/uv/getting-started/installation/].
+For PMT data, two analysis frameworks are planned:
+- Pygama – Modern, Python-based, fast-turnaround DSP toolkit for time-series data.
+Status: Implemented.
+- IceTray – The classical IceCube framework and future default for P-ONE data.
+Status: Not yet implemented.
 
-Once you have it installed go to the cloned repo and inside you should see a pyproject.toml file
-this contains all the info on the packages needed. 
+
-To install the packages simply run `uv sync`and they should all install.
-
-Then to open a jupyter notebook: `uv run --with jupyter jupyter notebook`. (In fact you don't even need the `uv sync` and can
-simply do the run, uv will handle the rest). This will run a jupyter notebook instance inside the virtual environment.
-
-Some packages may cause issues such as h5py which may need to be installed separately via brew.
+Detailed onboarding instructions can be found [on confluence](https://p-one.atlassian.net/wiki/spaces/PONE/pages/1797849089/MINT+Analysis+User+Onboarding).
 
 ## Installation
 
-All new instances you make are completely independent so can use different package/python versions as
-specified in the pyproject.toml file.
+Clone the repo
+cd into repo
+`pip install .`