diff --git a/Examples/qspace_lanczos/compute_spectral_Xpoint.py b/Examples/qspace_lanczos/compute_spectral_Xpoint.py
new file mode 100644
index 00000000..c46ad7dc
--- /dev/null
+++ b/Examples/qspace_lanczos/compute_spectral_Xpoint.py
@@ -0,0 +1,195 @@
+"""
+Q-Space Lanczos: phonon spectral function at the X point of SnTe
+================================================================
+
+This example computes the anharmonic phonon spectral function for SnTe
+at the X point (zone boundary) using the Q-space Lanczos algorithm.
+
+The Q-space Lanczos exploits Bloch momentum conservation to drastically
+reduce the size of the two-phonon sector, giving a speedup proportional
+to the number of unit cells in the supercell (N_cell = 8 for this 2x2x2
+supercell).
+
+Requirements:
+    - Julia with SparseArrays package
+    - spglib
+    - The SnTe ensemble from tests/test_julia/data/
+
+Usage:
+    python compute_spectral_Xpoint.py
+
+    # Or with MPI parallelism:
+    mpirun -np 4 python compute_spectral_Xpoint.py
+"""
+from __future__ import print_function
+
+import numpy as np
+import os, sys
+
+import cellconstructor as CC
+import cellconstructor.Phonons
+import cellconstructor.Units
+
+import sscha, sscha.Ensemble
+import tdscha.QSpaceLanczos as QL
+
+from tdscha.Parallel import pprint as print
+
+# ========================
+# Parameters
+# ========================
+# Path to the SnTe ensemble data (2x2x2 supercell, 2 atoms/unit cell)
+DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                        '..', '..', 'tests', 'test_julia', 'data')
+NQIRR = 3           # Number of irreducible q-points in the dynamical matrix
+TEMPERATURE = 250    # Temperature in Kelvin
+N_STEPS = 50         # Number of Lanczos steps (increase for production)
+SAVE_DIR = "output"  # Directory for checkpoints and results
+
+
+def main():
+    # ========================
+    # 1. Load ensemble
+    # ========================
+    dyn = CC.Phonons.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    ens = sscha.Ensemble.Ensemble(dyn, TEMPERATURE)
+    ens.load_bin(DATA_DIR, 1)
+
+    # ========================
+    # 2. Create Q-space Lanczos
+    # ========================
+    qlanc = QL.QSpaceLanczos(ens)
+    qlanc.ignore_v3 = False   # Include 3-phonon interactions
+    qlanc.ignore_v4 = False   # Include 4-phonon interactions
+    qlanc.init(use_symmetries=True)
+
+    # ========================
+    # 3. Inspect q-points and pick the X point
+    # ========================
+    print("Available q-points:")
+    for iq, q in enumerate(qlanc.q_points):
+        freqs = qlanc.w_q[:, iq] * CC.Units.RY_TO_CM
+        print("  iq={}: q = ({:8.5f}, {:8.5f}, {:8.5f})  "
+              "freqs = {} cm-1".format(iq, q[0], q[1], q[2],
+              np.array2string(freqs, precision=1, separator=', ')))
+
+    # The zone-boundary X point in a 2x2x2 FCC supercell is at iq=5,6,7
+    # (equivalent by cubic symmetry). We pick iq=5.
+    iq_pert = 5
+    print()
+    print("Selected q-point: iq={}".format(iq_pert))
+    print("  q = {}".format(qlanc.q_points[iq_pert]))
+    print()
+
+    # ========================
+    # 4. Run Lanczos for each band at the X point
+    # ========================
+    n_bands = qlanc.n_bands  # 6 bands for SnTe (3 * 2 atoms)
+
+    for band in range(n_bands):
+        freq = qlanc.w_q[band, iq_pert] * CC.Units.RY_TO_CM
+
+        # Skip acoustic modes (zero frequency at Gamma only; at X all modes
+        # have finite frequency, but we check anyway for safety)
+        if qlanc.w_q[band, iq_pert] < 1e-6:
+            print("Skipping acoustic band {} (freq = {:.2f} cm-1)".format(
+                band, freq))
+            continue
+
+        print("=" * 50)
+        print("Band {}: {:.2f} cm-1".format(band, freq))
+        print("=" * 50)
+
+        qlanc.prepare_mode_q(iq_pert, band)
+        qlanc.run_FT(N_STEPS, save_each=10, save_dir=SAVE_DIR,
+                      prefix="Xpoint_band{}".format(band), verbose=True)
+        qlanc.save_status(os.path.join(SAVE_DIR,
+                          "Xpoint_band{}_final.npz".format(band)))
+
+    # ========================
+    # 5. Plot the total spectral function at X
+    # ========================
+    print()
+    print("=" * 50)
+    print("Computing total spectral function at X point")
+    print("=" * 50)
+
+    # Frequency grid (cm-1)
+    w_cm = np.linspace(0, 200, 500)
+    w_ry = w_cm / CC.Units.RY_TO_CM
+    smearing = 3.0 / CC.Units.RY_TO_CM  # 3 cm-1 broadening
+
+    total_spectral = np.zeros_like(w_cm)
+
+    for band in range(n_bands):
+        if qlanc.w_q[band, iq_pert] < 1e-6:
+            continue
+
+        result_file = os.path.join(SAVE_DIR,
+                                   "Xpoint_band{}_final.npz".format(band))
+        if not os.path.exists(result_file):
+            print("  Band {} result not found, skipping".format(band))
+            continue
+
+        # Load and compute Green function
+        from tdscha.DynamicalLanczos import Lanczos
+        lanc_tmp = Lanczos()
+        lanc_tmp.load_status(result_file)
+
+        gf = lanc_tmp.get_green_function_continued_fraction(
+            w_ry, smearing=smearing, use_terminator=True)
+        spectral = -np.imag(gf)
+        total_spectral += spectral
+
+        # Print the renormalized frequency from G(0)
+        gf0 = lanc_tmp.get_green_function_continued_fraction(
+            np.array([0.0]), smearing=smearing, use_terminator=True)
+        w2 = 1.0 / np.real(gf0[0])
+        w_ren = np.sign(w2) * np.sqrt(np.abs(w2)) * CC.Units.RY_TO_CM
+        print("  Band {}: harmonic = {:.2f} cm-1, "
+              "renormalized = {:.2f} cm-1".format(
+                  band,
+                  qlanc.w_q[band, iq_pert] * CC.Units.RY_TO_CM,
+                  w_ren))
+
+    # Save the spectrum to a text file
+    output_file = os.path.join(SAVE_DIR, "spectral_Xpoint.dat")
+    np.savetxt(output_file,
+               np.column_stack([w_cm, total_spectral]),
+               header="omega(cm-1)  spectral_function(arb.units)",
+               fmt="%.6f")
+    print()
+    print("Spectral function saved to {}".format(output_file))
+
+    # Plot if matplotlib available
+    try:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+
+        fig, ax = plt.subplots(figsize=(8, 5))
+        ax.plot(w_cm, total_spectral, 'b-', linewidth=1.5)
+
+        # Mark harmonic frequencies
+        for band in range(n_bands):
+            freq = qlanc.w_q[band, iq_pert] * CC.Units.RY_TO_CM
+            if freq > 1e-3:
+                ax.axvline(freq, color='r', linestyle='--', alpha=0.5,
+                           linewidth=0.8)
+
+        ax.set_xlabel("Frequency (cm$^{-1}$)")
+        ax.set_ylabel("Spectral function (arb. units)")
+        ax.set_title("SnTe phonon spectral function at X point (T = {} K)".format(
+            TEMPERATURE))
+        ax.set_xlim(0, 200)
+        ax.legend(["Anharmonic (TD-SCHA)", "Harmonic frequencies"],
+                  loc="upper right")
+        fig.tight_layout()
+        fig.savefig(os.path.join(SAVE_DIR, "spectral_Xpoint.pdf"))
+        print("Plot saved to {}/spectral_Xpoint.pdf".format(SAVE_DIR))
+    except ImportError:
+        print("matplotlib not available, skipping plot")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Modules/DynamicalLanczos.py b/Modules/DynamicalLanczos.py
index d5f45f6c..c80718d1 100644
--- a/Modules/DynamicalLanczos.py
+++ b/Modules/DynamicalLanczos.py
@@ -300,6 +300,7 @@ def __init__(self, ensemble = None, mode = None, unwrap_symmetries = False, sele
         self.gamma_only = False
         self.trans_projector = None    # (n_modes, n_modes) matrix P = (1/n_cells) Σ_R T_R^mode
         self.trans_operators = None    # list of (n_modes, n_modes) T_R^mode matrices
+        self.trans_cart_perms = None   # list of Cartesian permutation index arrays for fast projection
 
         # Set to True if we want to use the Wigner equations
         self.use_wigner = use_wigner
@@ -764,15 +765,21 @@ def prepare_symmetrization(self, no_sym = False, verbose = True, symmetries = No
                     len(pg_symmetries), n_total_syms, n_total_syms // max(len(pg_symmetries), 1)))
 
             # Build translation operators in mode space: T_R^mode = pols^T @ P_R @ pols
+            # Also store Cartesian permutation index arrays for fast projection
             self.trans_operators = []
+            self.trans_cart_perms = []
             nat_sc = super_structure.N_atoms
             for t_sym in translations:
                 irt = CC.symmetries.GetIRT(super_structure, t_sym)
                 # Build Cartesian permutation matrix P_R (3*nat_sc x 3*nat_sc)
                 P_R = np.zeros((3 * nat_sc, 3 * nat_sc), dtype=np.double)
+                # Build inverse permutation index array for fast O(n^2) projection
+                inv_perm = np.zeros(3 * nat_sc, dtype=np.intp)
                 for i_at in range(nat_sc):
                     j_at = irt[i_at]
                     P_R[3*j_at:3*j_at+3, 3*i_at:3*i_at+3] = np.eye(3)
+                    inv_perm[3*j_at:3*j_at+3] = [3*i_at, 3*i_at+1, 3*i_at+2]
+                self.trans_cart_perms.append(inv_perm)
                 # Project into mode space
                 T_mode = self.pols.T @ P_R @ self.pols  # (n_modes, n_modes)
                 self.trans_operators.append(T_mode)
@@ -3171,12 +3178,16 @@ def get_combined_proc(start_end):
             # Project f_pert_av: P_trans @ f
             f_pert_av = self.trans_projector @ f_pert_av
 
-            # Project d2v_pert_av: (1/n_cells) Σ_R T_R @ d2v @ T_R^T
-            n_cells = len(self.trans_operators)
-            d2v_proj = np.zeros_like(d2v_pert_av)
-            for T_R in self.trans_operators:
-                d2v_proj += T_R @ d2v_pert_av @ T_R.T
-            d2v_pert_av = d2v_proj / n_cells
+            # Project d2v_pert_av using Cartesian-space permutations (O(n^2) per translation)
+            # Math: (1/N) Σ_R T_R @ d2v @ T_R^T = pols^T @ [(1/N) Σ_R P_R @ (pols @ d2v @ pols^T) @ P_R^T] @ pols
+            # where P_R @ M @ P_R^T is just row/column permutation (fancy indexing)
+            n_cells = len(self.trans_cart_perms)
+            d2v_cart = self.pols @ d2v_pert_av @ self.pols.T
+            d2v_cart_avg = np.zeros_like(d2v_cart)
+            for inv_perm in self.trans_cart_perms:
+                d2v_cart_avg += d2v_cart[np.ix_(inv_perm, inv_perm)]
+            d2v_cart_avg /= n_cells
+            d2v_pert_av = self.pols.T @ d2v_cart_avg @ self.pols
             _t_trans_end = time.time()
             if self.verbose:
                 print("Time for translational projection: {:.6f} s".format(_t_trans_end - _t_trans_start))
diff --git a/Modules/QSpaceHessian.py b/Modules/QSpaceHessian.py
new file mode 100644
index 00000000..363292e0
--- /dev/null
+++ b/Modules/QSpaceHessian.py
@@ -0,0 +1,762 @@
+"""
+Q-Space Free Energy Hessian
+============================
+
+Computes the free energy Hessian d²F/dRdR in q-space (Bloch basis).
+
+Instead of building the full D4 matrix explicitly (O(N⁴) memory, O(N⁶) time),
+solves L_static(q) x = e_i for each band at each irreducible q-point, where
+L_static is the static Liouvillian operator. The Hessian is then
+H(q) = inv(G(q)), where G(q) is the static susceptibility.
+
+The static L operator is DIFFERENT from the spectral L used in Lanczos:
+  - Static:   R sector = +w²,  W sector = 1/Lambda (one 2-phonon sector)
+  - Spectral: R sector = -w²,  a'/b' sectors = -(w1∓w2)² (two sectors)
+
+Both share the same anharmonic core (ensemble averages of D3/D4).
+
+The q-space block-diagonal structure gives a speedup of ~N_cell³ / N_q_irr
+over the real-space approach.
+
+References:
+    Monacelli & Mauri 2021 (Phys. Rev. B)
+"""
+
+from __future__ import print_function, division
+
+import sys
+import time
+import warnings
+import numpy as np
+import scipy.sparse.linalg
+
+import cellconstructor as CC
+import cellconstructor.Phonons
+import cellconstructor.Methods
+
+import cellconstructor.Settings as Parallel
+from cellconstructor.Settings import ParallelPrint as print
+
+try:
+    import spglib
+    __SPGLIB__ = True
+except ImportError:
+    __SPGLIB__ = False
+
+# Constants
+__EPSILON__ = 1e-12
+__RyToK__ = 157887.32400374097
+
+
+class QSpaceHessian:
+    """Compute the free energy Hessian in q-space via iterative linear solves.
+
+    Uses the static Liouvillian operator L_static, which has the structure:
+      - R sector: w² * R  (positive, unlike spectral -w²)
+      - W sector: (1/Lambda) * W  (static 2-phonon propagator)
+      - Anharmonic coupling via ensemble averages (same Julia core)
+
+    Parameters
+    ----------
+    ensemble : sscha.Ensemble.Ensemble
+        The SSCHA ensemble.
+    verbose : bool
+        If True, print progress information.
+    **kwargs
+        Additional keyword arguments passed to QSpaceLanczos.
+    """
+
+    def __init__(self, ensemble, verbose=True, **kwargs):
+        from tdscha.QSpaceLanczos import QSpaceLanczos
+
+        self.qlanc = QSpaceLanczos(ensemble, **kwargs)
+        self.ensemble = ensemble
+        self.verbose = verbose
+
+        # Shortcuts
+        self.n_q = self.qlanc.n_q
+        self.n_bands = self.qlanc.n_bands
+        self.q_points = self.qlanc.q_points
+        self.w_q = self.qlanc.w_q
+        self.pols_q = self.qlanc.pols_q
+
+        # Results storage: iq -> H_q matrix (n_bands x n_bands)
+        self.H_q_dict = {}
+
+        # Irreducible q-point data (set by init)
+        self.irr_qpoints = None
+        self.q_star_map = None
+
+        # Static psi layout (set by _compute_static_block_layout)
+        self._static_psi_size = None
+        self._static_block_offsets = None
+        self._static_block_sizes = None
+
+    def init(self, use_symmetries=True):
+        """Initialize the Lanczos engine and find irreducible q-points.
+
+        Parameters
+        ----------
+        use_symmetries : bool
+            If True, use symmetries to reduce q-points.
+        """
+        self.qlanc.init(use_symmetries=use_symmetries)
+        if use_symmetries and __SPGLIB__:
+            self._find_irreducible_qpoints()
+        else:
+            self.irr_qpoints = list(range(self.n_q))
+            self.q_star_map = {iq: [(iq, np.eye(3), np.zeros(3))]
+                               for iq in range(self.n_q)}
+
+    def _find_irreducible_qpoints(self):
+        """Find irreducible q-points under point-group symmetries."""
+        from tdscha.QSpaceLanczos import find_q_index
+
+        uci = self.qlanc.uci_structure
+        spg_data = spglib.get_symmetry(uci.get_spglib_cell())
+        rot_frac_all = spg_data['rotations']
+        trans_frac_all = spg_data['translations']
+
+        M = uci.unit_cell.T
+        Minv = np.linalg.inv(M)
+
+        unique_pg = {}
+        for i in range(len(rot_frac_all)):
+            key = rot_frac_all[i].tobytes()
+            if key not in unique_pg:
+                unique_pg[key] = i
+        pg_indices = list(unique_pg.values())
+
+        bg = uci.get_reciprocal_vectors() / (2 * np.pi)
+
+        visited = set()
+        self.irr_qpoints = []
+        self.q_star_map = {}
+
+        for iq in range(self.n_q):
+            if iq in visited:
+                continue
+
+            self.irr_qpoints.append(iq)
+            star = []
+
+            for idx in pg_indices:
+                R_frac = rot_frac_all[idx].astype(float)
+                t_frac = trans_frac_all[idx]
+                R_cart = M @ R_frac @ Minv
+                t_cart = M @ t_frac
+
+                Rq = R_cart @ self.q_points[iq]
+                try:
+                    iq_rot = find_q_index(Rq, self.q_points, bg)
+                except ValueError:
+                    continue
+
+                if iq_rot not in visited:
+                    visited.add(iq_rot)
+                star.append((iq_rot, R_cart.copy(), t_cart.copy()))
+
+            seen_iqs = set()
+            unique_star = []
+            for entry in star:
+                if entry[0] not in seen_iqs:
+                    seen_iqs.add(entry[0])
+                    unique_star.append(entry)
+
+            self.q_star_map[iq] = unique_star
+
+        if self.verbose:
+            print("Q-space Hessian: {} irreducible q-points out of {}".format(
+                len(self.irr_qpoints), self.n_q))
+
+    def _build_D_matrix(self, R_cart, t_cart, iq_from, iq_to):
+        """Compute the mode representation matrix D for symmetry {R|t}."""
+        from tdscha.QSpaceLanczos import QSpaceLanczos
+
+        uci = self.qlanc.uci_structure
+        nat_uc = uci.N_atoms
+        M = uci.unit_cell.T
+        Minv = np.linalg.inv(M)
+
+        irt = QSpaceLanczos._get_atom_perm(uci, R_cart, t_cart, M, Minv)
+
+        q_to = self.q_points[iq_to]
+        P_uc = np.zeros((3 * nat_uc, 3 * nat_uc), dtype=np.complex128)
+        for kappa in range(nat_uc):
+            kp = irt[kappa]
+            tau_k = uci.coords[kappa]
+            tau_kp = uci.coords[kp]
+            L = R_cart @ tau_k + t_cart - tau_kp
+            phase = np.exp(-2j * np.pi * q_to @ L)
+            P_uc[3 * kp:3 * kp + 3, 3 * kappa:3 * kappa + 3] = phase * R_cart
+
+        D = np.conj(self.pols_q[:, :, iq_to]).T @ P_uc @ self.pols_q[:, :, iq_from]
+        return D
+
+    # ==================================================================
+    # Static psi layout: (R[nb], W[blocks])
+    # Only ONE 2-phonon sector (unlike spectral a'/b')
+    # ==================================================================
+    def _compute_static_block_layout(self):
+        """Pre-compute block layout for the static psi vector.
+
+        Layout: [R sector (nb)] [W blocks (one per unique pair)]
+        """
+        nb = self.n_bands
+        sizes = []
+        for iq1, iq2 in self.qlanc.unique_pairs:
+            if iq1 < iq2:
+                sizes.append(nb * nb)
+            else:
+                sizes.append(nb * (nb + 1) // 2)
+        self._static_block_sizes = sizes
+
+        offsets = []
+        offset = nb  # R sector first
+        for s in sizes:
+            offsets.append(offset)
+            offset += s
+        self._static_block_offsets = offsets
+        self._static_psi_size = offset
+
+    def _get_static_psi_size(self):
+        return self._static_psi_size
+
+    def _get_W_block(self, psi, pair_idx):
+        """Extract full (nb, nb) W block from static psi."""
+        nb = self.n_bands
+        iq1, iq2 = self.qlanc.unique_pairs[pair_idx]
+        offset = self._static_block_offsets[pair_idx]
+        size = self._static_block_sizes[pair_idx]
+        raw = psi[offset:offset + size]
+        if iq1 < iq2:
+            return raw.reshape(nb, nb).copy()
+        else:
+            return self.qlanc._unpack_upper_triangle(raw, nb)
+
+    def _set_W_block(self, psi, pair_idx, matrix):
+        """Write full (nb, nb) matrix into W sector of static psi."""
+        nb = self.n_bands
+        iq1, iq2 = self.qlanc.unique_pairs[pair_idx]
+        offset = self._static_block_offsets[pair_idx]
+        if iq1 < iq2:
+            psi[offset:offset + nb * nb] = matrix.ravel()
+        else:
+            size = self._static_block_sizes[pair_idx]
+            psi[offset:offset + size] = \
+                self.qlanc._pack_upper_triangle(matrix, nb)
+
+    def _static_mask(self):
+        """Build mask for static psi inner product.
+
+        Same structure as FT mask but only one W sector (not a' + b').
+        Acoustic modes (w < eps) are masked out (set to 0) to keep
+        the null space purely in the W sector and avoid inconsistency.
+        """
+        psi_size = self._get_static_psi_size()
+        mask = np.ones(psi_size, dtype=np.float64)
+        nb = self.n_bands
+        eps = self.qlanc.acoustic_eps
+
+        # R sector: mask acoustic modes at the perturbation q-point
+        w_qp = self.w_q[:, self.qlanc.iq_pert]
+        for nu in range(nb):
+            if w_qp[nu] < eps:
+                mask[nu] = 0
+
+        for pair_idx, (iq1, iq2) in enumerate(self.qlanc.unique_pairs):
+            offset = self._static_block_offsets[pair_idx]
+            w1 = self.w_q[:, iq1]
+            w2 = self.w_q[:, iq2]
+            # Which modes are acoustic?
+            ac1 = w1 < eps
+            ac2 = w2 < eps
+
+            if iq1 < iq2:
+                # Full block stored in row-major: element (i,j) at offset + i*nb + j
+                for i in range(nb):
+                    for j in range(nb):
+                        if ac1[i] or ac2[j]:
+                            mask[offset + i * nb + j] = 0
+                        else:
+                            mask[offset + i * nb + j] = 2
+            else:
+                # Upper triangle: diagonal then off-diagonal
+                idx = offset
+                for i in range(nb):
+                    if ac1[i] or ac2[i]:
+                        mask[idx] = 0
+                    else:
+                        mask[idx] = 1  # diagonal
+                    idx += 1
+                    for j in range(i + 1, nb):
+                        if ac1[i] or ac2[j]:
+                            mask[idx] = 0
+                        else:
+                            mask[idx] = 2  # off-diagonal
+                        idx += 1
+
+        return mask
+
+    # ==================================================================
+    # Lambda: static 2-phonon propagator
+    # ==================================================================
+    def _compute_lambda_q(self, iq1, iq2):
+        """Compute the static 2-phonon propagator Lambda for pair (iq1, iq2).
+
+        Lambda[nu1, nu2] = ((n1+n2+1)/(w1+w2) - dn12/dw) / (4*w1*w2)
+
+        where dn12/dw = (n1-n2)/(w1-w2) for w1 != w2,
+                       = -beta * exp(beta*w) * n^2 for w1 == w2.
+
+        Returns
+        -------
+        Lambda : ndarray(n_bands, n_bands), float64
+        """
+        w1 = self.w_q[:, iq1]
+        w2 = self.w_q[:, iq2]
+        T = self.qlanc.T
+
+        w1_mat = np.tile(w1, (self.n_bands, 1)).T
+        w2_mat = np.tile(w2, (self.n_bands, 1))
+
+        n1 = np.zeros_like(w1)
+        n2 = np.zeros_like(w2)
+        if T > __EPSILON__:
+            beta = __RyToK__ / T
+            valid1 = w1 > self.qlanc.acoustic_eps
+            valid2 = w2 > self.qlanc.acoustic_eps
+            n1[valid1] = 1.0 / (np.exp(w1[valid1] * beta) - 1.0)
+            n2[valid2] = 1.0 / (np.exp(w2[valid2] * beta) - 1.0)
+
+        n1_mat = np.tile(n1, (self.n_bands, 1)).T
+        n2_mat = np.tile(n2, (self.n_bands, 1))
+
+        valid_mask = np.outer(w1 > self.qlanc.acoustic_eps,
+                              w2 > self.qlanc.acoustic_eps)
+
+        # (n1 - n2) / (w1 - w2), regularized for w1 ≈ w2
+        diff_n = np.zeros_like(w1_mat)
+        w_diff = w1_mat - w2_mat
+        w_equal = np.abs(w_diff) < 1e-8
+
+        if T > __EPSILON__:
+            beta = __RyToK__ / T
+            # Normal case: w1 != w2
+            safe_diff = np.where(w_equal, 1.0, w_diff)
+            diff_n = np.where(w_equal, 0.0, (n1_mat - n2_mat) / safe_diff)
+            # Degenerate case: w1 == w2
+            w_eq_vals = np.where(w_equal & valid_mask, w1_mat, 0.0)
+            n_eq_vals = np.where(w_equal & valid_mask, n1_mat, 0.0)
+            diff_n_deg = np.where(
+                w_equal & valid_mask,
+                -beta * np.exp(w_eq_vals * beta) * n_eq_vals ** 2,
+                0.0)
+            diff_n = np.where(w_equal, diff_n_deg, diff_n)
+
+        # Lambda = ((n1+n2+1)/(w1+w2) - diff_n) / (4*w1*w2)
+        w_sum = w1_mat + w2_mat
+        safe_wsum = np.where(valid_mask, w_sum, 1.0)
+        safe_wprod = np.where(valid_mask, w1_mat * w2_mat, 1.0)
+
+        Lambda = np.where(
+            valid_mask,
+            ((n1_mat + n2_mat + 1) / safe_wsum - diff_n) / (4 * safe_wprod),
+            0.0)
+
+        return Lambda
+
+    # ==================================================================
+    # Static L operator
+    # ==================================================================
+    def _apply_L_static_q(self, psi):
+        """Apply the static Liouvillian L_static to a psi vector.
+
+        L_static has:
+          R sector: w² * R
+          W sector: (1/Lambda) * W
+          Anharmonic coupling from ensemble averages
+
+        Parameters
+        ----------
+        psi : ndarray(static_psi_size,), complex128
+
+        Returns
+        -------
+        out : ndarray(static_psi_size,), complex128
+        """
+        nb = self.n_bands
+        out = np.zeros_like(psi)
+
+        # --- Harmonic part ---
+        # R sector: w² * R
+        w_qp = self.w_q[:, self.qlanc.iq_pert]
+        out[:nb] = (w_qp ** 2) * psi[:nb]
+
+        # W sector: (1/Lambda) * W
+        for pair_idx, (iq1, iq2) in enumerate(self.qlanc.unique_pairs):
+            Lambda = self._compute_lambda_q(iq1, iq2)
+            W_block = self._get_W_block(psi, pair_idx)
+
+            # 1/Lambda * W, with Lambda=0 for acoustic modes → set to 0
+            safe_Lambda = np.where(np.abs(Lambda) > 1e-30, Lambda, 1.0)
+            inv_Lambda_W = np.where(
+                np.abs(Lambda) > 1e-30,
+                W_block / safe_Lambda,
+                0.0)
+
+            self._set_W_block(out, pair_idx, inv_Lambda_W)
+
+        # --- Anharmonic part ---
+        anh_out = self._apply_anharmonic_static_q(psi)
+        out += anh_out
+
+        return out
+
+    def _apply_anharmonic_static_q(self, psi):
+        """Apply the anharmonic part of the static L operator.
+
+        1. Extract R1 from R sector
+        2. Transform W blocks → Y1 (Upsilon perturbation)
+        3. Call Julia q-space extension
+        4. R output = -f_pert, W output = d2v blocks
+
+        Parameters
+        ----------
+        psi : ndarray(static_psi_size,), complex128
+
+        Returns
+        -------
+        out : ndarray(static_psi_size,), complex128
+        """
+        nb = self.n_bands
+        out = np.zeros_like(psi)
+
+        R1 = psi[:nb].copy()
+
+        # Build Y1 blocks from W: Y1 = -2 * Y_w1 * Y_w2 * W
+        # Y_w[nu] = 2*w/(2*n+1) for each q-point
+        Y1_blocks = []
+        for pair_idx, (iq1, iq2) in enumerate(self.qlanc.unique_pairs):
+            W_block = self._get_W_block(psi, pair_idx)
+
+            Y_w1 = self._compute_Y_w(iq1)  # (nb,)
+            Y_w2 = self._compute_Y_w(iq2)  # (nb,)
+
+            Y1_block = -2.0 * np.outer(Y_w1, Y_w2) * W_block
+            Y1_blocks.append(Y1_block)
+
+        Y1_flat = self.qlanc._flatten_blocks(Y1_blocks)
+
+        # Call Julia via the same interface as the spectral case
+        f_pert, d2v_blocks = self.qlanc._call_julia_qspace(R1, Y1_flat)
+
+        # Output: R ← -f_pert (note the sign!)
+        out[:nb] = -f_pert
+
+        # Output: W ← d2v (direct, no chi± transformation)
+        for pair_idx in range(len(self.qlanc.unique_pairs)):
+            self._set_W_block(out, pair_idx, d2v_blocks[pair_idx])
+
+        return out
+
+    def _compute_Y_w(self, iq):
+        """Compute Y_w = 2*w/(2*n+1) for all bands at q-point iq.
+
+        Acoustic modes (w < eps) get Y_w = 0.
+        """
+        w = self.w_q[:, iq]
+        T = self.qlanc.T
+
+        n = np.zeros_like(w)
+        if T > __EPSILON__:
+            valid = w > self.qlanc.acoustic_eps
+            n[valid] = 1.0 / (np.exp(w[valid] * __RyToK__ / T) - 1.0)
+
+        Y_w = np.zeros_like(w)
+        valid = w > self.qlanc.acoustic_eps
+        Y_w[valid] = 2.0 * w[valid] / (2.0 * n[valid] + 1.0)
+        return Y_w
+
+    # ==================================================================
+    # Preconditioner: inverse of harmonic L_static
+    # ==================================================================
+    def _apply_harmonic_preconditioner(self, psi_tilde, sqrt_mask,
+                                       inv_sqrt_mask):
+        """Apply harmonic preconditioner M = L_harm^{-1} in transformed basis.
+
+        For L_static_harm:
+          R sector: eigenvalue = w², M = 1/w²
+          W sector: eigenvalue = 1/Lambda, M = Lambda
+        """
+        nb = self.n_bands
+        psi = psi_tilde * inv_sqrt_mask
+        result = np.zeros_like(psi)
+
+        # R sector: 1/w²
+        w_qp = self.w_q[:, self.qlanc.iq_pert]
+        for nu in range(nb):
+            if w_qp[nu] > self.qlanc.acoustic_eps:
+                result[nu] = psi[nu] / (w_qp[nu] ** 2)
+
+        # W sector: Lambda (inverse of 1/Lambda)
+        for pair_idx, (iq1, iq2) in enumerate(self.qlanc.unique_pairs):
+            Lambda = self._compute_lambda_q(iq1, iq2)
+            W_block = self._get_W_block(psi, pair_idx)
+            result_block = Lambda * W_block
+            self._set_W_block(result, pair_idx, result_block)
+
+        return result * sqrt_mask
+
+    # ==================================================================
+    # Core solver
+    # ==================================================================
+    def compute_hessian_at_q(self, iq, tol=1e-6, max_iters=500,
+                             use_preconditioner=True,
+                             dense_fallback=False):
+        """Compute the free energy Hessian at a single q-point.
+
+        Solves L_static(q) x_i = e_i for each non-acoustic band.
+        G_q[j,i] = x_i[j] (R-sector), H_q = inv(G_q).
+
+        Parameters
+        ----------
+        iq : int
+            Q-point index.
+        tol : float
+            Convergence tolerance for the iterative solver.
+        max_iters : int
+            Maximum number of iterations.
+        use_preconditioner : bool
+            If True, use harmonic preconditioner.
+        dense_fallback : bool
+            If True, fall back to dense solve when iterative solvers fail.
+            WARNING: this builds a psi_size x psi_size dense matrix, which
+            can be very large for big supercells. Default is False.
+
+        Returns
+        -------
+        H_q : ndarray(n_bands, n_bands), complex128
+            The Hessian matrix in the mode basis at q-point iq.
+        """
+        nb = self.n_bands
+
+        # 1. Setup pair map and block layout
+        self.qlanc.build_q_pair_map(iq)
+        self._compute_static_block_layout()
+        psi_size = self._get_static_psi_size()
+        mask = self._static_mask()
+
+        # Similarity transform arrays
+        sqrt_mask = np.sqrt(mask)
+        inv_sqrt_mask = np.zeros_like(sqrt_mask)
+        nonzero = mask > 0
+        inv_sqrt_mask[nonzero] = 1.0 / sqrt_mask[nonzero]
+
+        # 2. Define transformed operator: L_tilde = D^{1/2} L_static D^{-1/2}
+        def apply_L_tilde(x_tilde):
+            x = x_tilde * inv_sqrt_mask
+            Lx = self._apply_L_static_q(x)
+            return Lx * sqrt_mask
+
+        L_op = scipy.sparse.linalg.LinearOperator(
+            (psi_size, psi_size), matvec=apply_L_tilde, dtype=np.complex128)
+
+        # 3. Preconditioner
+        M_op = None
+        if use_preconditioner:
+            def apply_M_tilde(x_tilde):
+                return self._apply_harmonic_preconditioner(
+                    x_tilde, sqrt_mask, inv_sqrt_mask)
+            M_op = scipy.sparse.linalg.LinearOperator(
+                (psi_size, psi_size), matvec=apply_M_tilde,
+                dtype=np.complex128)
+
+        # 4. Identify non-acoustic bands
+        w_qp = self.w_q[:, iq]
+        non_acoustic = [nu for nu in range(nb)
+                        if w_qp[nu] > self.qlanc.acoustic_eps]
+
+        if self.verbose:
+            print("  Solving at q={} ({} non-acoustic bands out of {})".format(
+                iq, len(non_acoustic), nb))
+
+        # 5. Solve L_static x_i = e_i for each non-acoustic band
+        G_q = np.zeros((nb, nb), dtype=np.complex128)
+        total_iters = 0
+        L_dense = None  # Built lazily if iterative solvers fail
+
+        for band_i in non_acoustic:
+            rhs = np.zeros(psi_size, dtype=np.complex128)
+            rhs[band_i] = 1.0
+            rhs_tilde = rhs * sqrt_mask
+
+            # Initial guess from preconditioner
+            x0 = None
+            if use_preconditioner:
+                x0 = apply_M_tilde(rhs_tilde)
+
+            n_iters = [0]
+            def _count(xk):
+                n_iters[0] += 1
+
+            t1 = time.time()
+
+            # Use GMRES since L_static can be indefinite for unstable systems
+            x_tilde, info = scipy.sparse.linalg.gmres(
+                L_op, rhs_tilde, x0=x0, rtol=tol, maxiter=max_iters,
+                M=M_op, callback=_count, callback_type='legacy')
+
+            if info != 0:
+                if self.verbose:
+                    print("    GMRES did not converge for band {} (info={}), "
+                          "trying BiCGSTAB...".format(band_i, info))
+                x_tilde, info = scipy.sparse.linalg.bicgstab(
+                    L_op, rhs_tilde, x0=x_tilde, rtol=tol, maxiter=max_iters,
+                    M=M_op)
+                if info != 0:
+                    if not dense_fallback:
+                        raise RuntimeError(
+                            "Iterative solvers (GMRES, BiCGSTAB) did not "
+                            "converge for band {} at iq={} (info={}). "
+                            "Set dense_fallback=True to use a dense solve "
+                            "(warning: O(psi_size^2) memory)."
+                            .format(band_i, iq, info))
+                    if self.verbose:
+                        print("    BiCGSTAB also did not converge "
+                              "for band {} (info={}), using dense solve"
+                              .format(band_i, info))
+                    # Build dense L matrix once, reuse for all failing bands
+                    if L_dense is None:
+                        if self.verbose:
+                            print("    Building dense L matrix "
+                                  "(size {})...".format(psi_size))
+                        L_dense = np.zeros((psi_size, psi_size),
+                                           dtype=np.complex128)
+                        for j in range(psi_size):
+                            e_j = np.zeros(psi_size, dtype=np.complex128)
+                            e_j[j] = 1.0
+                            L_dense[:, j] = apply_L_tilde(e_j)
+                        L_dense = (L_dense + L_dense.conj().T) / 2
+                    x_tilde, _, _, _ = np.linalg.lstsq(
+                        L_dense, rhs_tilde, rcond=1e-10)
+
+            t2 = time.time()
+            total_iters += n_iters[0]
+
+            # Un-transform
+            x = x_tilde * inv_sqrt_mask
+
+            # Extract R-sector
+            G_q[:, band_i] = x[:nb]
+
+            if self.verbose:
+                print("    Band {}: {} iters, {:.2f}s".format(
+                    band_i, n_iters[0], t2 - t1))
+
+        # 6. Symmetrize G_q (should be Hermitian)
+        G_q = (G_q + G_q.conj().T) / 2
+
+        # 7. Invert to get H_q
+        H_q = np.zeros((nb, nb), dtype=np.complex128)
+        if len(non_acoustic) > 0:
+            na = np.array(non_acoustic)
+            G_sub = G_q[np.ix_(na, na)]
+
+            cond = np.linalg.cond(G_sub)
+            if cond > 1e12 and self.verbose:
+                print("    WARNING: G_q ill-conditioned (cond={:.2e})".format(
+                    cond))
+
+            H_sub = np.linalg.inv(G_sub)
+            H_q[np.ix_(na, na)] = H_sub
+
+        H_q = (H_q + H_q.conj().T) / 2
+
+        if self.verbose:
+            eigvals = np.sort(np.real(np.linalg.eigvalsh(H_q)))
+            non_zero = eigvals[np.abs(eigvals) > 1e-10]
+            if len(non_zero) > 0:
+                print("  H_q eigenvalue range: [{:.6e}, {:.6e}]".format(
+                    non_zero[0], non_zero[-1]))
+            print("  Total L applications: {}".format(total_iters))
+
+        self.H_q_dict[iq] = H_q
+        return H_q
+
+    def compute_full_hessian(self, tol=1e-6, max_iters=500,
+                             use_preconditioner=True,
+                             dense_fallback=False):
+        """Compute the Hessian at all q-points and return as CC.Phonons.
+
+        Parameters
+        ----------
+        tol : float
+            Convergence tolerance for iterative solver.
+        max_iters : int
+            Maximum iterations per linear solve.
+        use_preconditioner : bool
+            If True, use harmonic preconditioner.
+        dense_fallback : bool
+            If True, fall back to dense solve when iterative solvers fail.
+            WARNING: this builds a psi_size x psi_size dense matrix, which
+            can be very large for big supercells. Default is False.
+
+        Returns
+        -------
+        hessian : CC.Phonons.Phonons
+            The free energy Hessian as a Phonons object (Ry/bohr²).
+        """
+        if self.verbose:
+            print()
+            print("=" * 50)
+            print("  Q-SPACE FREE ENERGY HESSIAN")
+            print("=" * 50)
+            print()
+
+        t_start = time.time()
+
+        for iq_irr in self.irr_qpoints:
+            if self.verbose:
+                print("Irreducible q-point {} / {}".format(
+                    self.irr_qpoints.index(iq_irr) + 1,
+                    len(self.irr_qpoints)))
+            self.compute_hessian_at_q(
+                iq_irr, tol=tol, max_iters=max_iters,
+                use_preconditioner=use_preconditioner,
+                dense_fallback=dense_fallback)
+
+        # Unfold to full BZ
+        for iq_irr in self.irr_qpoints:
+            H_irr = self.H_q_dict[iq_irr]
+            for iq_rot, R_cart, t_cart in self.q_star_map[iq_irr]:
+                if iq_rot == iq_irr:
+                    continue
+                D = self._build_D_matrix(R_cart, t_cart, iq_irr, iq_rot)
+                self.H_q_dict[iq_rot] = D @ H_irr @ D.conj().T
+
+        t_end = time.time()
+        if self.verbose:
+            print()
+            print("Total time: {:.1f}s".format(t_end - t_start))
+
+        return self._build_phonons()
+
+    def _build_phonons(self):
+        """Convert mode-basis H_q to Cartesian and create CC.Phonons.Phonons."""
+        nq = self.n_q
+        q_points = np.array(self.qlanc.dyn.q_tot)
+        uc_structure = self.qlanc.dyn.structure.copy()
+
+        hessian = CC.Phonons.Phonons(uc_structure, nqirr=nq)
+
+        for iq in range(nq):
+            H_q = self.H_q_dict[iq]
+            pol = self.pols_q[:, :, iq]
+            pol_m = np.einsum("a, ab -> ab", np.sqrt(self.qlanc.m), pol)
+            Phi_q = pol_m @ H_q @ np.conj(pol_m).T
+            hessian.dynmats[iq] = Phi_q
+            hessian.q_tot[iq] = q_points[iq]
+
+        hessian.AdjustQStar()
+        return hessian
diff --git a/Modules/QSpaceLanczos.py b/Modules/QSpaceLanczos.py
new file mode 100644
index 00000000..ec701b07
--- /dev/null
+++ b/Modules/QSpaceLanczos.py
@@ -0,0 +1,1469 @@
+"""
+Q-Space Lanczos Module
+======================
+
+This module implements the Lanczos algorithm in q-space (Bloch basis) to exploit
+momentum conservation and block structure from Bloch's theorem. This gives a
+speedup of ~N_cell over the real-space implementation.
+
+Key differences from the real-space DynamicalLanczos.Lanczos:
+- Psi vector is complex128 (Hermitian Lanczos with sesquilinear inner product)
+- Two-phonon sector uses (q1, q2) pairs constrained by q1+q2 = q_pert + G
+- Symmetries are point-group only (translations handled by Fourier transform)
+- Requires Julia extension (tdscha_qspace.jl)
+
+References:
+    Implementation plan: implementation_plan.md
+    Parent class: DynamicalLanczos.py
+"""
+
+from __future__ import print_function
+from __future__ import division
+
+import sys, os
+import time
+import warnings
+import numpy as np
+
+import cellconstructor as CC
+import cellconstructor.Phonons
+import cellconstructor.symmetries
+import cellconstructor.Methods
+
+import tdscha.DynamicalLanczos as DL
+import cellconstructor.Settings as Parallel
+from cellconstructor.Settings import ParallelPrint as print
+
+# Try to import the julia module
+__JULIA_EXT__ = False
+try:
+    import julia, julia.Main
+    julia.Main.include(os.path.join(os.path.dirname(__file__), 
+        "tdscha_qspace.jl"))
+    __JULIA_EXT__ = True
+except:
+    try:
+        import julia
+        from julia.api import Julia
+        jl = Julia(compiled_modules=False)
+        import julia.Main
+        try:
+            julia.Main.include(os.path.join(os.path.dirname(__file__),
+                "tdscha_qspace.jl"))
+            __JULIA_EXT__ = True
+        except:
+            # Install the required modules
+            julia.Main.eval("""
+using Pkg
+Pkg.add("SparseArrays")
+Pkg.add("InteractiveUtils")
+""")
+            try:
+                julia.Main.include(os.path.join(os.path.dirname(__file__),
+                    "tdscha_qspace.jl"))
+                __JULIA_EXT__ = True
+            except Exception as e:
+                warnings.warn("Julia extension not available.\nError: {}".format(e))
+    except Exception as e:
+        warnings.warn("Julia extension not available.\nError: {}".format(e))
+    pass
+
+try:
+    import spglib
+    __SPGLIB__ = True
+except ImportError:
+    __SPGLIB__ = False
+
+
+# Constants
+__EPSILON__ = 1e-12
+__RyToK__ = 157887.32400374097
+TYPE_DP = np.double
+
+
+def find_q_index(q_target, q_points, bg, tol=1e-6):
+    """Find the index of q_target in q_points up to a reciprocal lattice vector.
+
+    Parameters
+    ----------
+    q_target : ndarray(3,)
+        The q-point to find (Cartesian coordinates).
+    q_points : ndarray(n_q, 3)
+        Array of q-points.
+    bg : ndarray(3, 3)
+        Reciprocal lattice vectors / (2*pi), rows are vectors.
+    tol : float
+        Tolerance for matching.
+
+    Returns
+    -------
+    int
+        Index of the matching q-point.
+    """
+    for iq, q in enumerate(q_points):
+        dist = CC.Methods.get_min_dist_into_cell(bg, q_target, q)
+        if dist < tol:
+            return iq
+    raise ValueError("Could not find q-point {} in the q-point list".format(q_target))
+
+
+class QSpaceLanczos(DL.Lanczos):
+    """Q-space Lanczos for spectral calculations exploiting Bloch's theorem.
+
+    This class works in the q-space mode basis to exploit momentum conservation,
+    reducing the psi vector size by ~N_cell and the anharmonic computation by ~N_cell.
+
+    Only Wigner formalism is supported. Requires Julia extension.
+    """
+
+    def __init__(self, ensemble, **kwargs):
+        """Initialize the Q-Space Lanczos.
+
+        Parameters
+        ----------
+        ensemble : sscha.Ensemble.Ensemble
+            The SSCHA ensemble.
+        **kwargs
+            Additional keyword arguments passed to the parent Lanczos class.
+        """
+        # Force Wigner and Julia mode
+        kwargs['mode'] = DL.MODE_FAST_JULIA
+        kwargs['use_wigner'] = True
+
+        self.ensemble = ensemble
+        super().__init__(ensemble, unwrap_symmetries=False, **kwargs)
+
+        if not __JULIA_EXT__:
+            raise ImportError(
+                "QSpaceLanczos requires Julia. Install with: pip install julia"
+            )
+        self.use_wigner = True
+
+        # -- Add the q-space attributes --
+        qspace_attrs = [
+            'q_points', 'n_q', 'n_bands', 'w_q', 'pols_q',
+            'acoustic_eps', 'X_q', 'Y_q',
+            'iq_pert', 'q_pair_map', 'unique_pairs',
+            '_psi_size', '_block_offsets_a', '_block_offsets_b', '_block_sizes',
+            '_qspace_sym_data', '_qspace_sym_q_map', 'n_syms_qspace',
+        ]
+        self.__total_attributes__.extend(qspace_attrs)
+
+        # == 1. Get q-space eigenmodes ==
+        ws_sc, pols_sc, w_q, pols_q = self.dyn.DiagonalizeSupercell(return_qmodes=True)
+
+        self.q_points = np.array(self.dyn.q_tot)  # (n_q, 3)
+        self.n_q = len(self.q_points)
+        self.n_bands = 3 * self.uci_structure.N_atoms  # uniform band count
+        self.w_q = w_q        # (n_bands, n_q) from DiagonalizeSupercell
+        self.pols_q = pols_q  # (3*n_at, n_bands, n_q) complex eigenvectors
+
+        # The masses needs to be restricted to the primitive cell only
+        self.m = self.m[:self.n_bands]
+
+        # Small frequency threshold for acoustic mode masking
+        self.acoustic_eps = 1e-6
+
+        # == 2. Bloch transform ensemble data ==
+        self._bloch_transform_ensemble()
+
+        # Q-space specific state (set by build_q_pair_map)
+        self.iq_pert = None
+        self.q_pair_map = None
+        self.unique_pairs = None
+        self._psi_size = None
+        self._block_offsets_a = None
+        self._block_offsets_b = None
+        self._block_sizes = None
+
+        # Symmetry data for q-space
+        self._qspace_sym_data = None
+        self._qspace_sym_q_map = None
+        self.n_syms_qspace = 0
+
+    def _bloch_transform_ensemble(self):
+        """Bloch transform the ensemble displacements and forces into q-space mode basis.
+
+        Computes X_q and Y_q from the ensemble u_disps_qspace and forces_qspace.
+        Uses sscha.Ensemble implementation for the Fourier transform via Julia.
+
+        The forces are the anharmonic residual: f - f_SSCHA - <f - f_SSCHA>,
+        matching the preprocessing done in DynamicalLanczos.__init__.
+        """
+        # Ensure the ensemble has computed q-space quantities
+        # Check if fourier_gradient is active or force it
+        if self.ensemble.u_disps_qspace is None:
+             # Force fourier gradient initialization in the ensemble
+             if not self.ensemble.fourier_gradient:
+                 print("Ensemble checking: computing Fourier transform of displacements and forces...")
+                 self.ensemble.fourier_gradient = True
+             self.ensemble.init()
+
+        # Unit conversion factors
+        # Target: Bohr (u) and Ry/Bohr (f)
+        u_conv = 1.0
+        f_conv = 1.0
+        if self.ensemble.units == "default":
+            u_conv = CC.Units.A_TO_BOHR
+            f_conv = 1.0 / CC.Units.A_TO_BOHR
+        elif self.ensemble.units == "hartree":
+            f_conv = 2.0 # Ha -> Ry
+
+        # Mass scaling factors (sqrt(m) for u, 1/sqrt(m) for f)
+        # We use self.dyn.structure corresponding to unit cell
+        m_uc = self.dyn.structure.get_masses_array()
+        sqrt_m = np.sqrt(m_uc)
+        sqrt_m_3 = np.repeat(sqrt_m, 3) # (3*nat_uc,)
+
+        # Compute the average anharmonic force (matching parent DynamicalLanczos)
+        # get_average_forces returns rho-weighted <f - f_SSCHA> in unit cell, Ry/Angstrom
+        f_mean_uc = self.ensemble.get_average_forces(get_error=False)  # (nat_uc, 3)
+        # Symmetrize the average force
+        qe_sym = CC.symmetries.QE_Symmetry(self.dyn.structure)
+        qe_sym.SetupQPoint()
+        qe_sym.SymmetrizeVector(f_mean_uc)
+        f_mean_flat = f_mean_uc.ravel()  # (3*nat_uc,) in Ry/Angstrom
+
+        # Fourier transform of the constant (tiled) average force:
+        # At Gamma: f_mean_q[Gamma] = sqrt(n_cell) * f_mean_uc
+        # At q != Gamma: f_mean_q[q] = 0
+        n_cell = np.prod(self.dyn.GetSupercell())
+        f_mean_q_gamma = np.sqrt(n_cell) * f_mean_flat  # (3*nat_uc,) complex
+
+        # Allocate q-space arrays
+        self.X_q = np.zeros((self.n_q, self.N, self.n_bands), dtype=np.complex128)
+        self.Y_q = np.zeros((self.n_q, self.N, self.n_bands), dtype=np.complex128)
+
+        # Projection loop
+        for iq in range(self.n_q):
+            # Retrieve ensemble q-space data (N, 3*nat_uc)
+            # Apply conversions and mass scaling
+            u_tilde_q = self.ensemble.u_disps_qspace[:, :, iq] * (u_conv * sqrt_m_3[None, :])
+
+            # Anharmonic force residual: f - f_SSCHA (both in Ry/Angstrom in q-space)
+            delta_f_q = (self.ensemble.forces_qspace[:, :, iq]
+                         - self.ensemble.sscha_forces_qspace[:, :, iq])
+
+            # Subtract the average force (only non-zero at Gamma)
+            if iq == 0:
+                delta_f_q = delta_f_q - f_mean_q_gamma[None, :]
+
+            # Convert to Ry/Bohr and mass-scale
+            f_tilde_q = delta_f_q * (f_conv / sqrt_m_3[None, :])
+
+            # Project onto eigenvector basis: X_q[iq, config, nu] = sum_a conj(pols_q[a, nu, iq]) * u_tilde_q[config, a]
+            # pols_q shape: (3*nat_uc, n_bands, n_q)
+            pol_iq = self.pols_q[:, :, iq]  # (3*nat_uc, n_bands)
+
+            self.X_q[iq, :, :] = u_tilde_q @ np.conj(pol_iq)
+            self.Y_q[iq, :, :] = f_tilde_q @ np.conj(pol_iq)
+
+    def build_q_pair_map(self, iq_pert):
+        """Find all (iq1, iq2) pairs satisfying q1 + q2 = q_pert + G.
+
+        Parameters
+        ----------
+        iq_pert : int
+            Index of the perturbation q-point.
+        """
+        bg = self.uci_structure.get_reciprocal_vectors() / (2 * np.pi)
+        q_pert = self.q_points[iq_pert]
+
+        self.iq_pert = iq_pert
+        self.q_pair_map = np.zeros(self.n_q, dtype=np.int32)
+
+        for iq1 in range(self.n_q):
+            q_target = q_pert - self.q_points[iq1]
+            found = False
+            for iq2 in range(self.n_q):
+                if CC.Methods.get_min_dist_into_cell(bg, q_target, self.q_points[iq2]) < 1e-6:
+                    self.q_pair_map[iq1] = iq2
+                    found = True
+                    break
+            if not found:
+                raise ValueError(
+                    "Could not find partner for q1={} with q_pert={}".format(
+                        self.q_points[iq1], q_pert))
+
+        # Unique pairs: iq1 <= iq2 (avoids double-counting)
+        self.unique_pairs = []
+        for iq1 in range(self.n_q):
+            iq2 = self.q_pair_map[iq1]
+            if iq1 <= iq2:
+                self.unique_pairs.append((iq1, iq2))
+
+        # Pre-compute block layout
+        self._compute_block_layout()
+
+    def _compute_block_layout(self):
+        """Pre-compute the block offsets and sizes for the psi vector."""
+        nb = self.n_bands
+        n_pairs = len(self.unique_pairs)
+
+        # Compute block sizes
+        self._block_sizes = []
+        for iq1, iq2 in self.unique_pairs:
+            if iq1 < iq2:
+                self._block_sizes.append(nb * nb)  # full block
+            else:  # iq1 == iq2
+                self._block_sizes.append(nb * (nb + 1) // 2)  # upper triangle
+
+        # R sector: n_bands entries
+        r_size = nb
+
+        # a' sector offsets
+        self._block_offsets_a = []
+        offset = r_size
+        for size in self._block_sizes:
+            self._block_offsets_a.append(offset)
+            offset += size
+
+        # b' sector offsets
+        self._block_offsets_b = []
+        for size in self._block_sizes:
+            self._block_offsets_b.append(offset)
+            offset += size
+
+        self._psi_size = offset
+
+    def get_psi_size(self):
+        """Return the total size of the psi vector."""
+        if self._psi_size is None:
+            raise ValueError("Must call build_q_pair_map first")
+        return self._psi_size
+
+    def get_block_offset(self, pair_idx, sector='a'):
+        """Get the offset into psi for a given pair index.
+
+        Parameters
+        ----------
+        pair_idx : int
+            Index into self.unique_pairs.
+        sector : str
+            'a' for a' sector, 'b' for b' sector.
+        """
+        if sector == 'a':
+            return self._block_offsets_a[pair_idx]
+        else:
+            return self._block_offsets_b[pair_idx]
+
+    def get_block_size(self, pair_idx):
+        """Get the number of entries for this pair."""
+        return self._block_sizes[pair_idx]
+
+    def get_R1_q(self):
+        """Extract R^(1) from psi (n_bands complex entries at q_pert)."""
+        return self.psi[:self.n_bands].copy()
+
+    def _unpack_upper_triangle(self, flat_data, n):
+        """Unpack upper triangle storage into a full (n, n) matrix.
+
+        Storage order: for i in range(n): M[i, i:] stored contiguously.
+        Off-diagonal: M[j, i] = conj(M[i, j]) for Hermitian matrix.
+        """
+        mat = np.zeros((n, n), dtype=np.complex128)
+        idx = 0
+        for i in range(n):
+            length = n - i
+            mat[i, i:] = flat_data[idx:idx + length]
+            idx += length
+        # Fill lower triangle (Hermitian)
+        for i in range(n):
+            mat[i + 1:, i] = np.conj(mat[i, i + 1:])
+        return mat
+
+    def _pack_upper_triangle(self, mat, n):
+        """Pack a full (n, n) matrix into upper triangle storage."""
+        size = n * (n + 1) // 2
+        flat = np.zeros(size, dtype=np.complex128)
+        idx = 0
+        for i in range(n):
+            length = n - i
+            flat[idx:idx + length] = mat[i, i:]
+            idx += length
+        return flat
+
+    def get_block(self, pair_idx, sector='a'):
+        """Reconstruct full (n_bands, n_bands) matrix from psi storage.
+
+        Parameters
+        ----------
+        pair_idx : int
+            Index into self.unique_pairs.
+        sector : str
+            'a' or 'b'.
+
+        Returns
+        -------
+        ndarray(n_bands, n_bands), complex128
+        """
+        nb = self.n_bands
+        iq1, iq2 = self.unique_pairs[pair_idx]
+        offset = self.get_block_offset(pair_idx, sector)
+        size = self.get_block_size(pair_idx)
+
+        raw = self.psi[offset:offset + size]
+
+        if iq1 < iq2:
+            # Full block, row-major
+            return raw.reshape(nb, nb).copy()
+        else:
+            # Upper triangle storage
+            return self._unpack_upper_triangle(raw, nb)
+
+    def get_a1_block(self, pair_idx):
+        """Get the a'(1) block for pair_idx."""
+        return self.get_block(pair_idx, 'a')
+
+    def get_b1_block(self, pair_idx):
+        """Get the b'(1) block for pair_idx."""
+        return self.get_block(pair_idx, 'b')
+
+    def set_block_in_psi(self, pair_idx, matrix, sector, target_psi):
+        """Write a (n_bands, n_bands) block into the target psi vector.
+
+        Parameters
+        ----------
+        pair_idx : int
+        matrix : ndarray(n_bands, n_bands)
+        sector : str ('a' or 'b')
+        target_psi : ndarray — the psi vector to write into
+        """
+        nb = self.n_bands
+        iq1, iq2 = self.unique_pairs[pair_idx]
+        offset = self.get_block_offset(pair_idx, sector)
+
+        if iq1 < iq2:
+            # Full block
+            target_psi[offset:offset + nb * nb] = matrix.ravel()
+        else:
+            # Upper triangle
+            target_psi[offset:offset + self.get_block_size(pair_idx)] = \
+                self._pack_upper_triangle(matrix, nb)
+
+    # ====================================================================
+    # Step 4: Mask for Hermitian inner product
+    # ====================================================================
+    def mask_dot_wigner(self, debug=False):
+        """Build the mask for Hermitian inner product with upper-triangle storage.
+
+        For full blocks (iq1 < iq2): factor 2 for the conjugate block (iq2, iq1).
+        For diagonal blocks (iq1 == iq2): off-diagonal factor 2, diagonal factor 1.
+
+        Returns
+        -------
+        ndarray(psi_size,), float64
+        """
+        mask = np.ones(self.get_psi_size(), dtype=np.float64)
+
+        for pair_idx, (iq1, iq2) in enumerate(self.unique_pairs):
+            offset_a = self.get_block_offset(pair_idx, 'a')
+            offset_b = self.get_block_offset(pair_idx, 'b')
+            size = self.get_block_size(pair_idx)
+
+            if iq1 < iq2:
+                # Full block: factor 2 for the conjugate block
+                mask[offset_a:offset_a + size] = 2
+                mask[offset_b:offset_b + size] = 2
+            else:  # iq1 == iq2, upper triangle storage
+                nb = self.n_bands
+                idx_a = offset_a
+                idx_b = offset_b
+                for i in range(nb):
+                    # Diagonal element: factor 1
+                    mask[idx_a] = 1
+                    mask[idx_b] = 1
+                    idx_a += 1
+                    idx_b += 1
+                    # Off-diagonal elements: factor 2
+                    for j in range(i + 1, nb):
+                        mask[idx_a] = 2
+                        mask[idx_b] = 2
+                        idx_a += 1
+                        idx_b += 1
+
+        return mask
+
+    # ====================================================================
+    # Step 5: Harmonic operator
+    # ====================================================================
+    def apply_L1_FT(self, transpose=False):
+        """Apply the harmonic part of L in q-space (Wigner formalism).
+
+        L_harm is block-diagonal:
+          R sector: -(w_q_pert[nu])^2 * R[nu]
+          a' sector: -(w1 - w2)^2 * a'
+          b' sector: -(w1 + w2)^2 * b'
+
+        Returns
+        -------
+        ndarray(psi_size,), complex128
+        """
+        out = np.zeros(self.get_psi_size(), dtype=np.complex128)
+
+        if self.ignore_harmonic:
+            return out
+
+        # R sector
+        w_qp = self.w_q[:, self.iq_pert]  # (n_bands,)
+        out[:self.n_bands] = -(w_qp ** 2) * self.psi[:self.n_bands]
+
+        # a' and b' sectors
+        for pair_idx, (iq1, iq2) in enumerate(self.unique_pairs):
+            w1 = self.w_q[:, iq1]  # (n_bands,)
+            w2 = self.w_q[:, iq2]  # (n_bands,)
+            a_block = self.get_a1_block(pair_idx)
+            b_block = self.get_b1_block(pair_idx)
+
+            w_minus2 = np.subtract.outer(w1, w2) ** 2  # (n_bands, n_bands)
+            w_plus2 = np.add.outer(w1, w2) ** 2
+
+            self.set_block_in_psi(pair_idx, -w_minus2 * a_block, 'a', out)
+            self.set_block_in_psi(pair_idx, -w_plus2 * b_block, 'b', out)
+
+        return out
+
+    # ====================================================================
+    # Step 6: Anharmonic operator
+    # ====================================================================
+    def _safe_bose_and_mask(self, iq):
+        """Compute Bose-Einstein occupation for bands at iq, masking acoustic modes.
+
+        Returns
+        -------
+        n : ndarray(n_bands,)
+            Bose-Einstein occupation; 0 for acoustic modes.
+        valid : ndarray(n_bands,), bool
+            True for non-acoustic bands.
+        """
+        w = self.w_q[:, iq]
+        valid = w > self.acoustic_eps
+        n = np.zeros_like(w)
+        if self.T > __EPSILON__:
+            n[valid] = 1.0 / (np.exp(w[valid] * __RyToK__ / self.T) - 1.0)
+        return n, valid
+
+    def get_chi_minus_q(self):
+        """Get chi^- for each unique pair as a list of (n_bands, n_bands) matrices.
+
+        chi^-_{nu1, nu2} = (w1 - w2)(n1 - n2) / (2 * w1 * w2)
+        Entries involving acoustic modes (w < acoustic_eps) are set to 0.
+        """
+        chi_list = []
+        for iq1, iq2 in self.unique_pairs:
+            w1 = self.w_q[:, iq1]
+            w2 = self.w_q[:, iq2]
+            n1, v1 = self._safe_bose_and_mask(iq1)
+            n2, v2 = self._safe_bose_and_mask(iq2)
+            # Outer mask: both bands must be non-acoustic
+            valid_mask = np.outer(v1, v2)
+
+            w1_mat = np.tile(w1, (self.n_bands, 1)).T
+            w2_mat = np.tile(w2, (self.n_bands, 1))
+            n1_mat = np.tile(n1, (self.n_bands, 1)).T
+            n2_mat = np.tile(n2, (self.n_bands, 1))
+
+            # Safe division: use np.where to avoid 0/0
+            denom = 2.0 * w1_mat * w2_mat
+            chi = np.where(valid_mask,
+                           (w1_mat - w2_mat) * (n1_mat - n2_mat) / np.where(valid_mask, denom, 1.0),
+                           0.0)
+            chi_list.append(chi)
+        return chi_list
+
+    def get_chi_plus_q(self):
+        """Get chi^+ for each unique pair as a list of (n_bands, n_bands) matrices.
+
+        chi^+_{nu1, nu2} = (w1 + w2)(1 + n1 + n2) / (2 * w1 * w2)
+        Entries involving acoustic modes (w < acoustic_eps) are set to 0.
+        """
+        chi_list = []
+        for iq1, iq2 in self.unique_pairs:
+            w1 = self.w_q[:, iq1]
+            w2 = self.w_q[:, iq2]
+            n1, v1 = self._safe_bose_and_mask(iq1)
+            n2, v2 = self._safe_bose_and_mask(iq2)
+            valid_mask = np.outer(v1, v2)
+
+            w1_mat = np.tile(w1, (self.n_bands, 1)).T
+            w2_mat = np.tile(w2, (self.n_bands, 1))
+            n1_mat = np.tile(n1, (self.n_bands, 1)).T
+            n2_mat = np.tile(n2, (self.n_bands, 1))
+
+            denom = 2.0 * w1_mat * w2_mat
+            chi = np.where(valid_mask,
+                           (w1_mat + w2_mat) * (1 + n1_mat + n2_mat) / np.where(valid_mask, denom, 1.0),
+                           0.0)
+            chi_list.append(chi)
+        return chi_list
+
+    def get_alpha1_beta1_wigner_q(self, get_alpha=True):
+        """Get the perturbation on alpha (Upsilon) from the q-space psi.
+
+        Transforms a'/b' blocks back to the alpha1 perturbation that the
+        Julia code needs.
+
+        alpha1[iq1, iq2] = (w1*w2/X) * [sqrt(-0.5*chi_minus)*a' - sqrt(0.5*chi_plus)*b']
+
+        Returns
+        -------
+        list of ndarray(n_bands, n_bands) — one per unique pair
+        """
+        chi_minus_list = self.get_chi_minus_q()
+        chi_plus_list = self.get_chi_plus_q()
+
+        alpha1_blocks = []
+        for pair_idx, (iq1, iq2) in enumerate(self.unique_pairs):
+            w1 = self.w_q[:, iq1]
+            w2 = self.w_q[:, iq2]
+            n1, v1 = self._safe_bose_and_mask(iq1)
+            n2, v2 = self._safe_bose_and_mask(iq2)
+            valid_mask = np.outer(v1, v2)
+
+            w1_mat = np.tile(w1, (self.n_bands, 1)).T
+            w2_mat = np.tile(w2, (self.n_bands, 1))
+            n1_mat = np.tile(n1, (self.n_bands, 1)).T
+            n2_mat = np.tile(n2, (self.n_bands, 1))
+
+            X = (1 + 2 * n1_mat) * (1 + 2 * n2_mat) / 8
+            # Safe division for w2_on_X: when acoustic, X→∞ and w→0, set to 0
+            w2_on_X = np.where(valid_mask,
+                               (w1_mat * w2_mat) / np.where(valid_mask, X, 1.0),
+                               0.0)
+            chi_minus = chi_minus_list[pair_idx]
+            chi_plus = chi_plus_list[pair_idx]
+
+            a_block = self.get_a1_block(pair_idx)
+            b_block = self.get_b1_block(pair_idx)
+
+            if get_alpha:
+                new_a = w2_on_X * np.sqrt(-0.5 * chi_minus) * a_block
+                new_b = w2_on_X * np.sqrt(+0.5 * chi_plus) * b_block
+                alpha1 = new_a - new_b
+            else:
+                X_safe = np.where(valid_mask, X, 1.0)
+                new_a = np.where(valid_mask,
+                                 (np.sqrt(-0.5 * chi_minus) / X_safe) * a_block,
+                                 0.0)
+                new_b = np.where(valid_mask,
+                                 (np.sqrt(+0.5 * chi_plus) / X_safe) * b_block,
+                                 0.0)
+                alpha1 = new_a + new_b
+
+            alpha1_blocks.append(alpha1)
+        return alpha1_blocks
+
+    def _flatten_blocks(self, blocks):
+        """Flatten a list of (n_bands, n_bands) blocks into a contiguous array.
+
+        Uses Fortran (column-major) order to match Julia's column-major storage.
+        """
+        return np.concatenate([b.ravel(order='F') for b in blocks])
+
+    def _unflatten_blocks(self, flat):
+        """Unflatten a contiguous array back into a list of blocks.
+
+        Uses Fortran (column-major) order to match Julia's column-major storage.
+        """
+        nb = self.n_bands
+        blocks = []
+        offset = 0
+        for iq1, iq2 in self.unique_pairs:
+            size = nb * nb
+            blocks.append(flat[offset:offset + size].reshape(nb, nb, order='F'))
+            offset += size
+        return blocks
+
+    def apply_anharmonic_FT(self, transpose=False, **kwargs):
+        """Apply the anharmonic part of L in q-space (Wigner formalism).
+
+        Calls the Julia q-space extension to compute the perturbed averages,
+        then assembles the output psi vector.
+
+        Returns
+        -------
+        ndarray(psi_size,), complex128
+        """
+        # If both D3 and D4 are ignored, return zero
+        if self.ignore_v3 and self.ignore_v4:
+            return np.zeros(self.get_psi_size(), dtype=np.complex128)
+
+        import julia.Main
+
+        R1 = self.get_R1_q()
+        # If D3 is ignored, zero out R1 so that D3 weight is zero
+        if self.ignore_v3:
+            R1 = np.zeros_like(R1)
+        alpha1_blocks = self.get_alpha1_beta1_wigner_q(get_alpha=True)
+        alpha1_flat = self._flatten_blocks(alpha1_blocks)
+
+        # Call Julia
+        f_pert, d2v_blocks = self._call_julia_qspace(R1, alpha1_flat)
+
+        # Build output psi
+        final_psi = np.zeros(self.get_psi_size(), dtype=np.complex128)
+
+        # R sector
+        final_psi[:self.n_bands] = f_pert
+
+        # a'/b' sectors
+        chi_minus_list = self.get_chi_minus_q()
+        chi_plus_list = self.get_chi_plus_q()
+
+        for pair_idx, (iq1, iq2) in enumerate(self.unique_pairs):
+            d2v_block = d2v_blocks[pair_idx]
+            pert_a = np.sqrt(-0.5 * chi_minus_list[pair_idx]) * d2v_block
+            pert_b = -np.sqrt(+0.5 * chi_plus_list[pair_idx]) * d2v_block
+            self.set_block_in_psi(pair_idx, pert_a, 'a', final_psi)
+            self.set_block_in_psi(pair_idx, pert_b, 'b', final_psi)
+
+        return final_psi
+
+    def _call_julia_qspace(self, R1, alpha1_flat):
+        """Call the Julia q-space extension with MPI parallelization.
+
+        Same MPI pattern as DynamicalLanczos.apply_anharmonic_FT.
+
+        Returns
+        -------
+        f_pert : ndarray(n_bands,), complex128
+        d2v_blocks : list of ndarray(n_bands, n_bands), complex128
+        """
+        import julia.Main
+
+        n_total = self.n_syms_qspace * self.N
+        n_processors = Parallel.GetNProc()
+
+        count = n_total // n_processors
+        remainer = n_total % n_processors
+        indices = []
+        for rank in range(n_processors):
+            if rank < remainer:
+                start = np.int64(rank * (count + 1))
+                stop = np.int64(start + count + 1)
+            else:
+                start = np.int64(rank * count + remainer)
+                stop = np.int64(start + count)
+            indices.append([start + 1, stop])  # 1-indexed for Julia
+
+        unique_pairs_arr = np.array(self.unique_pairs, dtype=np.int32) + 1  # 1-indexed
+
+        def get_combined(start_end):
+            return julia.Main.get_perturb_averages_qspace(
+                self.X_q, self.Y_q, self.w_q, self.rho,
+                R1, alpha1_flat,
+                np.float64(self.T), bool(not self.ignore_v4),
+                np.int64(self.iq_pert + 1),
+                self.q_pair_map + 1,  # 1-indexed
+                unique_pairs_arr,
+                int(start_end[0]), int(start_end[1])
+            )
+
+        combined = Parallel.GoParallel(get_combined, indices, "+")
+        f_pert = combined[:self.n_bands]
+        d2v_flat = combined[self.n_bands:]
+        d2v_blocks = self._unflatten_blocks(d2v_flat)
+        return f_pert, d2v_blocks
+
+    # ====================================================================
+    # Step 5+6: Combined L application (override apply_full_L)
+    # ====================================================================
+    def apply_full_L(self, target=None, force_t_0=False, force_FT=True,
+                     transpose=False, fast_lanczos=True):
+        """Apply the full L operator in q-space.
+
+        Parameters
+        ----------
+        target : ndarray or None
+            If provided, copy into self.psi first.
+        transpose : bool
+            Not used for Hermitian Lanczos.
+
+        Returns
+        -------
+        ndarray(psi_size,), complex128
+        """
+        if target is not None:
+            self.psi = target.copy()
+
+        result = self.apply_L1_FT(transpose=transpose)
+        result += self.apply_anharmonic_FT(transpose=transpose)
+
+        return result
+
+    # ====================================================================
+    # Step 7: Hermitian Lanczos (run_FT override)
+    # ====================================================================
+    def run_FT(self, n_iter, save_dir=None, save_each=5, verbose=True,
+               n_rep_orth=0, n_ortho=10, flush_output=True, debug=False,
+               prefix="LANCZOS", run_simm=None, optimized=False):
+        """Run the Hermitian Lanczos algorithm for q-space.
+
+        This is the same structure as the parent run_FT but with:
+        1. Forced run_simm = True (Hermitian)
+        2. Hermitian dot products: psi.conj().dot(psi * mask).real
+        3. Complex128 psi
+        4. Real coefficients (guaranteed by Hermitian L)
+        """
+        self.verbose = verbose
+
+        if not self.initialized:
+            if verbose:
+                print('Not initialized. Now we symmetrize\n')
+            self.prepare_symmetrization()
+
+        ERROR_MSG = """
+Error, you must initialize a perturbation to start the Lanczos.
+Use prepare_mode_q or prepare_perturbation_q before calling run_FT.
+"""
+        if self.psi is None:
+            raise ValueError(ERROR_MSG)
+
+        mask_dot = self.mask_dot_wigner(debug)
+        psi_norm = np.real(np.conj(self.psi).dot(self.psi * mask_dot))
+        if np.isnan(psi_norm) or psi_norm == 0:
+            raise ValueError(ERROR_MSG)
+
+        # Force symmetric Lanczos
+        run_simm = True
+
+        if Parallel.am_i_the_master():
+            if save_dir is not None:
+                if not os.path.exists(save_dir):
+                    os.makedirs(save_dir)
+
+        if verbose:
+            print('Running the Hermitian Lanczos in q-space')
+            print()
+
+        # Get current step
+        i_step = len(self.a_coeffs)
+
+        if verbose:
+            header = """
+<=====================================>
+|                                     |
+|     Q-SPACE LANCZOS ALGORITHM       |
+|                                     |
+<=====================================>
+
+Starting the algorithm.
+Starting from step %d
+""" % i_step
+            print(header)
+
+        # Initialize
+        if i_step == 0:
+            self.basis_Q = []
+            self.basis_P = []
+            self.s_norm = []
+            norm = np.sqrt(np.real(np.conj(self.psi).dot(self.psi * mask_dot)))
+            first_vector = self.psi / norm
+            self.basis_Q.append(first_vector)
+            self.basis_P.append(first_vector)
+            self.s_norm.append(1)
+        else:
+            if verbose:
+                print('Restarting the Lanczos')
+            self.basis_Q = list(self.basis_Q)
+            self.basis_P = list(self.basis_P)
+            self.s_norm = list(self.s_norm)
+            self.a_coeffs = list(self.a_coeffs)
+            self.b_coeffs = list(self.b_coeffs)
+            self.c_coeffs = list(self.c_coeffs)
+
+        psi_q = self.basis_Q[-1]
+        psi_p = self.basis_P[-1]
+
+        next_converged = False
+        converged = False
+
+        for i in range(i_step, i_step + n_iter):
+            if verbose:
+                print("\n ===== NEW STEP %d =====\n" % (i + 1))
+                if flush_output:
+                    sys.stdout.flush()
+
+            # Apply L (Hermitian => p_L = L_q)
+            t1 = time.time()
+            L_q = self.apply_full_L(psi_q)
+            p_L = np.copy(L_q)
+            t2 = time.time()
+
+            # p normalization
+            c_old = 1
+            if len(self.c_coeffs) > 0:
+                c_old = self.c_coeffs[-1]
+            p_norm = self.s_norm[-1] / c_old
+
+            # a coefficient (real for Hermitian L)
+            a_coeff = np.real(np.conj(psi_p).dot(L_q * mask_dot)) * p_norm
+
+            if np.isnan(a_coeff):
+                raise ValueError("Invalid value in Lanczos. Check frequencies/initialization.")
+
+            # Residuals
+            rk = L_q - a_coeff * psi_q
+            if len(self.basis_Q) > 1:
+                rk -= self.c_coeffs[-1] * self.basis_Q[-2]
+
+            sk = p_L - a_coeff * psi_p
+            if len(self.basis_P) > 1:
+                old_p_norm = self.s_norm[-2]
+                if len(self.c_coeffs) >= 2:
+                    old_p_norm = self.s_norm[-2] / self.c_coeffs[-2]
+                sk -= self.b_coeffs[-1] * self.basis_P[-2] * (old_p_norm / p_norm)
+
+            # s_norm
+            s_norm = np.sqrt(np.real(np.conj(sk).dot(sk * mask_dot)))
+            sk_tilde = sk / s_norm
+            s_norm *= p_norm
+
+            # b and c coefficients (real, should be equal for Hermitian L)
+            b_coeff = np.sqrt(np.real(np.conj(rk).dot(rk * mask_dot)))
+            c_coeff = np.real(np.conj(sk_tilde).dot((rk / b_coeff) * mask_dot)) * s_norm
+
+            self.a_coeffs.append(a_coeff)
+
+            if np.abs(b_coeff) < __EPSILON__ or next_converged:
+                if verbose:
+                    print("Converged (b = {})".format(b_coeff))
+                converged = True
+                break
+            if np.abs(c_coeff) < __EPSILON__:
+                if verbose:
+                    print("Converged (c = {})".format(c_coeff))
+                converged = True
+                break
+
+            psi_q = rk / b_coeff
+            psi_p = sk_tilde.copy()
+
+            # Gram-Schmidt
+            new_q = psi_q.copy()
+            new_p = psi_p.copy()
+
+            for k_orth in range(n_rep_orth):
+                start = max(0, len(self.basis_P) - (n_ortho or len(self.basis_P)))
+
+                for j in range(start, len(self.basis_P)):
+                    coeff1 = np.real(np.conj(self.basis_P[j]).dot(new_q * mask_dot))
+                    coeff2 = np.real(np.conj(self.basis_Q[j]).dot(new_p * mask_dot))
+                    new_q -= coeff1 * self.basis_P[j]
+                    new_p -= coeff2 * self.basis_Q[j]
+
+                normq = np.sqrt(np.real(np.conj(new_q).dot(new_q * mask_dot)))
+                if normq < __EPSILON__:
+                    next_converged = True
+                new_q /= normq
+
+                normp = np.real(np.conj(new_p).dot(new_p * mask_dot))
+                if np.abs(normp) < __EPSILON__:
+                    next_converged = True
+                new_p /= normp
+
+                s_norm = c_coeff / np.real(np.conj(new_p).dot(new_q * mask_dot))
+
+            if not converged:
+                self.basis_Q.append(new_q)
+                self.basis_P.append(new_p)
+                psi_q = new_q.copy()
+                psi_p = new_p.copy()
+
+                self.b_coeffs.append(b_coeff)
+                self.c_coeffs.append(c_coeff)
+                self.s_norm.append(s_norm)
+
+            if verbose:
+                print("Time for L application: %d s" % (t2 - t1))
+                print("a_%d = %.8e" % (i, self.a_coeffs[-1]))
+                print("b_%d = %.8e" % (i, self.b_coeffs[-1]))
+                print("c_%d = %.8e" % (i, self.c_coeffs[-1]))
+                print("|b-c| = %.8e" % np.abs(self.b_coeffs[-1] - self.c_coeffs[-1]))
+
+            if save_dir is not None:
+                if (i + 1) % save_each == 0:
+                    self.save_status("%s/%s_STEP%d" % (save_dir, prefix, i + 1))
+
+            if verbose:
+                print("Lanczos step %d completed." % (i + 1))
+
+        if converged and verbose:
+            print("   last a coeff = {}".format(self.a_coeffs[-1]))
+
+    # ====================================================================
+    # Step 8: Perturbation setup
+    # ====================================================================
+    def prepare_mode_q(self, iq, band_index):
+        """Prepare perturbation for mode (q, nu).
+
+        Parameters
+        ----------
+        iq : int
+            Index of the q-point.
+        band_index : int
+            Band index (0-based).
+        """
+        self.build_q_pair_map(iq)
+        self.reset_q()
+        self.psi[band_index] = 1.0 + 0j
+        self.perturbation_modulus = 1.0
+
+    def prepare_ir(self, effective_charges = None, pol_vec = np.array([1.0, 0.0, 0.0])):
+        """
+        PREPARE LANCZOS FOR INFRARED SPECTRUM COMPUTATION
+        =================================================
+
+        In this subroutine we prepare the lanczos algorithm for the computation of the
+        infrared spectrum signal.
+
+        Parameters
+        ----------
+            effective_charges : ndarray(size = (n_atoms, 3, 3), dtype = np.double)
+                The effective charges. Indices are: Number of atoms in the unit cell,
+                electric field component, atomic coordinate. If None, the effective charges
+                contained in the dynamical matrix will be considered.
+            pol_vec : ndarray(size = 3)
+                The polarization vector of the light.
+        """
+
+        ec = self.dyn.effective_charges
+        if not effective_charges is None:
+            ec = effective_charges
+
+        assert not ec is None, "Error, no effective charge found. Cannot initialize IR responce"
+
+        z_eff = np.einsum("abc, b", ec, pol_vec)
+
+        # Get the gamma effective charge
+        # FIX: remove double mass scaling and add supercell factor
+        n_cell = np.prod(self.dyn.GetSupercell())
+        new_zeff = z_eff.ravel() * np.sqrt(n_cell)
+
+        # This is a Gamma perturbation
+        self.prepare_perturbation_q(0, new_zeff)
+
+    def prepare_raman(self, pol_vec_in=np.array([1.0, 0.0, 0.0]), pol_vec_out=np.array([1.0, 0.0, 0.0]), 
+                     mixed=False, pol_in_2=None, pol_out_2=None, unpolarized=None):
+        """
+        PREPARE LANCZOS FOR RAMAN SPECTRUM COMPUTATION
+        ==============================================
+
+        In this subroutine we prepare the lanczos algorithm for the computation of the
+        Raman spectrum signal.
+
+        Parameters
+        ----------
+            pol_vec_in : ndarray(size = 3)
+                The polarization vector of the incoming light
+            pol_vec_out : ndarray(size = 3)
+                The polarization vector for the outcoming light
+            mixed : bool
+                If True, add another component of the Raman tensor
+            pol_in_2 : ndarray(size = 3) or None
+                Second incoming polarization if mixed=True
+            pol_out_2 : ndarray(size = 3) or None  
+                Second outcoming polarization if mixed=True
+            unpolarized : int or None
+                The perturbation for unpolarized raman (if different from None, overrides the behaviour
+                of pol_vec_in and pol_vec_out). Indices goes from 0 to 6 (included).
+                0 is alpha^2
+                1 + 2 + 3 + 4 + 5 + 6 are beta^2
+                alpha_0 = (xx + yy + zz)^2/9
+                beta_1 = (xx -yy)^2 / 2
+                beta_2 = (xx -zz)^2 / 2
+                beta_3 = (yy -zz)^2 / 2
+                beta_4 = 3xy^2
+                beta_5 = 3xz^2
+                beta_6 = 3yz^2
+
+                The total unpolarized raman intensity is 45 alpha^2 + 7 beta^2
+        """
+        # Check if the raman tensor is present
+        assert not self.dyn.raman_tensor is None, "Error, no Raman tensor found. Cannot initialize Raman response"
+
+        n_cell = np.prod(self.dyn.GetSupercell())
+        
+        if unpolarized is None:
+            # Get the raman vector (apply the ASR and contract the raman tensor with the polarization vectors)
+            raman_v = self.dyn.GetRamanVector(pol_vec_in, pol_vec_out)
+            
+            if mixed:
+                print('Prepare Raman')
+                print('Adding other component of the Raman tensor')
+                raman_v += self.dyn.GetRamanVector(pol_in_2, pol_out_2)
+            
+            # Scale for Γ-point constant perturbation
+            new_raman_v = raman_v.ravel() * np.sqrt(n_cell)
+            
+            # Convert in the polarization basis
+            self.prepare_perturbation_q(0, new_raman_v)
+        else:
+            px = np.array([1, 0, 0])
+            py = np.array([0, 1, 0])
+            pz = np.array([0, 0, 1])
+            
+            if unpolarized == 0:
+                # Alpha = (xx + yy + zz)^2/9
+                raman_v = self.dyn.GetRamanVector(px, px)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) / 3
+                self.prepare_perturbation_q(0, new_raman_v)
+                
+                raman_v = self.dyn.GetRamanVector(py, py)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) / 3
+                self.prepare_perturbation_q(0, new_raman_v, add=True)
+                
+                raman_v = self.dyn.GetRamanVector(pz, pz)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) / 3
+                self.prepare_perturbation_q(0, new_raman_v, add=True)
+            elif unpolarized == 1:
+                # (xx - yy)^2 / 2
+                raman_v = self.dyn.GetRamanVector(px, px)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) / np.sqrt(2)
+                self.prepare_perturbation_q(0, new_raman_v)
+                
+                raman_v = self.dyn.GetRamanVector(py, py)
+                new_raman_v = -raman_v.ravel() * np.sqrt(n_cell) / np.sqrt(2)
+                self.prepare_perturbation_q(0, new_raman_v, add=True)
+            elif unpolarized == 2:
+                # (xx - zz)^2 / 2
+                raman_v = self.dyn.GetRamanVector(px, px)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) / np.sqrt(2)
+                self.prepare_perturbation_q(0, new_raman_v)
+                
+                raman_v = self.dyn.GetRamanVector(pz, pz)
+                new_raman_v = -raman_v.ravel() * np.sqrt(n_cell) / np.sqrt(2)
+                self.prepare_perturbation_q(0, new_raman_v, add=True)
+            elif unpolarized == 3:
+                # (yy - zz)^2 / 2
+                raman_v = self.dyn.GetRamanVector(py, py)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) / np.sqrt(2)
+                self.prepare_perturbation_q(0, new_raman_v)
+                
+                raman_v = self.dyn.GetRamanVector(pz, pz)
+                new_raman_v = -raman_v.ravel() * np.sqrt(n_cell) / np.sqrt(2)
+                self.prepare_perturbation_q(0, new_raman_v, add=True)
+            elif unpolarized == 4:
+                # 3 xy^2
+                raman_v = self.dyn.GetRamanVector(px, py)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) * np.sqrt(3)
+                self.prepare_perturbation_q(0, new_raman_v)
+            elif unpolarized == 5:
+                # 3 yz^2
+                raman_v = self.dyn.GetRamanVector(py, pz)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) * np.sqrt(3)
+                self.prepare_perturbation_q(0, new_raman_v)
+            elif unpolarized == 6:
+                # 3 xz^2
+                raman_v = self.dyn.GetRamanVector(px, pz)
+                new_raman_v = raman_v.ravel() * np.sqrt(n_cell) * np.sqrt(3)
+                self.prepare_perturbation_q(0, new_raman_v)
+            else:
+                raise ValueError(f"Error, unpolarized must be between [0, ..., 6] got invalid {unpolarized}.")
+
+    def prepare_unpolarized_raman(self, index=0, debug=False):
+        """
+        PREPARE UNPOLARIZED RAMAN SIGNAL
+        ================================
+        
+        The raman tensor is read from the dynamical matrix provided by the original ensemble.
+        
+        The perturbations are prepared according to the formula (see https://doi.org/10.1021/jp5125266)
+        
+        ..math:
+        
+            I_unpol = 45/9 (xx + yy + zz)^2
+                      + 7/2 [(xx-yy)^2 + (xx-zz)^2 + (yy-zz)^2]
+                      + 7 * 3 [(xy)^2 + (yz)^2 + (xz)^2]
+        
+        Note: This method prepares the raw components WITHOUT prefactors.
+        Use get_prefactors_unpolarized_raman() to get the correct prefactors.
+        """
+        # Check if the raman tensor is present
+        assert not self.dyn.raman_tensor is None, "Error, no Raman tensor found. Cannot initialize the Raman response"
+        
+        labels = [i for i in range(7)]
+        if index not in labels:
+            raise ValueError(f'{index} should be in {labels}')
+        
+        epols = {'x': np.array([1, 0, 0]),
+                 'y': np.array([0, 1, 0]),
+                 'z': np.array([0, 0, 1])}
+        
+        n_cell = np.prod(self.dyn.GetSupercell())
+        
+        # (xx + yy + zz)^2
+        if index == 0:
+            raman_v = self.dyn.GetRamanVector(epols['x'], epols['x'])
+            raman_v += self.dyn.GetRamanVector(epols['y'], epols['y'])
+            raman_v += self.dyn.GetRamanVector(epols['z'], epols['z'])
+        # (xx - yy)^2    
+        elif index == 1:
+            raman_v = self.dyn.GetRamanVector(epols['x'], epols['x'])
+            raman_v -= self.dyn.GetRamanVector(epols['y'], epols['y'])
+        # (xx - zz)^2       
+        elif index == 2:
+            raman_v = self.dyn.GetRamanVector(epols['x'], epols['x'])
+            raman_v -= self.dyn.GetRamanVector(epols['z'], epols['z'])
+        # (yy - zz)^2   
+        elif index == 3:
+            raman_v = self.dyn.GetRamanVector(epols['y'], epols['y'])
+            raman_v -= self.dyn.GetRamanVector(epols['z'], epols['z'])
+        # (xy)^2
+        elif index == 4:
+            raman_v = self.dyn.GetRamanVector(epols['x'], epols['y'])
+        # (xz)^2
+        elif index == 5:
+            raman_v = self.dyn.GetRamanVector(epols['x'], epols['z'])
+        # (yz)^2
+        elif index == 6:
+            raman_v = self.dyn.GetRamanVector(epols['y'], epols['z'])
+            
+        if debug:
+            np.save(f'raman_v_{index}', raman_v)
+        
+        # Scale for Γ-point constant perturbation
+        new_raman_v = raman_v.ravel() * np.sqrt(n_cell)
+        
+        # Convert in the polarization basis
+        self.prepare_perturbation_q(0, new_raman_v)
+            
+        if debug:
+            print(f'[NEW] Perturbation modulus with eq Raman tensors = {self.perturbation_modulus}')
+        print()
+
+    def prepare_perturbation_q(self, iq, vector, add=False):
+        """Prepare perturbation at q from a real-space vector (3*n_at_uc,).
+
+        Projects the vector onto q-space eigenmodes at iq.
+
+        Parameters
+        ----------
+        iq : int
+            Index of the q-point.
+        vector : ndarray(3*n_at_uc,)
+            Perturbation vector in Cartesian real space.
+        add : bool
+            If true, the perturbation is added on top of the one already setup.
+            Calling add does not cause a reset of the Lanczos.
+        """
+        if not add:
+            self.build_q_pair_map(iq)
+            self.reset_q()
+        
+        m = np.tile(self.uci_structure.get_masses_array(), (3, 1)).T.ravel()
+        v_scaled = vector / np.sqrt(m)
+        R1 = np.conj(self.pols_q[:, :, iq]).T @ v_scaled  # (n_bands,) complex
+        self.psi[:self.n_bands] += R1
+        self.perturbation_modulus = np.real(np.conj(R1) @ R1)
+
+    def reset_q(self):
+        """Reset the Lanczos state for q-space."""
+        n = self.get_psi_size()
+        self.psi = np.zeros(n, dtype=np.complex128)
+
+        self.eigvals = None
+        self.eigvects = None
+
+        self.a_coeffs = []
+        self.b_coeffs = []
+        self.c_coeffs = []
+        self.krilov_basis = []
+        self.basis_P = []
+        self.basis_Q = []
+        self.s_norm = []
+
+    # ====================================================================
+    # Step 11: Q-space symmetry matrix construction
+    # ====================================================================
+    def prepare_symmetrization(self, no_sym=False, verbose=True, symmetries=None):
+        """Build q-space symmetry matrices and cache them in Julia.
+
+        Overrides the parent to build sparse complex symmetry matrices
+        in the q-space mode basis.
+
+        Uses spglib on the unit cell (not supercell) to get correct
+        fractional-coordinate rotations and translations, then converts
+        to Cartesian for the representation matrices.
+        """
+        self.initialized = True
+
+        if no_sym:
+            # Identity only
+            self.n_syms_qspace = 1
+            n_total = self.n_q * self.n_bands
+            # Build identity sparse matrix
+            import julia.Main
+            julia.Main.eval("""
+            function init_identity_qspace(n_total::Int64)
+                I_sparse = SparseArrays.sparse(
+                    Int32.(1:n_total), Int32.(1:n_total),
+                    ComplexF64.(ones(n_total)), n_total, n_total)
+                _cached_qspace_symmetries[] = [I_sparse]
+                return nothing
+            end
+            """)
+            julia.Main.init_identity_qspace(np.int64(n_total))
+            return
+
+        if not __SPGLIB__:
+            raise ImportError("spglib required for symmetrization")
+
+        # Get symmetries from the UNIT CELL directly via spglib.
+        # spglib returns rotations and translations in fractional
+        # (crystal) coordinates. We convert rotations to Cartesian via
+        # R_cart = M @ R_frac @ M^{-1} where M = unit_cell.T.
+        spg_data = spglib.get_symmetry(self.uci_structure.get_spglib_cell())
+        rot_frac_all = spg_data['rotations']   # (n_sym, 3, 3) integer
+        trans_frac_all = spg_data['translations']  # (n_sym, 3) fractional
+
+        M = self.uci_structure.unit_cell.T       # columns = lattice vectors
+        Minv = np.linalg.inv(M)
+
+        # Extract unique point-group rotations (keep first occurrence)
+        unique_pg = {}
+        for i in range(len(rot_frac_all)):
+            key = rot_frac_all[i].tobytes()
+            if key not in unique_pg:
+                unique_pg[key] = i
+        pg_indices = list(unique_pg.values())
+
+        if verbose:
+            print("Q-space: {} PG symmetries from {} total unit cell symmetries".format(
+                len(pg_indices), len(rot_frac_all)))
+
+        self._build_qspace_symmetries(
+            rot_frac_all, trans_frac_all, pg_indices, M, Minv,
+            verbose=verbose)
+
+    @staticmethod
+    def _get_atom_perm(structure, R_cart, t_cart, M, Minv, tol=0.1):
+        """Find atom permutation under symmetry {R|t}.
+
+        Returns irt such that R @ tau[kappa] + t ≡ tau[irt[kappa]] mod lattice.
+        """
+        nat = structure.N_atoms
+        irt = np.zeros(nat, dtype=int)
+        for kappa in range(nat):
+            tau = structure.coords[kappa]
+            mapped = R_cart @ tau + t_cart
+            for kp in range(nat):
+                diff = mapped - structure.coords[kp]
+                diff_frac = Minv @ diff
+                diff_frac -= np.round(diff_frac)
+                if np.linalg.norm(M @ diff_frac) < tol:
+                    irt[kappa] = kp
+                    break
+        return irt
+
+    def _build_qspace_symmetries(self, rot_frac_all, trans_frac_all,
+                                  pg_indices, M, Minv, verbose=True):
+        """Build sparse complex symmetry matrices for q-space modes.
+
+        For each PG symmetry {R|t}:
+          - Maps q -> Rq (permutes q-points)
+          - Rotates bands within each q-block via
+            D_{nu',nu}(iq'<-iq) = conj(pols_q[:,nu',iq']).T @ P_uc(q') @ pols_q[:,nu,iq]
+          - P_uc includes Cartesian rotation, atom permutation, and Bloch phase:
+            P_uc[3*kp:3*kp+3, 3*k:3*k+3] = exp(-2*pi*i * q' . L_k) * R_cart
+            where L_k = R_cart @ tau_k + t_cart - tau_kp is a lattice vector.
+        """
+        import julia.Main
+
+        nat_uc = self.uci_structure.N_atoms
+        bg = self.uci_structure.get_reciprocal_vectors() / (2 * np.pi)
+        n_total = self.n_q * self.n_bands
+        nb = self.n_bands
+
+        n_syms = len(pg_indices)
+        self.n_syms_qspace = n_syms
+
+        # Build all sparse matrices in Python, then pass to Julia
+        all_rows = []
+        all_cols = []
+        all_vals = []
+
+        for i_sym_idx in pg_indices:
+            R_frac = rot_frac_all[i_sym_idx].astype(float)
+            t_frac = trans_frac_all[i_sym_idx]
+
+            # Convert rotation and translation to Cartesian
+            R_cart = M @ R_frac @ Minv
+            t_cart = M @ t_frac
+
+            # Get atom permutation
+            irt = self._get_atom_perm(
+                self.uci_structure, R_cart, t_cart, M, Minv)
+
+            rows, cols, vals = [], [], []
+
+            for iq in range(self.n_q):
+                q = self.q_points[iq]
+                Rq = R_cart @ q
+
+                # Find iq' matching Rq
+                iq_prime = find_q_index(Rq, self.q_points, bg)
+                q_prime = self.q_points[iq_prime]
+
+                # Build P_uc with Bloch phase factor
+                P_uc = np.zeros((3 * nat_uc, 3 * nat_uc), dtype=np.complex128)
+                for kappa in range(nat_uc):
+                    kp = irt[kappa]
+                    tau_k = self.uci_structure.coords[kappa]
+                    tau_kp = self.uci_structure.coords[kp]
+                    # L is the lattice vector: R@tau + t - tau'
+                    L = R_cart @ tau_k + t_cart - tau_kp
+                    phase = np.exp(-2j * np.pi * q_prime @ L)
+                    P_uc[3 * kp:3 * kp + 3,
+                         3 * kappa:3 * kappa + 3] = phase * R_cart
+
+                # D block: representation matrix in eigenvector basis
+                D = np.conj(self.pols_q[:, :, iq_prime]).T @ P_uc @ self.pols_q[:, :, iq]
+
+                # Add to sparse entries
+                for nu1 in range(nb):
+                    for nu2 in range(nb):
+                        if abs(D[nu1, nu2]) > 1e-12:
+                            rows.append(iq_prime * nb + nu1)
+                            cols.append(iq * nb + nu2)
+                            vals.append(D[nu1, nu2])
+
+            all_rows.append(np.array(rows, dtype=np.int32))
+            all_cols.append(np.array(cols, dtype=np.int32))
+            all_vals.append(np.array(vals, dtype=np.complex128))
+
+        # Pass to Julia for caching (convert to 1-indexed)
+        for i in range(n_syms):
+            all_rows[i] += 1
+            all_cols[i] += 1
+
+        julia.Main.eval("""
+        function init_sparse_symmetries_qspace(
+            all_rows::Vector{Vector{Int32}},
+            all_cols::Vector{Vector{Int32}},
+            all_vals::Vector{Vector{ComplexF64}},
+            n_total::Int64
+        )
+            n_syms = length(all_rows)
+            mats = Vector{SparseMatrixCSC{ComplexF64,Int32}}(undef, n_syms)
+            for i in 1:n_syms
+                mats[i] = sparse(
+                    all_rows[i], all_cols[i], all_vals[i], n_total, n_total)
+            end
+            _cached_qspace_symmetries[] = mats
+            return nothing
+        end
+        """)
+
+        julia.Main.init_sparse_symmetries_qspace(
+            all_rows, all_cols, all_vals, np.int64(n_total))
+
+        if verbose:
+            print("Q-space symmetry matrices ({} x {}), {} symmetries cached in Julia".format(
+                n_total, n_total, n_syms))
+
+    # Override init to use q-space symmetrization
+    def init(self, use_symmetries=True):
+        """Initialize the q-space Lanczos calculation."""
+        self.prepare_symmetrization(no_sym=not use_symmetries)
+        self.initialized = True
diff --git a/Modules/__init__.py b/Modules/__init__.py
index 0f624e55..9a6f2ce2 100644
--- a/Modules/__init__.py
+++ b/Modules/__init__.py
@@ -5,4 +5,4 @@
 """
 
 
-__all__ = ["DynamicalLanczos", "cli"]
+__all__ = ["DynamicalLanczos", "QSpaceLanczos", "QSpaceHessian", "cli"]
diff --git a/Modules/tdscha_qspace.jl b/Modules/tdscha_qspace.jl
new file mode 100644
index 00000000..7ce7ae6c
--- /dev/null
+++ b/Modules/tdscha_qspace.jl
@@ -0,0 +1,538 @@
+# Q-Space Lanczos Julia Extension
+# ================================
+#
+# This module implements the q-space perturbed average calculations
+# in Julia for performance. The q-space version exploits Bloch's
+# theorem so that translations are handled via Fourier transform,
+# and only point-group symmetries remain.
+#
+# The q-space ensemble data X_q and Y_q are complex arrays indexed
+# as (n_q, n_configs, n_bands).
+#
+# Three separate functions mirror the real-space tdscha_core.jl:
+#   get_d2v_from_R_pert_qspace  — D3 contribution to d2v
+#   get_d2v_from_Y_pert_qspace  — D4 contribution to d2v
+#   get_f_from_Y_pert_qspace    — D4 contribution to f_pert
+
+using SparseArrays
+using LinearAlgebra
+using LinearAlgebra.BLAS
+
+if !isdefined(@__MODULE__, :RY_TO_K_Q)
+    const RY_TO_K_Q = 157887.32400374097
+end
+
+# Global cache for q-space symmetry matrices (complex)
+if !isdefined(@__MODULE__, :_cached_qspace_symmetries)
+    const _cached_qspace_symmetries = Ref{Union{Nothing,Vector{SparseMatrixCSC{ComplexF64,Int32}}}}(nothing)
+end
+
+
+"""
+    get_d2v_from_R_pert_qspace(...)
+
+D3 contribution to d2v from R^(1) perturbation.
+Mirrors get_d2v_dR2_from_R_pert_sym_fast in tdscha_core.jl.
+
+For each (config, sym):
+  weight_R  = sum_nu f_Y[nu,iq_pert] * x_rot[iq_pert,nu] * conj(R1[nu]) * rho/3
+  weight_Rf = sum_nu conj(R1[nu]) * y_rot[iq_pert,nu] * rho/3
+
+  For each pair p = (q1,q2):
+    d2v[p] += -weight_R  * (r1_q1 * conj(r2_q2)^T + r2_q1 * conj(r1_q2)^T)
+    d2v[p] += -weight_Rf * r1_q1 * conj(r1_q2)^T
+
+where r1 = f_Y * x, r2 = y.
+"""
+function get_d2v_from_R_pert_qspace(
+    X_q::Array{ComplexF64,3},
+    Y_q::Array{ComplexF64,3},
+    f_Y::Matrix{Float64},
+    rho::Vector{Float64},
+    R1::Vector{ComplexF64},
+    symmetries::Vector{SparseMatrixCSC{ComplexF64,Int32}},
+    iq_pert::Int64,
+    unique_pairs::Matrix{Int32},
+    n_bands::Int64,
+    n_q::Int64,
+    start_index::Int64,
+    end_index::Int64
+)
+    n_pairs = size(unique_pairs, 1)
+    n_syms = length(symmetries)
+    n_total = n_q * n_bands
+    N_eff = sum(rho)
+
+    # Output
+    d2v_blocks = [zeros(ComplexF64, n_bands, n_bands) for _ in 1:n_pairs]
+
+    # Buffers
+    x_buf = zeros(ComplexF64, n_total)
+    y_buf = zeros(ComplexF64, n_total)
+
+    for bigindex in start_index:end_index
+        i_config = div(bigindex - 1, n_syms) + 1
+        j_sym = mod(bigindex - 1, n_syms) + 1
+
+        # Build combined vector for this config
+        for iq in 1:n_q
+            for nu in 1:n_bands
+                idx = (iq - 1) * n_bands + nu
+                x_buf[idx] = X_q[iq, i_config, nu]
+                y_buf[idx] = Y_q[iq, i_config, nu]
+            end
+        end
+
+        # Apply symmetry
+        x_rot = symmetries[j_sym] * x_buf
+        y_rot = symmetries[j_sym] * y_buf
+
+        # Views at q_pert
+        x_pert = view(x_rot, (iq_pert-1)*n_bands+1:iq_pert*n_bands)
+        y_pert = view(y_rot, (iq_pert-1)*n_bands+1:iq_pert*n_bands)
+
+        # weight_R = sum_nu f_Y[nu,iq_pert] * x_pert[nu] * conj(R1[nu]) * rho/3
+        weight_R = zero(ComplexF64)
+        for nu in 1:n_bands
+            weight_R += f_Y[nu, iq_pert] * x_pert[nu] * conj(R1[nu])
+        end
+        weight_R *= rho[i_config] / 3.0
+
+        # weight_Rf = sum_nu conj(R1[nu]) * y_pert[nu] * rho/3
+        weight_Rf = zero(ComplexF64)
+        for nu in 1:n_bands
+            weight_Rf += conj(R1[nu]) * y_pert[nu]
+        end
+        weight_Rf *= rho[i_config] / 3.0
+
+        # Accumulate d2v for each pair
+        for p in 1:n_pairs
+            iq1 = unique_pairs[p, 1]
+            iq2 = unique_pairs[p, 2]
+
+            x_q1 = view(x_rot, (iq1-1)*n_bands+1:iq1*n_bands)
+            y_q1 = view(y_rot, (iq1-1)*n_bands+1:iq1*n_bands)
+            x_q2 = view(x_rot, (iq2-1)*n_bands+1:iq2*n_bands)
+            y_q2 = view(y_rot, (iq2-1)*n_bands+1:iq2*n_bands)
+
+            for nu1 in 1:n_bands
+                r1_1 = f_Y[nu1, iq1] * x_q1[nu1]  # r1 at q1
+                r2_1 = y_q1[nu1]                     # r2 at q1
+                for nu2 in 1:n_bands
+                    r1_2 = f_Y[nu2, iq2] * x_q2[nu2]  # r1 at q2
+                    r2_2 = y_q2[nu2]                     # r2 at q2
+
+                    # -weight_R * (r1_q1 * conj(r2_q2)^T + r2_q1 * conj(r1_q2)^T)
+                    contrib = -weight_R * (r1_1 * conj(r2_2) + r2_1 * conj(r1_2))
+                    # -weight_Rf * r1_q1 * conj(r1_q2)^T
+                    contrib -= weight_Rf * r1_1 * conj(r1_2)
+
+                    d2v_blocks[p][nu1, nu2] += contrib
+                end
+            end
+        end
+    end
+
+    # Normalize
+    norm_factor = n_syms * N_eff
+    for p in 1:n_pairs
+        d2v_blocks[p] ./= norm_factor
+    end
+
+    return d2v_blocks
+end
+
+
+"""
+    get_d2v_from_Y_pert_qspace(...)
+
+D4 contribution to d2v from alpha1 (Y1/Upsilon) perturbation.
+Mirrors get_d2v_dR2_from_Y_pert_sym_fast in tdscha_core.jl.
+
+CRITICAL: weights are TOTAL (summed over ALL pairs), not per-pair.
+
+For each (config, sym):
+  1. Compute buffer_u(q,nu) = sum_nu2 alpha1(q, q_pert-q)[nu,nu2] * x(q_pert-q, nu2)
+  2. Compute total_wD4 = -sum_all_pairs x^H * alpha1 * x, scaled by rho/8
+  3. Compute total_wb = -sum_{q,nu} conj(buffer_u) * f_psi * y, scaled by rho/4
+  4. Apply TOTAL weights to ALL d2v blocks
+"""
+function get_d2v_from_Y_pert_qspace(
+    X_q::Array{ComplexF64,3},
+    Y_q::Array{ComplexF64,3},
+    f_Y::Matrix{Float64},
+    f_psi::Matrix{Float64},
+    rho::Vector{Float64},
+    alpha1_blocks::Vector{Matrix{ComplexF64}},
+    symmetries::Vector{SparseMatrixCSC{ComplexF64,Int32}},
+    iq_pert::Int64,
+    unique_pairs::Matrix{Int32},
+    n_bands::Int64,
+    n_q::Int64,
+    start_index::Int64,
+    end_index::Int64
+)
+    n_pairs = size(unique_pairs, 1)
+    n_syms = length(symmetries)
+    n_total = n_q * n_bands
+    N_eff = sum(rho)
+
+    # Output
+    d2v_blocks = [zeros(ComplexF64, n_bands, n_bands) for _ in 1:n_pairs]
+
+    # Buffers
+    x_buf = zeros(ComplexF64, n_total)
+    y_buf = zeros(ComplexF64, n_total)
+    buffer_u = zeros(ComplexF64, n_q, n_bands)
+
+    for bigindex in start_index:end_index
+        i_config = div(bigindex - 1, n_syms) + 1
+        j_sym = mod(bigindex - 1, n_syms) + 1
+
+        # Build combined vector
+        for iq in 1:n_q
+            for nu in 1:n_bands
+                idx = (iq - 1) * n_bands + nu
+                x_buf[idx] = X_q[iq, i_config, nu]
+                y_buf[idx] = Y_q[iq, i_config, nu]
+            end
+        end
+
+        # Apply symmetry
+        x_rot = symmetries[j_sym] * x_buf
+        y_rot = symmetries[j_sym] * y_buf
+
+        # Step 1: Compute buffer_u and total_wD4
+        # buffer_u[iq1, nu1] = sum_nu2 alpha1[p][nu1, nu2] * x_rot[iq2, nu2]
+        # where (iq1, iq2) is the pair containing iq1
+        # total_wD4 = -sum_pairs (multiplicity) * x_q1^H * alpha1 * x_q2
+        total_wD4 = zero(ComplexF64)
+        fill!(buffer_u, zero(ComplexF64))
+
+        for p in 1:n_pairs
+            iq1 = unique_pairs[p, 1]
+            iq2 = unique_pairs[p, 2]
+
+            x_q1 = view(x_rot, (iq1-1)*n_bands+1:iq1*n_bands)
+            x_q2 = view(x_rot, (iq2-1)*n_bands+1:iq2*n_bands)
+
+            # buffer_u at iq1: sum_nu2 alpha1[p][nu1, nu2] * x_q2[nu2]
+            for nu1 in 1:n_bands
+                for nu2 in 1:n_bands
+                    buffer_u[iq1, nu1] += alpha1_blocks[p][nu1, nu2] * x_q2[nu2]
+                end
+            end
+
+            # If iq1 != iq2, also accumulate buffer_u at iq2
+            # For the reverse pair (iq2, iq1), the Hermitian Upsilon satisfies:
+            #   alpha1(q2,q1)[nu2,nu1] = conj(alpha1(q1,q2)[nu1,nu2])
+            # So buffer_u at iq2 uses the Hermitian conjugate of alpha1.
+            if iq1 != iq2
+                for nu2 in 1:n_bands
+                    for nu1 in 1:n_bands
+                        buffer_u[iq2, nu2] += conj(alpha1_blocks[p][nu1, nu2]) * x_q1[nu1]
+                    end
+                end
+            end
+
+            # Total weight: conj(x_q1)^T * alpha1 * x_q2
+            local_w = zero(ComplexF64)
+            for nu1 in 1:n_bands
+                for nu2 in 1:n_bands
+                    local_w += conj(x_q1[nu1]) * alpha1_blocks[p][nu1, nu2] * x_q2[nu2]
+                end
+            end
+            # For off-diagonal pairs, the reverse pair (iq2,iq1) contributes conj(local_w),
+            # so total = local_w + conj(local_w). In real-space (real x), this equals 2*local_w,
+            # but in q-space (complex x), we must use the correct Hermitian form.
+            if iq1 < iq2
+                total_wD4 += local_w + conj(local_w)
+            else
+                total_wD4 += local_w
+            end
+        end
+        total_wD4 *= -rho[i_config] / 8.0
+
+        # Step 2: Compute total_wb = -sum_{q,nu} conj(buffer_u[q,nu]) * f_psi[nu,q] * y_rot[q,nu]
+        total_wb = zero(ComplexF64)
+        for iq in 1:n_q
+            for nu in 1:n_bands
+                y_val = y_rot[(iq-1)*n_bands + nu]
+                total_wb -= conj(buffer_u[iq, nu]) * f_psi[nu, iq] * y_val
+            end
+        end
+        total_wb *= rho[i_config] / 4.0
+
+        # Step 3: Apply TOTAL weights to ALL d2v blocks
+        for p in 1:n_pairs
+            iq1 = unique_pairs[p, 1]
+            iq2 = unique_pairs[p, 2]
+
+            x_q1 = view(x_rot, (iq1-1)*n_bands+1:iq1*n_bands)
+            y_q1 = view(y_rot, (iq1-1)*n_bands+1:iq1*n_bands)
+            x_q2 = view(x_rot, (iq2-1)*n_bands+1:iq2*n_bands)
+            y_q2 = view(y_rot, (iq2-1)*n_bands+1:iq2*n_bands)
+
+            for nu1 in 1:n_bands
+                r1_1 = f_Y[nu1, iq1] * x_q1[nu1]
+                r2_1 = y_q1[nu1]
+                for nu2 in 1:n_bands
+                    r1_2 = f_Y[nu2, iq2] * x_q2[nu2]
+                    r2_2 = y_q2[nu2]
+
+                    # -total_wD4 * (r1*conj(r2)^T + r2*conj(r1)^T)
+                    contrib = -total_wD4 * (r1_1 * conj(r2_2) + r2_1 * conj(r1_2))
+                    # -total_wb * r1*conj(r1)^T
+                    contrib -= total_wb * r1_1 * conj(r1_2)
+
+                    d2v_blocks[p][nu1, nu2] += contrib
+                end
+            end
+        end
+    end
+
+    # Normalize
+    norm_factor = n_syms * N_eff
+    for p in 1:n_pairs
+        d2v_blocks[p] ./= norm_factor
+    end
+
+    return d2v_blocks
+end
+
+
+"""
+    get_f_from_Y_pert_qspace(...)
+
+D3 contribution to f_pert from alpha1/Y1 perturbation.
+Mirrors get_f_average_from_Y_pert in tdscha_core.jl.
+
+For each (config, sym):
+  total_sum = sum_pairs (mult) * conj(x_q1)^T * alpha1 * x_q2
+  buffer_u(q,nu) = sum_nu2 alpha1[...] * x(q2,nu2)
+  buf_f_weight = sum_{q,nu} conj(buffer_u) * f_psi * y
+
+  f_pert += (-total_sum/2) * rho/3 * y_rot[q_pert]
+  f_pert += (-buf_f_weight) * rho/3 * f_Y[q_pert] * x_rot[q_pert]
+"""
+function get_f_from_Y_pert_qspace(
+    X_q::Array{ComplexF64,3},
+    Y_q::Array{ComplexF64,3},
+    f_Y::Matrix{Float64},
+    f_psi::Matrix{Float64},
+    rho::Vector{Float64},
+    alpha1_blocks::Vector{Matrix{ComplexF64}},
+    symmetries::Vector{SparseMatrixCSC{ComplexF64,Int32}},
+    iq_pert::Int64,
+    unique_pairs::Matrix{Int32},
+    n_bands::Int64,
+    n_q::Int64,
+    start_index::Int64,
+    end_index::Int64
+)
+    n_pairs = size(unique_pairs, 1)
+    n_syms = length(symmetries)
+    n_total = n_q * n_bands
+    N_eff = sum(rho)
+
+    # Output
+    f_pert = zeros(ComplexF64, n_bands)
+
+    # Buffers
+    x_buf = zeros(ComplexF64, n_total)
+    y_buf = zeros(ComplexF64, n_total)
+    buffer_u = zeros(ComplexF64, n_q, n_bands)
+
+    for bigindex in start_index:end_index
+        i_config = div(bigindex - 1, n_syms) + 1
+        j_sym = mod(bigindex - 1, n_syms) + 1
+
+        # Build combined vector
+        for iq in 1:n_q
+            for nu in 1:n_bands
+                idx = (iq - 1) * n_bands + nu
+                x_buf[idx] = X_q[iq, i_config, nu]
+                y_buf[idx] = Y_q[iq, i_config, nu]
+            end
+        end
+
+        # Apply symmetry
+        x_rot = symmetries[j_sym] * x_buf
+        y_rot = symmetries[j_sym] * y_buf
+
+        # Compute buffer_u and total_sum (same as d2v function)
+        total_sum = zero(ComplexF64)
+        fill!(buffer_u, zero(ComplexF64))
+
+        for p in 1:n_pairs
+            iq1 = unique_pairs[p, 1]
+            iq2 = unique_pairs[p, 2]
+
+            x_q1 = view(x_rot, (iq1-1)*n_bands+1:iq1*n_bands)
+            x_q2 = view(x_rot, (iq2-1)*n_bands+1:iq2*n_bands)
+
+            # buffer_u at iq1
+            for nu1 in 1:n_bands
+                for nu2 in 1:n_bands
+                    buffer_u[iq1, nu1] += alpha1_blocks[p][nu1, nu2] * x_q2[nu2]
+                end
+            end
+
+            # buffer_u at iq2 (if not diagonal pair)
+            # Reverse pair uses Hermitian conjugate: alpha1(q2,q1) = alpha1(q1,q2)^H
+            if iq1 != iq2
+                for nu2 in 1:n_bands
+                    for nu1 in 1:n_bands
+                        buffer_u[iq2, nu2] += conj(alpha1_blocks[p][nu1, nu2]) * x_q1[nu1]
+                    end
+                end
+            end
+
+            # total_sum
+            local_w = zero(ComplexF64)
+            for nu1 in 1:n_bands
+                for nu2 in 1:n_bands
+                    local_w += conj(x_q1[nu1]) * alpha1_blocks[p][nu1, nu2] * x_q2[nu2]
+                end
+            end
+            # Reverse pair contributes conj(local_w), not local_w
+            if iq1 < iq2
+                total_sum += local_w + conj(local_w)
+            else
+                total_sum += local_w
+            end
+        end
+
+        # buf_f_weight = sum_{q,nu} conj(buffer_u[q,nu]) * f_psi[nu,q] * y_rot[q,nu]
+        buf_f_weight = zero(ComplexF64)
+        for iq in 1:n_q
+            for nu in 1:n_bands
+                y_val = y_rot[(iq-1)*n_bands + nu]
+                buf_f_weight += conj(buffer_u[iq, nu]) * f_psi[nu, iq] * y_val
+            end
+        end
+
+        # Two f_pert contributions:
+        # Term 1: (-total_sum/2) * rho/3 * y_rot[q_pert]
+        y_pert = view(y_rot, (iq_pert-1)*n_bands+1:iq_pert*n_bands)
+        x_pert = view(x_rot, (iq_pert-1)*n_bands+1:iq_pert*n_bands)
+
+        w1 = -total_sum / 2.0 * rho[i_config] / 3.0
+        for nu in 1:n_bands
+            f_pert[nu] += w1 * y_pert[nu]
+        end
+
+        # Term 2: (-buf_f_weight) * rho/3 * f_Y[nu,q_pert] * x_rot[q_pert,nu]
+        w2 = -buf_f_weight * rho[i_config] / 3.0
+        for nu in 1:n_bands
+            f_pert[nu] += w2 * f_Y[nu, iq_pert] * x_pert[nu]
+        end
+    end
+
+    # Normalize
+    f_pert ./= (n_syms * N_eff)
+
+    return f_pert
+end
+
+
+"""
+    get_perturb_averages_qspace(...)
+
+Combined entry point called from Python. Computes f_pert and d2v_blocks,
+packs them into a single flat array for MPI reduction.
+
+Returns: [f_pert(n_bands) ; d2v_blocks_flat(n_pairs * n_bands^2)]
+"""
+function get_perturb_averages_qspace(
+    X_q::Array{ComplexF64,3},
+    Y_q::Array{ComplexF64,3},
+    w_q::Matrix{Float64},
+    rho::Vector{Float64},
+    R1::Vector{ComplexF64},
+    alpha1_flat::Vector{ComplexF64},
+    temperature::Float64,
+    apply_v4::Bool,
+    iq_pert::Int64,
+    q_pair_map::Vector{Int32},
+    unique_pairs::Matrix{Int32},
+    start_index::Int64,
+    end_index::Int64
+)
+    n_q = size(X_q, 1)
+    n_bands = size(X_q, 3)
+    n_pairs = size(unique_pairs, 1)
+
+    # Get symmetries
+    symmetries = _cached_qspace_symmetries[]
+    if symmetries === nothing
+        error("Q-space symmetries not initialized. Call init_sparse_symmetries_qspace first.")
+    end
+
+    # Precompute occupation numbers and scaling factors
+    # Acoustic modes (w < threshold) get f_Y=0, f_psi=0 to avoid NaN/Inf
+    f_Y = zeros(Float64, n_bands, n_q)
+    f_psi = zeros(Float64, n_bands, n_q)
+    acoustic_eps = 1e-6
+
+    for iq in 1:n_q
+        for nu in 1:n_bands
+            w = w_q[nu, iq]
+            if w < acoustic_eps
+                f_Y[nu, iq] = 0.0
+                f_psi[nu, iq] = 0.0
+                continue
+            end
+            if temperature > 0
+                nw = 1.0 / (exp(w * RY_TO_K_Q / temperature) - 1.0)
+            else
+                nw = 0.0
+            end
+            f_Y[nu, iq] = 2.0 * w / (1.0 + 2.0 * nw)
+            f_psi[nu, iq] = (1.0 + 2.0 * nw) / (2.0 * w)
+        end
+    end
+
+    # Unpack alpha1 blocks
+    alpha1_blocks = Vector{Matrix{ComplexF64}}(undef, n_pairs)
+    offset = 1
+    for p in 1:n_pairs
+        alpha1_blocks[p] = reshape(alpha1_flat[offset:offset+n_bands^2-1], n_bands, n_bands)
+        offset += n_bands^2
+    end
+
+    # D3 contribution to d2v
+    d2v = get_d2v_from_R_pert_qspace(
+        X_q, Y_q, f_Y, rho, R1, symmetries,
+        iq_pert, unique_pairs, n_bands, n_q,
+        start_index, end_index)
+
+    # D3 contribution to f_pert from alpha1/Y1 perturbation
+    # This is a D3 term (not D4), so it must be computed regardless of apply_v4
+    f_pert = get_f_from_Y_pert_qspace(
+        X_q, Y_q, f_Y, f_psi, rho, alpha1_blocks, symmetries,
+        iq_pert, unique_pairs, n_bands, n_q,
+        start_index, end_index)
+
+    # D4 contribution to d2v
+    if apply_v4
+        d2v_v4 = get_d2v_from_Y_pert_qspace(
+            X_q, Y_q, f_Y, f_psi, rho, alpha1_blocks, symmetries,
+            iq_pert, unique_pairs, n_bands, n_q,
+            start_index, end_index)
+        for p in 1:n_pairs
+            d2v[p] .+= d2v_v4[p]
+        end
+    end
+
+    # Pack result: f_pert followed by flattened d2v blocks
+    result = zeros(ComplexF64, n_bands + n_pairs * n_bands^2)
+    result[1:n_bands] = f_pert
+    offset = n_bands + 1
+    for p in 1:n_pairs
+        result[offset:offset+n_bands^2-1] = vec(d2v[p])
+        offset += n_bands^2
+    end
+
+    return result
+end
diff --git a/docs/api/qspace_hessian.md b/docs/api/qspace_hessian.md
new file mode 100644
index 00000000..7c1cec11
--- /dev/null
+++ b/docs/api/qspace_hessian.md
@@ -0,0 +1,9 @@
+# QSpaceHessian API Reference
+
+::: tdscha.QSpaceHessian
+    options:
+        show_root_heading: true
+        show_source: true
+        heading_level: 2
+        members_order: source
+        filters: ["!^_"]
diff --git a/docs/api/qspace_lanczos.md b/docs/api/qspace_lanczos.md
new file mode 100644
index 00000000..2ec38d82
--- /dev/null
+++ b/docs/api/qspace_lanczos.md
@@ -0,0 +1,35 @@
+# QSpaceLanczos Module
+
+::: tdscha.QSpaceLanczos
+    options:
+      show_root_heading: true
+      show_source: true
+      heading_level: 2
+      members_order: source
+
+## QSpaceLanczos Class
+
+::: tdscha.QSpaceLanczos.QSpaceLanczos
+    options:
+      heading_level: 2
+      show_source: true
+      members_order: source
+      filters: ["!^_", "!^__"]
+
+### Key Differences from Lanczos
+
+| Feature | `Lanczos` (real-space) | `QSpaceLanczos` (q-space) |
+|---------|----------------------|--------------------------|
+| Psi vector | Real (`float64`) | Complex (`complex128`) |
+| Inner product | Standard dot product | Hermitian: $\langle p | q \rangle = \bar{p}^T (q \cdot m)$ |
+| Lanczos type | Bi-conjugate or symmetric | Hermitian symmetric ($b = c$, real coefficients) |
+| Two-phonon pairs | All supercell mode pairs | $(q_1, q_2)$ pairs with $q_1 + q_2 = q_\text{pert}$ |
+| Symmetries | Full space group | Point group only (translations analytic) |
+| Backend | C / MPI / Julia | Julia only |
+
+### Utility Functions
+
+::: tdscha.QSpaceLanczos.find_q_index
+    options:
+      heading_level: 3
+      show_source: true
diff --git a/docs/index.md b/docs/index.md
index d66b4b1b..22ca87d2 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -18,14 +18,16 @@ Part of the SSCHA ecosystem: `cellconstructor` → `python-sscha` → `tdscha`.
 
 6. **API Reference** - Automatically generated documentation:
    - [DynamicalLanczos](api/dynamical_lanczos.md) - Core Lanczos algorithm
+   - [QSpaceLanczos](api/qspace_lanczos.md) - Q-space (Bloch basis) Lanczos algorithm
    - [StaticHessian](api/static_hessian.md) - Free energy Hessian calculations
 
 ## Key Features
 
 - **Simulating the vibrational anharmonic spectra**: IR absorption, Raman scattering, Nuclear inelastic scattering, and phonon spectral functions.
-- **Full quantum treatment of atomic nuclei** 
+- **Full quantum treatment of atomic nuclei**
 - **Parallel execution** with MPI
 - **Symmetry-aware** calculations for efficiency
+- **Q-space Lanczos**: Bloch-basis formulation exploiting momentum conservation for large supercells (see [In-Depth Usage](usage.md#q-space-lanczos))
 
 ## Theoretical Foundation
 
diff --git a/docs/qspace-hessian.md b/docs/qspace-hessian.md
new file mode 100644
index 00000000..a7e03c1c
--- /dev/null
+++ b/docs/qspace-hessian.md
@@ -0,0 +1,208 @@
+# QSpaceHessian: Fast Free Energy Hessian in Q-Space
+
+The `QSpaceHessian` class computes the free energy Hessian matrix (second derivative of free energy with respect to atomic displacements) by working in the Bloch (q-space) basis, where the problem decomposes into independent blocks at each q-point.
+
+This is the **fastest and most memory-efficient** method available in the SSCHA ecosystem to compute the full anharmonic free energy Hessian, including fourth-order contributions. It supersedes both the direct `get_free_energy_hessian` from `python-sscha` and the real-space `StaticHessian` for systems with periodic boundary conditions.
+
+## Why QSpaceHessian?
+
+The free energy Hessian is defined as ([R. Bianco et al., Phys. Rev. B **96**, 014111 (2017)](https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.014111)):
+
+$$
+\frac{\partial^2 \mathcal{F}}{\partial \mathcal{R} \partial \mathcal{R}} = \overset{(2)}{D} - \overset{(3)}{D}\sqrt{-W}\left(\mathbb{1} + \sqrt{-W}\overset{(4)}{D}\sqrt{-W}\right)^{-1}\sqrt{-W}\,\overset{(3)}{D}
+$$
+
+The direct approach (`ensemble.get_free_energy_hessian()`) builds the $\overset{(4)}{D}$ matrix explicitly. This matrix lives in the space of mode *pairs* ($N_\text{modes}^2 \times N_\text{modes}^2$), which makes the method scale as:
+
+| | Direct (`get_free_energy_hessian`) | **QSpaceHessian** |
+|---|---|---|
+| **Memory** | $O(N_\text{modes}^4)$ | $O(n_\text{bands}^4)$ |
+| **Time** | $O(N_\text{modes}^6)$ | $O(n_\text{bands}^3 \times N_{q,\text{irr}})$ |
+
+where $N_\text{modes} = N_q \times n_\text{bands}$ is the total number of modes in the supercell. Since $N_\text{modes}$ grows linearly with the number of unit cells $N_\text{cell}$, the speedup of QSpaceHessian scales as:
+
+$$
+\text{Speedup} \sim \frac{N_\text{cell}^3}{N_{q,\text{irr}}}
+$$
+
+For a $4\times 4\times 4$ supercell ($N_\text{cell} = 64$, $N_{q,\text{irr}} \approx 4$): the speedup is roughly **65,000x**, and the memory reduction makes calculations feasible that would otherwise require terabytes of RAM.
+
+Even compared to the real-space `StaticHessian` (which already avoids building $\overset{(4)}{D}$ explicitly), QSpaceHessian is faster by a factor of $\sim N_\text{cell}$ because each iterative solve operates on $n_\text{bands}$-sized vectors instead of $N_\text{modes}$-sized ones, and point-group symmetry reduces the number of q-points to solve.
+
+## Basic Usage
+
+### Computing the Full Hessian
+
+```python
+import cellconstructor as CC
+import cellconstructor.Phonons
+import sscha.Ensemble
+import tdscha.QSpaceHessian as QH
+
+# Load the SSCHA dynamical matrix and ensemble
+dyn = CC.Phonons.Phonons("dyn_pop1_", nqirr=3)
+ensemble = sscha.Ensemble.Ensemble(dyn, T=300)
+ensemble.load_bin("data/", population_id=1)
+
+# Create and initialize the Hessian solver
+hess = QH.QSpaceHessian(ensemble)
+hess.init()
+
+# Compute the full Hessian at all q-points
+hessian_dyn = hess.compute_full_hessian()
+
+# Save the result
+hessian_dyn.save_qe("hessian_dyn_")
+
+# Extract anharmonic phonon frequencies
+w_hessian, pols = hessian_dyn.DiagonalizeSupercell()
+print("Anharmonic frequencies (cm-1):", w_hessian * CC.Units.RY_TO_CM)
+```
+
+### Computing at a Single Q-Point
+
+If you only need the Hessian at a specific q-point (e.g., to check for a soft mode at a high-symmetry point):
+
+```python
+hess = QH.QSpaceHessian(ensemble)
+hess.init()
+
+# Compute the Hessian at the Gamma point (iq=0)
+H_gamma = hess.compute_hessian_at_q(iq=0)
+
+# Eigenvalues give the squared renormalized frequencies
+eigvals = np.linalg.eigvalsh(H_gamma)
+freqs_cm = np.sqrt(np.abs(eigvals)) * np.sign(eigvals) * CC.Units.RY_TO_CM
+print("Frequencies at Gamma (cm-1):", np.sort(freqs_cm))
+```
+
+### Comparison with the Direct Method
+
+To verify your results or for small systems where both methods are feasible:
+
+```python
+# Direct method (from python-sscha)
+hessian_direct = ensemble.get_free_energy_hessian(include_v4=True)
+
+# Q-space method
+hess = QH.QSpaceHessian(ensemble)
+hess.init()
+hessian_qspace = hess.compute_full_hessian()
+
+# Compare eigenvalues at each q-point
+for iq in range(len(dyn.q_tot)):
+    w_direct = np.linalg.eigvalsh(hessian_direct.dynmats[iq])
+    w_qspace = np.linalg.eigvalsh(hessian_qspace.dynmats[iq])
+    print(f"iq={iq}: max diff = {np.max(np.abs(w_direct - w_qspace)):.2e}")
+```
+
+## Tuning the Solver
+
+### Convergence Parameters
+
+The iterative solver (GMRES with BiCGSTAB fallback) can be tuned:
+
+```python
+hessian_dyn = hess.compute_full_hessian(
+    tol=1e-6,           # Convergence tolerance (default: 1e-6)
+    max_iters=500,       # Maximum iterations per band (default: 500)
+    use_preconditioner=True  # Harmonic preconditioner (default: True)
+)
+```
+
+The harmonic preconditioner uses the inverse of the harmonic Liouvillian, which is diagonal in the mode basis, and dramatically reduces the number of iterations. Disabling it is not recommended.
+
+### Dense Fallback
+
+For problematic q-points where the iterative solver does not converge (e.g., systems near a phase transition where modes are very soft), a dense fallback can be enabled:
+
+```python
+hessian_dyn = hess.compute_full_hessian(
+    dense_fallback=True  # WARNING: O(psi_size^2) memory
+)
+```
+
+!!! warning
+    The dense fallback builds the full Liouvillian matrix in memory. For large systems this can be extremely expensive. It is **disabled by default** and should only be used for small systems or debugging. If the iterative solver fails, first try increasing `max_iters` or loosening `tol`.
+
+### Symmetries
+
+By default, `QSpaceHessian` uses crystal symmetries (via `spglib`) to reduce the number of q-points to the irreducible set. The Hessian at symmetry-equivalent q-points is obtained by rotation, saving a factor of $|G|/N_{q,\text{irr}}$ in computation time. This can be disabled:
+
+```python
+hess.init(use_symmetries=False)  # Solve at every q-point independently
+```
+
+## Deep Dive: How It Works
+
+### From Hessian to Linear System
+
+The key insight ([L. Monacelli and F. Mauri, 2021](https://journals.aps.org/prb/abstract/10.1103/8611-5k5v)) is that the free energy Hessian can be reformulated as the inverse of the static susceptibility:
+
+$$
+\frac{\partial^2 \mathcal{F}}{\partial \mathcal{R}_a \partial \mathcal{R}_b} = \left[\mathcal{G}(\omega=0)\right]^{-1}_{ab}
+$$
+
+where $\mathcal{G}$ is the static Green's function. Instead of building and inverting the $\overset{(4)}{D}$ matrix, we define an auxiliary Liouvillian $\mathcal{L}$ that acts on a vector $(\Upsilon, \bar{\mathcal{R}})$ in the extended space of phonon displacements ($\bar{\mathcal{R}}$, dimension $N_\text{modes}$) and two-phonon amplitudes ($\Upsilon$, dimension $N_\text{modes}^2$):
+
+$$
+\mathcal{L} \begin{pmatrix} \Upsilon \\ \bar{\mathcal{R}} \end{pmatrix} = \begin{pmatrix} 0 \\ \boldsymbol{a} \end{pmatrix}
+$$
+
+Solving this system for each unit vector $\boldsymbol{a} = \boldsymbol{e}_i$ yields $\bar{\mathcal{R}} = \mathcal{G}\boldsymbol{e}_i$, i.e., column $i$ of the static susceptibility $\mathcal{G}$. The Hessian follows as $\mathcal{H} = \mathcal{G}^{-1}$.
+
+### The Static Liouvillian
+
+The Liouvillian $\mathcal{L}$ splits into a harmonic part (diagonal in the mode basis) and an anharmonic part (computed via ensemble averages):
+
+$$
+\mathcal{L}^\text{(har)} = \begin{pmatrix} -W^{-1} & 0 \\ 0 & \overset{(2)}{D} \end{pmatrix}, \qquad
+\mathcal{L}^\text{(anh)} = \begin{pmatrix} \overset{(4)}{D} & \overset{(3)}{D} \\ \overset{(3)}{D} & 0 \end{pmatrix}
+$$
+
+The harmonic part is trivially invertible ($W^{-1}$ is the inverse of the two-phonon propagator, $\overset{(2)}{D}$ contains the squared SSCHA frequencies $\omega_\mu^2$). The anharmonic part is applied efficiently via importance-sampled ensemble averages, scaling as $O(N^2)$ per application instead of $O(N^4)$ to store $\overset{(4)}{D}$.
+
+### Q-Space Block Diagonality
+
+In the Bloch representation, translational symmetry makes $\mathcal{L}$ block-diagonal:
+
+$$
+\mathcal{L}(\mathbf{q}, \mathbf{q}') = \delta_{\mathbf{q}\mathbf{q}'}\, \mathcal{L}(\mathbf{q})
+$$
+
+Each block $\mathcal{L}(\mathbf{q})$ has dimension $n_\text{bands} + n_\text{bands}^2 \times n_\text{pairs}$ (where $n_\text{pairs}$ is the number of unique q-point pairs such that $\mathbf{q}_1 + \mathbf{q}_2 = \mathbf{q}$), independent of the supercell size. This is where the massive speedup comes from: instead of one huge linear system of size $N_\text{modes} + N_\text{modes}^2$, we solve $N_{q,\text{irr}}$ small systems of size $\sim n_\text{bands}^2$.
+
+### Iterative Solver and Preconditioner
+
+Each system $\mathcal{L}(\mathbf{q})\, \boldsymbol{x}_i = \boldsymbol{e}_i$ is solved iteratively with GMRES, preconditioned by the inverse of the harmonic Liouvillian:
+
+$$
+M = [\mathcal{L}^\text{(har)}]^{-1} = \begin{pmatrix} -W & 0 \\ 0 & [\overset{(2)}{D}]^{-1} \end{pmatrix}
+$$
+
+Since $\mathcal{L}$ is Hermitian with respect to a weighted inner product $\langle \boldsymbol{x} | \boldsymbol{y} \rangle = \boldsymbol{x}^\dagger D_\text{mask}\, \boldsymbol{y}$ (where $D_\text{mask}$ accounts for the multiplicity of off-diagonal vs. diagonal mode pairs), a similarity transformation $\tilde{\mathcal{L}} = D_\text{mask}^{1/2}\, \mathcal{L}\, D_\text{mask}^{-1/2}$ is applied to convert the problem to standard Hermitian form. This ensures proper convergence of the Krylov solvers and Hermiticity of the resulting Hessian.
+
+### Handling of Acoustic Modes and Non-Self-Conjugate Q-Points
+
+Special care is needed at q-points $\mathbf{q}$ for which the pair decomposition $\mathbf{q}_1 + \mathbf{q}_2 = \mathbf{q}$ involves the $\Gamma$ point ($\mathbf{q}_1 = \mathbf{0}$). This happens whenever $\mathbf{q}$ is not mapped to $-\mathbf{q}$ by a point-group symmetry (i.e., $\mathbf{q} \neq -\mathbf{q} + \mathbf{G}$ for any reciprocal lattice vector $\mathbf{G}$). In such cases, one of the q-point pairs is $(\mathbf{0}, \mathbf{q})$, and the three acoustic modes at $\Gamma$ (with $\omega = 0$) introduce a null space in the two-phonon propagator $W^{-1}$ (since $1/\Lambda \to 0$ when an acoustic mode is involved).
+
+Anharmonic $\overset{(3)}{D}$ coupling can mix this null space with the displacement sector $\bar{\mathcal{R}}$, making the linear system $\mathcal{L}\boldsymbol{x} = \boldsymbol{e}_i$ formally inconsistent. QSpaceHessian handles this by projecting out acoustic-mode components from the inner product (setting $D_\text{mask} = 0$ for two-phonon entries where either mode is acoustic). This ensures the null space remains confined to the two-phonon sector and does not contaminate the displacement response.
+
+### Hermiticity Enforcement
+
+The static susceptibility $\mathcal{G}$ should be Hermitian by construction: $\mathcal{G}_{ab} = \mathcal{G}_{ba}^*$. However, finite convergence tolerances in the iterative solver can introduce small anti-Hermitian components. QSpaceHessian explicitly symmetrizes both the susceptibility $\mathcal{G} \to (\mathcal{G} + \mathcal{G}^\dagger)/2$ and the final Hessian $\mathcal{H} \to (\mathcal{H} + \mathcal{H}^\dagger)/2$ to guarantee physically meaningful (real) eigenvalues.
+
+### Point-Group Symmetry Reduction
+
+Crystal point-group symmetries $\{R|\mathbf{t}\}$ map q-points into stars: $\mathbf{q}' = R\mathbf{q}$. The Hessian at a rotated q-point is obtained from the irreducible representative:
+
+$$
+\mathcal{H}(\mathbf{q}') = D(R)\, \mathcal{H}(\mathbf{q}_\text{irr})\, D(R)^\dagger
+$$
+
+where $D(R)$ is the representation matrix of the symmetry operation in the phonon mode basis, accounting for atom permutations, Cartesian rotation, and Bloch phase factors.
+
+## References
+
+1. R. Bianco, I. Errea, L. Paulatto, M. Calandra, and F. Mauri, *Phys. Rev. B* **96**, 014111 (2017). [DOI: 10.1103/PhysRevB.96.014111](https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.014111)
+2. L. Monacelli, *Phys. Rev. B* **112**, 014109 (2025). [DOI: 10.1103/8611-5k5v](https://journals.aps.org/prb/abstract/10.1103/8611-5k5v)
diff --git a/docs/usage.md b/docs/usage.md
index ad8d4bca..0315fac9 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -222,6 +222,158 @@ gf = lanczos.get_green_function_continued_fraction(
 ```
 
 
+## Q-Space Lanczos
+
+The `QSpaceLanczos` module provides a reformulation of the Lanczos algorithm in q-space (Bloch basis), exploiting momentum conservation from Bloch's theorem. This can give a significant speedup over the standard real-space calculation, proportional to the number of unit cells in the supercell ($N_\text{cell}$).
+
+### How It Works
+
+In the standard real-space Lanczos, the two-phonon sector has dimension $\sim n_\text{modes}^2 / 2$, where $n_\text{modes} = 3 N_\text{atoms,sc}$ is the number of supercell modes. For a 4x4x4 supercell of a 2-atom unit cell, this means $\sim 460{,}000$ entries.
+
+By working in q-space, momentum conservation ($\mathbf{q}_1 + \mathbf{q}_2 = \mathbf{q}_\text{pert} + \mathbf{G}$) makes the two-phonon sector **block-diagonal**: only pairs of q-points satisfying the conservation law contribute. The total two-phonon size drops from $\sim n_\text{modes}^2/2$ to $\sim N_\text{cell} \cdot n_\text{bands}^2/2$ (where $n_\text{bands} = 3 N_\text{atoms,uc}$). Moreover, translational symmetries are handled analytically via the Fourier transform, so only point-group symmetries need to be applied explicitly.
+
+### Requirements
+
+- **Julia** must be installed and configured (see [Installation](installation.md))
+- **spglib** for symmetry detection
+- The Q-space Lanczos always uses the **Wigner representation** and **Julia backend**
+
+### Basic Usage
+
+The workflow mirrors the standard Lanczos but uses `QSpaceLanczos` instead of `Lanczos`, and specifies perturbations by q-point index and band index rather than supercell mode index.
+
+```python
+import cellconstructor as CC
+import cellconstructor.Phonons
+import sscha, sscha.Ensemble
+import tdscha.QSpaceLanczos as QL
+
+# Load ensemble (same as standard workflow)
+dyn = CC.Phonons.Phonons("dyn_final_", nqirr=3)
+ens = sscha.Ensemble.Ensemble(dyn, 300)
+ens.load_bin("ensemble_dir", 1)
+
+# Create the Q-space Lanczos object
+qlanc = QL.QSpaceLanczos(ens)
+qlanc.init(use_symmetries=True)
+
+# Perturb a specific phonon mode at q-point 0 (Gamma), band 3
+qlanc.prepare_mode_q(iq=0, band_index=3)
+
+# Run the Lanczos iteration
+qlanc.run_FT(100)
+
+# Extract the Green function (same interface as standard Lanczos)
+import numpy as np
+w = np.linspace(0, 500, 1000) / CC.Units.RY_TO_CM  # frequency grid in Ry
+smearing = 5 / CC.Units.RY_TO_CM
+gf = qlanc.get_green_function_continued_fraction(w, smearing=smearing)
+spectral = -np.imag(gf)
+```
+
+### Choosing the Perturbation
+
+#### Single mode at a q-point
+
+To perturb a single phonon band at a specific q-point:
+
+```python
+qlanc.prepare_mode_q(iq, band_index)
+```
+
+- `iq`: index of the q-point in `qlanc.q_points` (0 = $\Gamma$)
+- `band_index`: band index (0-based, excluding acoustic modes is your responsibility)
+
+You can inspect the available q-points and frequencies:
+
+```python
+# Print q-points (Cartesian, 2pi/alat units)
+for i, q in enumerate(qlanc.q_points):
+    print(f"iq={i}: q = {q}")
+
+# Print band frequencies at a given q-point (in cm-1)
+iq = 0
+for nu in range(qlanc.n_bands):
+    freq = qlanc.w_q[nu, iq] * CC.Units.RY_TO_CM
+    print(f"  band {nu}: {freq:.2f} cm-1")
+```
+
+#### Custom perturbation vector
+
+To perturb with an arbitrary displacement pattern at a q-point (e.g., for IR-like perturbations):
+
+```python
+# vector is a real-space displacement pattern (3*n_atoms_uc,) in Cartesian coords
+qlanc.prepare_perturbation_q(iq, vector)
+```
+
+This projects the vector onto the q-space eigenmodes at `iq`.
+
+### Looping Over Multiple Modes
+
+To compute the full spectral function, you need to run one Lanczos calculation per mode.
+You can reset the state and prepare a new perturbation between runs:
+
+```python
+import cellconstructor as CC
+import cellconstructor.Phonons
+import sscha, sscha.Ensemble
+import tdscha.QSpaceLanczos as QL
+
+dyn = CC.Phonons.Phonons("dyn_final_", nqirr=3)
+ens = sscha.Ensemble.Ensemble(dyn, 300)
+ens.load_bin("ensemble_dir", 1)
+
+qlanc = QL.QSpaceLanczos(ens)
+qlanc.init(use_symmetries=True)
+
+# Loop over modes at Gamma
+iq = 0
+for band in range(qlanc.n_bands):
+    # Skip acoustic modes (zero frequency)
+    if qlanc.w_q[band, iq] < 1e-6:
+        continue
+
+    qlanc.prepare_mode_q(iq, band)
+    qlanc.run_FT(100)
+    qlanc.save_status(f"qspace_iq{iq}_band{band}.npz")
+```
+
+### Saving and Loading
+
+Checkpoint saving and loading works the same as the standard Lanczos:
+
+```python
+# Save during calculation
+qlanc.run_FT(100, save_each=10, save_dir="output", prefix="qlanc")
+
+# Save final result
+qlanc.save_status("qspace_result.npz")
+```
+
+!!! note "Restart limitations"
+    Restarting from a saved checkpoint currently requires re-creating the `QSpaceLanczos` object from the same ensemble, loading the status, and then continuing. The q-space internal state (pair maps, symmetries) is reconstructed from the ensemble on `init()`.
+
+### When to Use Q-Space vs Real-Space Lanczos
+
+| Feature | Real-space (`Lanczos`) | Q-space (`QSpaceLanczos`) |
+|---------|----------------------|--------------------------|
+| Backend | C, MPI, or Julia | Julia only |
+| Representation | Real or Wigner | Wigner only |
+| Perturbation | Supercell mode index | (q-point, band) index |
+| Symmetry handling | Full space group | Point group (translations analytic) |
+| Two-phonon size | $\sim n_\text{modes}^2/2$ | $\sim N_\text{cell} \cdot n_\text{bands}^2/2$ |
+| Best for | Small supercells, $\Gamma$-only | Large supercells, finite-q perturbations |
+
+The Q-space approach is most advantageous when:
+
+- The supercell is large (large $N_\text{cell}$), since the speedup scales as $\sim N_\text{cell}$
+- You want to study phonon spectral functions at specific q-points
+- Julia is available and configured
+
+For $\Gamma$-point-only calculations on small supercells, the standard Lanczos with `gamma_only=True` may be comparable or faster due to lower overhead.
+
+
 ## Advanced Features
 
 ### Custom Perturbations
diff --git a/meson.build b/meson.build
index 21571f4b..6bfb5b92 100644
--- a/meson.build
+++ b/meson.build
@@ -1,6 +1,6 @@
 project('tdscha',
   ['c'],
-  version: '1.6.0',
+  version: '1.7.0',
   license: 'GPL',
   meson_version: '>= 1.1.0', # <- set min version of meson.
   default_options : [
@@ -121,11 +121,14 @@ py.install_sources(
     [
       'Modules/__init__.py',
       'Modules/DynamicalLanczos.py',
+      'Modules/QSpaceLanczos.py',
+      'Modules/QSpaceHessian.py',
       'Modules/Dynamical.py',
       'Modules/Parallel.py',
       'Modules/Perturbations.py',
       'Modules/StaticHessian.py',
       'Modules/tdscha_core.jl',
+      'Modules/tdscha_qspace.jl',
       'Modules/Tools.py',
       'Modules/cli.py'
     ],
diff --git a/mkdocs.yml b/mkdocs.yml
index 82c928b7..4b4082e4 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -45,9 +45,12 @@ nav:
   - Quick Start: quickstart.md
   - In-Depth Usage: usage.md
   - StaticHessian: static-hessian.md
+  - QSpaceHessian: qspace-hessian.md
   - CLI Tools: cli.md
   - API Reference:
     - DynamicalLanczos: api/dynamical_lanczos.md
+    - QSpaceLanczos: api/qspace_lanczos.md
+    - QSpaceHessian: api/qspace_hessian.md
     - StaticHessian: api/static_hessian.md
 
 # Extra JavaScript for MathJax
diff --git a/pyproject.toml b/pyproject.toml
index f5d86368..9b85bf74 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ build-backend = "mesonpy"
 [project]
 # Project metadata, which was previously in the `setup()` call of setup.py.
 name = "tdscha"
-version = "1.6.0" # Make sure this version matches meson.build
+version = "1.7.0" # Make sure this version matches meson.build
 description = "Time Dependent Self Consistent Harmonic Approximation"
 authors = [{name = "Lorenzo Monacelli"}]
 readme = "README.md"
diff --git a/tests/test_lanczos_fast/test_c_code_lanczos/mode10.json b/tests/test_lanczos_fast/test_c_code_lanczos/mode10.json
index ca3d669f..7b80b56c 100644
--- a/tests/test_lanczos_fast/test_c_code_lanczos/mode10.json
+++ b/tests/test_lanczos_fast/test_c_code_lanczos/mode10.json
@@ -1 +1 @@
-{"T": 250, "n_steps": 5, "ignore_v2": false, "ignore_v3": false, "ignore_v4": false, "use_wigner": false, "run_sym": false, "data": {"n_configs": 10, "n_modes": 45, "n_syms": 384, "n_blocks": 9, "perturbation_modulus": 1, "reverse": false, "shift": 0}}
\ No newline at end of file
+{"T": 250, "n_steps": 5, "ignore_v2": false, "ignore_v3": false, "ignore_v4": false, "use_wigner": true, "run_sym": false, "data": {"n_configs": 10, "n_modes": 45, "n_syms": 384, "n_blocks": 9, "perturbation_modulus": 1, "reverse": false, "shift": 0}}
\ No newline at end of file
diff --git a/tests/test_qspace/test_ir_modulus.py b/tests/test_qspace/test_ir_modulus.py
new file mode 100644
index 00000000..59ea7ff3
--- /dev/null
+++ b/tests/test_qspace/test_ir_modulus.py
@@ -0,0 +1,155 @@
+"""
+Test that compares the perturbation modulus for IR between Q-space Lanczos
+and real-space Lanczos (both using Wigner mode) on the SnTe-like system.
+
+The perturbation modulus should match exactly (within numerical tolerance).
+"""
+from __future__ import print_function
+
+import numpy as np
+import os
+import sys
+import pytest
+
+import cellconstructor as CC
+import cellconstructor.Phonons
+import cellconstructor.Methods
+import cellconstructor.symmetries
+
+import sscha, sscha.Ensemble
+import tdscha.DynamicalLanczos as DL
+
+from tdscha.Parallel import pprint as print
+
+# Data directory (reuse test_julia data)
+DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                        '..', 'test_julia', 'data')
+
+# Temperature
+T = 250
+NQIRR = 3
+
+
+def _setup_realspace_lanczos_ir(pol_vec):
+    """Set up a real-space Lanczos calculation with IR perturbation."""
+    dyn = CC.Phonons.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    # Create ASR-satisfying effective charges: +I for first atom, -I for second, etc.
+    nat = dyn.structure.N_atoms
+    effective_charges = np.zeros((nat, 3, 3))
+    for i in range(nat):
+        # Alternate signs to satisfy ASR
+        sign = 1.0 if i % 2 == 0 else -1.0
+        effective_charges[i] = sign * np.eye(3)
+    # Ensure ASR is exactly satisfied
+    total_charge = np.sum(effective_charges, axis=0)
+    if np.max(np.abs(total_charge)) > 1e-12:
+        # Adjust to exactly satisfy ASR
+        effective_charges -= total_charge / nat
+    
+    ens = sscha.Ensemble.Ensemble(dyn, T)
+    ens.load_bin(DATA_DIR, 1)
+
+    lanc = DL.Lanczos(ens, lo_to_split=None)
+    lanc.ignore_harmonic = False
+    lanc.ignore_v3 = False
+    lanc.ignore_v4 = False
+    lanc.use_wigner = True
+    lanc.mode = DL.MODE_FAST_SERIAL
+    lanc.init(use_symmetries=True)
+    lanc.prepare_ir(effective_charges=effective_charges, pol_vec=pol_vec)
+    return lanc
+
+
+def _setup_qspace_lanczos_ir(pol_vec):
+    """Set up a Q-space Lanczos calculation with IR perturbation."""
+    try:
+        import tdscha.QSpaceLanczos as QL
+    except ImportError:
+        pytest.skip("Julia not available for QSpaceLanczos")
+
+    if not QL.__JULIA_EXT__:
+        pytest.skip("Julia extension not loaded")
+
+    dyn = CC.Phonons.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    nat = dyn.structure.N_atoms
+    # Create ASR-satisfying effective charges: +I for first atom, -I for second, etc.
+    effective_charges = np.zeros((nat, 3, 3))
+    for i in range(nat):
+        # Alternate signs to satisfy ASR
+        sign = 1.0 if i % 2 == 0 else -1.0
+        effective_charges[i] = sign * np.eye(3)
+    # Ensure ASR is exactly satisfied
+    total_charge = np.sum(effective_charges, axis=0)
+    if np.max(np.abs(total_charge)) > 1e-12:
+        # Adjust to exactly satisfy ASR
+        effective_charges -= total_charge / nat
+    
+    ens = sscha.Ensemble.Ensemble(dyn, T)
+    ens.load_bin(DATA_DIR, 1)
+
+    qlanc = QL.QSpaceLanczos(ens, lo_to_split=None)
+    qlanc.ignore_harmonic = False
+    qlanc.ignore_v3 = False
+    qlanc.ignore_v4 = False
+    qlanc.init(use_symmetries=True)
+    qlanc.prepare_ir(effective_charges=effective_charges, pol_vec=pol_vec)
+    return qlanc
+
+
+def test_ir_perturbation_modulus(verbose=True):
+    """
+    Compare perturbation_modulus after prepare_ir between real-space and q-space.
+    """
+    try:
+        import tdscha.QSpaceLanczos as QL
+        if not QL.__JULIA_EXT__:
+            pytest.skip("Julia extension not loaded")
+    except ImportError:
+        pytest.skip("Julia not available for QSpaceLanczos")
+
+    # Test two polarization vectors
+    for pol_vec in (np.array([1., 0., 0.]), np.array([0., 1., 0.]), np.array([0., 0., 1.])):
+        if verbose:
+            print(f"\n=== Testing IR perturbation modulus with pol_vec = {pol_vec} ===")
+        
+        lanc_real = _setup_realspace_lanczos_ir(pol_vec)
+        lanc_qspace = _setup_qspace_lanczos_ir(pol_vec)
+        
+        mod_real = lanc_real.perturbation_modulus
+        mod_qspace = lanc_qspace.perturbation_modulus
+        
+        if verbose:
+            print(f"Real-space perturbation_modulus = {mod_real:.12e}")
+            print(f"Q-space perturbation_modulus    = {mod_qspace:.12e}")
+            print(f"Relative difference = {abs(mod_real - mod_qspace) / max(mod_real, mod_qspace):.2e}")
+        
+        # The modulus should agree within 1e-10 relative tolerance
+        rtol = 1e-10
+        atol = 1e-14
+        np.testing.assert_allclose(mod_qspace, mod_real, rtol=rtol, atol=atol,
+                                   err_msg=f"IR perturbation modulus mismatch for pol_vec {pol_vec}")
+        
+        # Also compare the R sector of psi (first n_modes/n_bands entries)
+        # Real-space psi is real, q-space psi is complex (but should be real at Gamma)
+        # Note: shapes are different - q-space has n_bands (unit cell modes),
+        # real-space has n_modes (supercell modes minus translations)
+        # We can't compare directly, but we can check that the modulus matches
+        if verbose:
+            n_modes_real = lanc_real.n_modes
+            n_bands_qspace = lanc_qspace.n_bands
+            print(f"Real-space n_modes = {n_modes_real}, Q-space n_bands = {n_bands_qspace}")
+            
+            # Extract R sector
+            psi_r_real = lanc_real.psi[:n_modes_real]
+            psi_r_qspace = lanc_qspace.psi[:n_bands_qspace]
+            print(f"Real-space R sector norm = {np.linalg.norm(psi_r_real):.12e}")
+            print(f"Q-space R sector norm = {np.linalg.norm(psi_r_qspace):.12e}")
+    
+    if verbose:
+        print("\n=== All IR perturbation modulus tests passed ===")
+
+
+if __name__ == "__main__":
+    # Run the test with verbose output
+    test_ir_perturbation_modulus(verbose=True)
+    print("\nAll tests passed.")
\ No newline at end of file
diff --git a/tests/test_qspace/test_qspace_hessian.py b/tests/test_qspace/test_qspace_hessian.py
new file mode 100644
index 00000000..2fef6644
--- /dev/null
+++ b/tests/test_qspace/test_qspace_hessian.py
@@ -0,0 +1,170 @@
+"""
+Test Q-Space Hessian against reference ens.get_free_energy_hessian().
+
+Uses the SnTe 2×2×2 test data from tests/test_julia/data/ (T=250K, NQIRR=3).
+"""
+from __future__ import print_function
+
+import numpy as np
+import os
+import pytest
+
+import cellconstructor as CC
+import cellconstructor.Phonons
+
+import sscha, sscha.Ensemble
+
+# Test parameters
+T = 250
+NQIRR = 3
+
+DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                        '..', 'test_julia', 'data')
+
+
+def _load_ensemble():
+    """Load the SnTe test ensemble."""
+    dyn = CC.Phonons.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    ens = sscha.Ensemble.Ensemble(dyn, T)
+    ens.load_bin(DATA_DIR, 1)
+    return ens
+
+
+def test_qspace_hessian_gamma():
+    """Compare Gamma-point Hessian eigenvalues: Q-space vs reference."""
+    try:
+        import tdscha.QSpaceHessian as QH
+        import tdscha.QSpaceLanczos as QL
+        if not QL.__JULIA_EXT__:
+            pytest.skip("Julia extension not loaded")
+    except ImportError:
+        pytest.skip("Julia or QSpaceHessian not available")
+
+    ens = _load_ensemble()
+
+    # -- Reference: ens.get_free_energy_hessian --
+    ref_hessian = ens.get_free_energy_hessian(include_v4=True,
+                                               use_symmetries=True)
+    ref_dynmat_gamma = ref_hessian.dynmats[0]
+    w, pols = ref_hessian.DyagDinQ(0)
+    w = w**2 * np.sign(w)  # Convert to eigenvalues of the Hessian
+    # Filter out acoustic (near-zero) eigenvalues
+    ref_non_acoustic = w[np.abs(w) > 1e-8]
+
+    # -- Q-space Hessian at Gamma --
+    qh = QH.QSpaceHessian(ens, verbose=True, lo_to_split=None)
+    qh.init(use_symmetries=True)
+    H_q_gamma = qh.compute_hessian_at_q(0, tol=1e-8, max_iters=1000)
+
+    # Transform to Cartesian
+    pol = qh.pols_q[:, :, 0]
+    Phi_q = pol @ H_q_gamma @ np.conj(pol).T
+    qspace_evals = np.sort(np.real(np.linalg.eigvalsh(Phi_q)))
+    qspace_non_acoustic = qspace_evals[np.abs(qspace_evals) > 1e-8]
+
+    print()
+    print("=== Gamma-point Hessian eigenvalue comparison ===")
+    print("Reference (ens.get_free_energy_hessian):")
+    for i, ev in enumerate(ref_non_acoustic):
+        print("  {:2d}: {:.8e}".format(i, ev))
+    print("Q-space Hessian:")
+    for i, ev in enumerate(qspace_non_acoustic):
+        print("  {:2d}: {:.8e}".format(i, ev))
+
+    # Compare eigenvalues
+    n_compare = min(len(ref_non_acoustic), len(qspace_non_acoustic))
+    assert n_compare > 0, "No non-acoustic eigenvalues found"
+
+    # Use relative tolerance on eigenvalues
+    for i in range(n_compare):
+        ref_val = ref_non_acoustic[i]
+        qsp_val = qspace_non_acoustic[i]
+        rel_diff = abs(ref_val - qsp_val) / max(abs(ref_val), 1e-15)
+        print("  eigenvalue {}: ref={:.8e}, qsp={:.8e}, rel_diff={:.2e}".format(
+            i, ref_val, qsp_val, rel_diff))
+        assert rel_diff < 0.05, (
+            "Eigenvalue {} differs too much: ref={:.6e}, qsp={:.6e}, "
+            "rel_diff={:.4e}".format(i, ref_val, qsp_val, rel_diff))
+
+    print("=== Gamma-point Hessian test PASSED ===")
+
+
+def test_qspace_hessian_symmetry():
+    """Verify eigenvalues at symmetry-equivalent q-points match."""
+    try:
+        import tdscha.QSpaceHessian as QH
+        import tdscha.QSpaceLanczos as QL
+        if not QL.__JULIA_EXT__:
+            pytest.skip("Julia extension not loaded")
+    except ImportError:
+        pytest.skip("Julia or QSpaceHessian not available")
+
+    ens = _load_ensemble()
+
+    qh = QH.QSpaceHessian(ens, verbose=True, lo_to_split=None)
+    qh.init(use_symmetries=True)
+
+    # Compute full hessian (all q-points)
+    hessian = qh.compute_full_hessian(tol=1e-8, max_iters=1000)
+
+    # For each star, check that eigenvalues are equivalent
+    print()
+    print("=== Symmetry equivalence test ===")
+    for iq_irr in qh.irr_qpoints:
+        star = qh.q_star_map[iq_irr]
+        if len(star) <= 1:
+            continue
+
+        ref_H = qh.H_q_dict[iq_irr]
+        ref_evals = np.sort(np.real(np.linalg.eigvalsh(ref_H)))
+
+        for iq_rot, R_cart, t_cart in star:
+            if iq_rot == iq_irr:
+                continue
+            rot_H = qh.H_q_dict[iq_rot]
+            rot_evals = np.sort(np.real(np.linalg.eigvalsh(rot_H)))
+
+            max_diff = np.max(np.abs(ref_evals - rot_evals))
+            print("  q_irr={}, q_rot={}: max eigenvalue diff = {:.2e}".format(
+                iq_irr, iq_rot, max_diff))
+            assert max_diff < 1e-6, (
+                "Eigenvalues at q={} and q={} differ by {:.2e}".format(
+                    iq_irr, iq_rot, max_diff))
+
+    print("=== Symmetry equivalence test PASSED ===")
+
+
+    hessian_other = ens.get_free_energy_hessian(include_v4=True, use_symmetries=True)
+    print()
+    print("=== Test the complete Hessian matrix ===")
+    badvalues = []
+    for iq, q in enumerate(hessian.q_tot):
+        w_good, _ = hessian_other.DyagDinQ(iq)
+        w_test, _ = hessian.DyagDinQ(iq)
+
+        print("  q = {}".format(q))
+        for k in range(len(w_good)):
+            w_g = w_good[k]
+            w_t = w_test[k]
+            rel_diff = abs(w_g - w_t) / max(abs(w_g), 1e-15)
+
+            if iq == 0 and abs(w_g) < 1e-9:
+                continue
+
+            print("    mode {}: ref={:.8e}, test={:.8e}, rel_diff={:.2e}".format(
+                k, w_g, w_t, rel_diff))
+            if rel_diff > 0.05:
+                badvalues.append((k, q, w_g, w_t, rel_diff))
+            
+    print("=== Summary of modes with large discrepancies (>5% relative difference) ===")
+    for klist in badvalues:
+        k, q, w_g, w_t, rel_diff = klist
+        assert rel_diff < 0.05, (
+                "Mode {} at q={} differs too much: ref={:.6e}, test={:.6e}, "
+                "rel_diff={:.4e}".format(k, q, w_g, w_t, rel_diff))
+
+    print("=== Test the complete Hessian matrix  PASSED ===")
+
+if __name__ == "__main__":
+    test_qspace_hessian_gamma()
+    test_qspace_hessian_symmetry()
diff --git a/tests/test_qspace/test_qspace_lanczos.py b/tests/test_qspace/test_qspace_lanczos.py
new file mode 100644
index 00000000..777e6483
--- /dev/null
+++ b/tests/test_qspace/test_qspace_lanczos.py
@@ -0,0 +1,273 @@
+"""
+Test that compares the spectral function computed via Q-space Lanczos
+and real-space Lanczos (both using Wigner mode) on the same SnTe-like system.
+
+The benchmark is on the Green function (real and imaginary parts)
+evaluated at omega=0 and at the frequency of the perturbed mode.
+
+This test perturbs a Gamma-point optical mode. Because the optical modes
+at Gamma are triply degenerate, we must carefully project the supercell
+eigenvector onto q-space bands to get the correct mapping.
+"""
+from __future__ import print_function
+
+import numpy as np
+import os
+import pytest
+
+import cellconstructor as CC
+import cellconstructor.Phonons
+import cellconstructor.Methods
+import cellconstructor.symmetries
+
+import sscha, sscha.Ensemble
+import tdscha.DynamicalLanczos as DL
+
+from tdscha.Parallel import pprint as print
+
+# Test parameters
+N_STEPS = 10
+T = 250
+NQIRR = 3
+
+# Tolerance on the Green function comparison (relative)
+GF_RTOL = 0.01  # 1% relative tolerance
+
+# Data directory (reuse test_julia data)
+DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                        '..', 'test_julia', 'data')
+
+
+def _find_gamma_mode_mapping():
+    """Find the correct mapping between a supercell mode and q-space band at Gamma.
+
+    Because the Gamma optical modes are degenerate, we cannot simply match by
+    frequency — the eigenvector gauge within the degenerate subspace may differ.
+    Instead, we project the supercell eigenvector onto the q-space band basis
+    to find the correct band_index.
+
+    Returns
+    -------
+    mode_index : int
+        Supercell mode index (after translation removal) for the selected mode.
+    band_index : int
+        Matching band index at Gamma (0-based).
+    w_q : ndarray
+        Frequencies at each q-point, shape (n_bands, n_q).
+    pols_q : ndarray
+        Polarization vectors at each q-point, shape (3*nat_uc, n_bands, n_q).
+    """
+    dyn = CC.Phonons.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    ws_sc, pols_sc, w_q, pols_q = dyn.DiagonalizeSupercell(return_qmodes=True)
+
+    super_structure = dyn.structure.generate_supercell(dyn.GetSupercell())
+    m = super_structure.get_masses_array()  # (nat_sc,)
+    trans_mask = CC.Methods.get_translations(pols_sc, m)
+    good_ws = ws_sc[~trans_mask]
+    orig_indices = np.where(~trans_mask)[0]
+
+    n_bands = 3 * dyn.structure.N_atoms
+    n_cell = np.prod(dyn.GetSupercell())
+    nat_uc = dyn.structure.N_atoms
+    nat_sc = super_structure.N_atoms
+    itau = super_structure.get_itau(dyn.structure) - 1  # 0-indexed
+
+    # Find first non-acoustic Gamma band
+    for band in range(n_bands):
+        if w_q[band, 0] > 1e-6:
+            target_freq = w_q[band, 0]
+            break
+    else:
+        raise ValueError("No non-acoustic Gamma mode found")
+
+    # Find the supercell mode matching this frequency
+    mode_index = np.argmin(np.abs(good_ws - target_freq))
+    freq_diff = abs(good_ws[mode_index] - target_freq)
+    assert freq_diff < 1e-8, (
+        "Mode frequency mismatch: {:.10e} vs {:.10e}".format(
+            good_ws[mode_index], target_freq))
+
+    # Project the supercell eigenvector onto Gamma q-space bands
+    orig_mode = orig_indices[mode_index]
+    pol_sc_mode = pols_sc[:, orig_mode]  # (3*N_sc,)
+
+    # Sum over supercell images to get Gamma component
+    pol_gamma = np.zeros(3 * nat_uc)
+    for i_sc in range(nat_sc):
+        i_uc = itau[i_sc]
+        pol_gamma[3*i_uc:3*i_uc+3] += pol_sc_mode[3*i_sc:3*i_sc+3]
+    pol_gamma /= np.sqrt(n_cell)
+
+    # Project onto q-space bands: R1[nu] = conj(pol_q[:, nu, 0]).T @ pol_gamma
+    R1 = np.conj(pols_q[:, :, 0]).T @ pol_gamma
+    band_index = np.argmax(np.abs(R1))
+
+    assert np.abs(R1[band_index]) > 0.99, (
+        "Projection onto q-space bands is not clean: |R1| = {}, R1 = {}".format(
+            np.abs(R1), R1))
+
+    return mode_index, band_index, w_q, pols_q
+
+
+def _setup_realspace_lanczos(mode_index):
+    """Set up and run a real-space Lanczos calculation (Wigner, serial C)."""
+    dyn = CC.Phonons.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    ens = sscha.Ensemble.Ensemble(dyn, T)
+    ens.load_bin(DATA_DIR, 1)
+
+    lanc = DL.Lanczos(ens, lo_to_split=None)
+    lanc.ignore_harmonic = False
+    lanc.ignore_v3 = False
+    lanc.ignore_v4 = False
+    lanc.use_wigner = True
+    lanc.mode = DL.MODE_FAST_SERIAL
+    lanc.init(use_symmetries=True)
+    lanc.prepare_mode(mode_index)
+    lanc.run_FT(N_STEPS, run_simm=True, verbose=False)
+    return lanc
+
+
+def _setup_qspace_lanczos(iq, band_index):
+    """Set up and run a Q-space Lanczos calculation."""
+    try:
+        import tdscha.QSpaceLanczos as QL
+    except ImportError:
+        pytest.skip("Julia not available for QSpaceLanczos")
+
+    if not QL.__JULIA_EXT__:
+        pytest.skip("Julia extension not loaded")
+
+    dyn = CC.Phonons.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    ens = sscha.Ensemble.Ensemble(dyn, T)
+    ens.load_bin(DATA_DIR, 1)
+
+    qlanc = QL.QSpaceLanczos(ens, lo_to_split=None)
+    qlanc.ignore_harmonic = False
+    qlanc.ignore_v3 = False
+    qlanc.ignore_v4 = False
+    qlanc.init(use_symmetries=True)
+    qlanc.prepare_mode_q(iq, band_index)
+    qlanc.run_FT(N_STEPS, verbose=False)
+    return qlanc
+
+
+def test_qspace_vs_realspace_green_function(verbose=False):
+    """
+    Compare Green functions from Q-space and real-space Lanczos
+    at omega=0 and at the perturbed mode frequency.
+
+    The renormalized frequency from 1/Re[G(0)] must agree within tolerance.
+    """
+    try:
+        import tdscha.QSpaceLanczos as QL
+        if not QL.__JULIA_EXT__:
+            pytest.skip("Julia extension not loaded")
+    except ImportError:
+        pytest.skip("Julia not available for QSpaceLanczos")
+
+    mode_index, band_index, w_q, pols_q = _find_gamma_mode_mapping()
+
+    if verbose:
+        print()
+        print("=== Q-space vs real-space Lanczos comparison ===")
+        print("Testing Gamma band {} (freq {:.2f} cm-1)".format(
+            band_index, w_q[band_index, 0] * CC.Units.RY_TO_CM))
+        print("  -> supercell mode index {}".format(mode_index))
+        print()
+
+    # Run both calculations
+    lanc_real = _setup_realspace_lanczos(mode_index)
+    lanc_qspace = _setup_qspace_lanczos(0, band_index)
+
+    # Get the harmonic frequency of the perturbed mode (in Ry)
+    w_mode = lanc_real.w[mode_index]
+
+    if verbose:
+        print("Lanczos coefficients comparison:")
+        print("  a_0 real-space: {:.10e}".format(lanc_real.a_coeffs[0]))
+        print("  a_0 q-space:    {:.10e}".format(lanc_qspace.a_coeffs[0]))
+        print("  b_0 real-space: {:.10e}".format(lanc_real.b_coeffs[0]))
+        print("  b_0 q-space:    {:.10e}".format(lanc_qspace.b_coeffs[0]))
+        print()
+
+    # Small imaginary broadening
+    smearing = w_mode * 0.1
+
+    # Evaluate the Green function at omega=0 and at the mode frequency
+    w_points = np.array([0.0, w_mode])
+
+    gf_real = lanc_real.get_green_function_continued_fraction(
+        w_points, use_terminator=False, smearing=smearing
+    )
+    gf_qspace = lanc_qspace.get_green_function_continued_fraction(
+        w_points, use_terminator=False, smearing=smearing
+    )
+
+    real_real = np.real(gf_real)
+    real_qspace = np.real(gf_qspace)
+    spectral_real = -np.imag(gf_real)
+    spectral_qspace = -np.imag(gf_qspace)
+
+    if verbose:
+        print("At omega = 0:")
+        print("  Re[G] real-space:  {:.10e}".format(real_real[0]))
+        print("  Re[G] q-space:     {:.10e}".format(real_qspace[0]))
+        print("  Im[G] real-space:  {:.10e}".format(spectral_real[0]))
+        print("  Im[G] q-space:     {:.10e}".format(spectral_qspace[0]))
+
+        print()
+        print("At omega = w_mode ({:.2f} cm-1):".format(
+            w_mode * CC.Units.RY_TO_CM))
+        print("  Re[G] real-space:  {:.10e}".format(real_real[1]))
+        print("  Re[G] q-space:     {:.10e}".format(real_qspace[1]))
+        print("  Im[G] real-space:  {:.10e}".format(spectral_real[1]))
+        print("  Im[G] q-space:     {:.10e}".format(spectral_qspace[1]))
+
+    # --- Assertions ---
+    # Renormalized frequency from Re[G(0)]: w^2 = 1/Re[G(0)]
+    w2_real = 1.0 / real_real[0]
+    w2_qspace = 1.0 / real_qspace[0]
+    freq_real = np.sign(w2_real) * np.sqrt(np.abs(w2_real)) * CC.Units.RY_TO_CM
+    freq_qspace = np.sign(w2_qspace) * np.sqrt(np.abs(w2_qspace)) * CC.Units.RY_TO_CM
+
+    if verbose:
+        print()
+        print("Renormalized frequency (real-space):  {:.6f} cm-1".format(freq_real))
+        print("Renormalized frequency (Q-space):     {:.6f} cm-1".format(freq_qspace))
+        print("Difference:                           {:.6f} cm-1".format(
+            abs(freq_real - freq_qspace)))
+
+    assert abs(freq_real - freq_qspace) < 0.1, (
+        "Renormalized frequencies differ too much: "
+        "real-space={:.4f}, q-space={:.4f} cm-1".format(freq_real, freq_qspace)
+    )
+
+    # Compare spectral function at omega = w_mode
+    ref_spectral = max(abs(spectral_real[1]), abs(spectral_qspace[1]))
+    if ref_spectral > 1e-15:
+        rel_diff_spectral = abs(spectral_real[1] - spectral_qspace[1]) / ref_spectral
+        print("Relative diff in spectral function at w_mode: {:.6e}".format(
+            rel_diff_spectral))
+        assert rel_diff_spectral < GF_RTOL, (
+            "Spectral functions at w_mode differ by {:.4e} "
+            "(tolerance: {})".format(rel_diff_spectral, GF_RTOL)
+        )
+
+    # Compare real part of GF at omega = w_mode
+    ref_real_wm = max(abs(real_real[1]), abs(real_qspace[1]))
+    if ref_real_wm > 1e-15:
+        rel_diff_real = abs(real_real[1] - real_qspace[1]) / ref_real_wm
+        print("Relative diff in Re[G] at w_mode:              {:.6e}".format(
+            rel_diff_real))
+        assert rel_diff_real < GF_RTOL, (
+            "Real parts of GF at w_mode differ by {:.4e} "
+            "(tolerance: {})".format(rel_diff_real, GF_RTOL)
+        )
+
+    if verbose:
+        print()
+        print("=== All benchmarks passed ===")
+
+
+if __name__ == "__main__":
+    test_qspace_vs_realspace_green_function(verbose=True)
diff --git a/tests/test_qspace/test_qspace_noinvq.py b/tests/test_qspace/test_qspace_noinvq.py
new file mode 100644
index 00000000..2213dfc0
--- /dev/null
+++ b/tests/test_qspace/test_qspace_noinvq.py
@@ -0,0 +1,106 @@
+"""
+Test that compares the static limit of green functions at a
+q point different from Gamma on the SnTe-like system.
+
+This compares the new q-space implementation, the old real-space implementation of Lanczos
+and the get_free_energy_hessian one.
+"""
+
+import numpy as np
+import os, pytest
+
+import cellconstructor as CC
+import cellconstructor.Phonons
+import cellconstructor.Units
+
+import sscha, sscha.Ensemble
+import tdscha.DynamicalLanczos as DL
+import tdscha.QSpaceLanczos as QL
+
+
+N_STEPS = 30
+T = 250
+NQIRR = 3
+
+IQ = 1
+NMODE = 0
+
+DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                        '..', 'test_julia', 'data')
+
+
+def test_qspace_noinvq(test_v3 = True, test_v4=True, verbose=False):
+    # Load the original dynamical matrix
+    dyn = CC.Phonons.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+
+    # Load the ensemble
+    ensemble = sscha.Ensemble.Ensemble(dyn, T)
+    ensemble.load_bin(DATA_DIR, 1)
+
+    # Compute the free energy hessian
+    hessian = dyn.Copy()
+    if test_v3:
+        hessian = ensemble.get_free_energy_hessian(include_v4 = test_v4)
+
+    # Initialize the q-space and real-space Lanczos solvers
+    q_lanczos = QL.QSpaceLanczos(ensemble)
+    q_lanczos.ignore_v3 = not test_v3
+    if test_v3:
+        q_lanczos.ignore_v4 = not test_v4
+    else:
+        q_lanczos.ignore_v4 = True
+    q_lanczos.init()
+
+    r_lanczos = DL.Lanczos(ensemble)
+    r_lanczos.ignore_v3 = not test_v3
+    if test_v3:
+        r_lanczos.ignore_v4 = not test_v4
+    else:
+        r_lanczos.ignore_v4 = True
+    r_lanczos.init()
+
+    # Run the q-space Lanczos solver
+    q_lanczos.prepare_mode_q(IQ, NMODE)
+
+    # Get the equivalence with r_lanczos mode
+    mode_id = np.argmin(np.abs(r_lanczos.w - q_lanczos.w_q[NMODE, IQ]))
+    r_lanczos.prepare_mode(mode_id)
+
+    # Run the two lanczos
+    q_lanczos.run_FT(N_STEPS)
+    r_lanczos.run_FT(N_STEPS)
+
+    # Get the static frequencies
+    gf_q = q_lanczos.get_green_function_continued_fraction(np.array([0.0]), smearing=0.0, use_terminator=False)
+    gf_r = r_lanczos.get_green_function_continued_fraction(np.array([0.0]), smearing=0.0, use_terminator=False)
+
+    w_qlanczos = np.sqrt(np.abs(np.real(1.0/gf_q[0]))) * np.sign(np.real(gf_q[0]))
+    w_rlanczos = np.sqrt(np.abs(np.real(1.0/gf_r[0]))) * np.sign(np.real(gf_r[0]))
+
+    if verbose:
+        print()
+        print(" ---------- TEST SINGLE Q RESULTS ---------- ")
+        print(" v3 = ", test_v3, " v4 = ", test_v4)
+        print(f"Q-space Lanczos frequency: {w_qlanczos*CC.Units.RY_TO_CM:.6f} cm-1")
+        print(f"Real-space Lanczos frequency: {w_rlanczos*CC.Units.RY_TO_CM:.6f} cm-1")
+
+    # Compute the free energy hessian frequency
+    Dmat = hessian.dynmats[IQ]
+    m_sqrt = np.sqrt(q_lanczos.m)
+    Dmat /= np.outer(m_sqrt, m_sqrt)
+    w2_hessian = q_lanczos.pols_q[:, NMODE, IQ] @ Dmat @ q_lanczos.pols_q[:, NMODE, IQ]
+    w_hessian = np.real(np.sqrt(np.abs(w2_hessian)) * np.sign(w2_hessian))
+
+    if verbose:
+        print(f"Free energy hessian frequency: {w_hessian*CC.Units.RY_TO_CM:.6f} cm-1")
+
+    # Compare the results
+    assert np.isclose(w_qlanczos, w_rlanczos, atol=1e-5), f"Q-space and real-space Lanczos frequencies differ: {w_qlanczos*CC.Units.RY_TO_CM:.6f} cm-1 vs {w_rlanczos*CC.Units.RY_TO_CM:.6f} cm-1"
+    assert np.isclose(w_qlanczos, w_hessian, atol=1e-5), f"Q-space Lanczos and free energy hessian frequencies differ: {w_qlanczos*CC.Units.RY_TO_CM:.6f} cm-1 vs {w_hessian*CC.Units.RY_TO_CM:.6f} cm-1"
+    
+
+    
+    
+if __name__ == "__main__":
+    test_qspace_noinvq(test_v3=True, test_v4=False, verbose=True)
+    test_qspace_noinvq(test_v3=True, test_v4=True, verbose=True)
diff --git a/tests/test_qspace/test_raman_modulus.py b/tests/test_qspace/test_raman_modulus.py
new file mode 100644
index 00000000..8059005e
--- /dev/null
+++ b/tests/test_qspace/test_raman_modulus.py
@@ -0,0 +1,239 @@
+"""
+Test that compares the perturbation modulus for Raman between Q-space Lanczos
+and real-space Lanczos (both using Wigner mode) on the SnTe-like system.
+
+The perturbation modulus should match exactly (within numerical tolerance).
+"""
+from __future__ import print_function
+
+import numpy as np
+import os
+import sys
+import pytest
+
+import cellconstructor.Phonons as CC
+import cellconstructor.Methods
+import cellconstructor.symmetries
+
+import sscha.Ensemble
+import tdscha.DynamicalLanczos as DL
+
+from tdscha.Parallel import pprint as print
+
+# Data directory (reuse test_julia data)
+DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                        '..', 'test_julia', 'data')
+
+# Temperature
+T = 250
+NQIRR = 3
+
+
+def _create_dummy_raman_tensor(dyn):
+    """Create a dummy Raman tensor for testing."""
+    nat = dyn.structure.N_atoms
+    # Create a simple Raman tensor: R_{αβ,i} = δ_{αβ} * sign(i) where i is atom index
+    # For 2 atoms: atom 0 gets +I, atom 1 gets -I (satisfies translation invariance)
+    raman_tensor = np.zeros((3, 3, 3 * nat))
+    for i in range(nat):
+        sign = 1.0 if i % 2 == 0 else -1.0
+        for alpha in range(3):
+            raman_tensor[alpha, alpha, 3*i + alpha] = sign
+    return raman_tensor
+
+
+def _setup_realspace_lanczos_raman(pol_vec_in, pol_vec_out, unpolarized=None):
+    """Set up a real-space Lanczos calculation with Raman perturbation."""
+    dyn = CC.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    
+    ens = sscha.Ensemble.Ensemble(dyn, T)
+    ens.load_bin(DATA_DIR, 1)
+    
+    # Add dummy Raman tensor if missing (AFTER loading binary data)
+    # Lanczos uses ens.current_dyn, not ens.dyn_0
+    if ens.current_dyn.raman_tensor is None:
+        ens.current_dyn.raman_tensor = _create_dummy_raman_tensor(ens.current_dyn)
+
+    lanc = DL.Lanczos(ens, lo_to_split=None)
+    lanc.ignore_harmonic = False
+    lanc.ignore_v3 = False
+    lanc.ignore_v4 = False
+    lanc.use_wigner = True
+    lanc.mode = DL.MODE_FAST_SERIAL
+    lanc.init(use_symmetries=True)
+    
+    if unpolarized is None:
+        lanc.prepare_raman(pol_vec_in=pol_vec_in, pol_vec_out=pol_vec_out)
+    else:
+        lanc.prepare_raman(unpolarized=unpolarized)
+    
+    return lanc
+
+
+def _setup_qspace_lanczos_raman(pol_vec_in, pol_vec_out, unpolarized=None):
+    """Set up a Q-space Lanczos calculation with Raman perturbation."""
+    try:
+        import tdscha.QSpaceLanczos as QL
+    except ImportError:
+        pytest.skip("Julia not available for QSpaceLanczos")
+
+    if not QL.__JULIA_EXT__:
+        pytest.skip("Julia extension not loaded")
+
+    dyn = CC.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+    
+    ens = sscha.Ensemble.Ensemble(dyn, T)
+    ens.load_bin(DATA_DIR, 1)
+    
+    # Add dummy Raman tensor if missing (AFTER loading binary data)
+    # Lanczos uses ens.current_dyn, not ens.dyn_0
+    if ens.current_dyn.raman_tensor is None:
+        ens.current_dyn.raman_tensor = _create_dummy_raman_tensor(ens.current_dyn)
+
+    qlanc = QL.QSpaceLanczos(ens, lo_to_split=None)
+    qlanc.ignore_harmonic = False
+    qlanc.ignore_v3 = False
+    qlanc.ignore_v4 = False
+    qlanc.init(use_symmetries=True)
+    
+    if unpolarized is None:
+        qlanc.prepare_raman(pol_vec_in=pol_vec_in, pol_vec_out=pol_vec_out)
+    else:
+        qlanc.prepare_raman(unpolarized=unpolarized)
+    
+    return qlanc
+
+
+def test_raman_perturbation_modulus(verbose=True):
+    """
+    Compare perturbation_modulus after prepare_raman between real-space and q-space.
+    """
+    try:
+        import tdscha.QSpaceLanczos as QL
+        if not QL.__JULIA_EXT__:
+            pytest.skip("Julia extension not loaded")
+    except ImportError:
+        pytest.skip("Julia not available for QSpaceLanczos")
+
+    # Test simple polarized Raman
+    test_cases = [
+        # (pol_in, pol_out, description)
+        (np.array([1., 0., 0.]), np.array([1., 0., 0.]), "xx"),
+        (np.array([1., 0., 0.]), np.array([0., 1., 0.]), "xy"),
+        (np.array([0., 1., 0.]), np.array([0., 1., 0.]), "yy"),
+        (np.array([0., 0., 1.]), np.array([0., 0., 1.]), "zz"),
+    ]
+    
+    for pol_in, pol_out, desc in test_cases:
+        if verbose:
+            print(f"\n=== Testing Raman perturbation modulus {desc} ===")
+        
+        lanc_real = _setup_realspace_lanczos_raman(pol_in, pol_out)
+        lanc_qspace = _setup_qspace_lanczos_raman(pol_in, pol_out)
+        
+        mod_real = lanc_real.perturbation_modulus
+        mod_qspace = lanc_qspace.perturbation_modulus
+        
+        if verbose:
+            print(f"Real-space perturbation_modulus = {mod_real:.12e}")
+            print(f"Q-space perturbation_modulus    = {mod_qspace:.12e}")
+            print(f"Relative difference = {abs(mod_real - mod_qspace) / max(mod_real, mod_qspace):.2e}")
+        
+        # The modulus should agree within 1e-10 relative tolerance
+        rtol = 1e-10
+        atol = 1e-14
+        np.testing.assert_allclose(mod_qspace, mod_real, rtol=rtol, atol=atol,
+                                   err_msg=f"Raman perturbation modulus mismatch for {desc}")
+    
+    # Test unpolarized components (with prefactors)
+    for unpolarized in range(7):
+        if verbose:
+            print(f"\n=== Testing unpolarized Raman component {unpolarized} ===")
+        
+        lanc_real = _setup_realspace_lanczos_raman(None, None, unpolarized=unpolarized)
+        lanc_qspace = _setup_qspace_lanczos_raman(None, None, unpolarized=unpolarized)
+        
+        mod_real = lanc_real.perturbation_modulus
+        mod_qspace = lanc_qspace.perturbation_modulus
+        
+        if verbose:
+            print(f"Real-space perturbation_modulus = {mod_real:.12e}")
+            print(f"Q-space perturbation_modulus    = {mod_qspace:.12e}")
+            print(f"Relative difference = {abs(mod_real - mod_qspace) / max(mod_real, mod_qspace):.2e}")
+        
+        # The modulus should agree within 1e-10 relative tolerance
+        rtol = 1e-10
+        atol = 1e-14
+        np.testing.assert_allclose(mod_qspace, mod_real, rtol=rtol, atol=atol,
+                                   err_msg=f"Unpolarized Raman component {unpolarized} modulus mismatch")
+    
+    if verbose:
+        print("\n=== All Raman perturbation modulus tests passed ===")
+
+
+def test_unpolarized_raman_modulus(verbose=True):
+    """
+    Compare perturbation_modulus after prepare_unpolarized_raman between real-space and q-space.
+    """
+    try:
+        import tdscha.QSpaceLanczos as QL
+        if not QL.__JULIA_EXT__:
+            pytest.skip("Julia extension not loaded")
+    except ImportError:
+        pytest.skip("Julia not available for QSpaceLanczos")
+
+    for index in range(7):
+        if verbose:
+            print(f"\n=== Testing prepare_unpolarized_raman index {index} ===")
+        
+        dyn = CC.Phonons(os.path.join(DATA_DIR, "dyn_gen_pop1_"), NQIRR)
+        
+        ens = sscha.Ensemble.Ensemble(dyn, T)
+        ens.load_bin(DATA_DIR, 1)
+        
+        # Add dummy Raman tensor if missing (AFTER loading binary data)
+        # Lanczos uses ens.current_dyn, not ens.dyn_0
+        if ens.current_dyn.raman_tensor is None:
+            ens.current_dyn.raman_tensor = _create_dummy_raman_tensor(ens.current_dyn)
+
+        # Real-space
+        lanc_real = DL.Lanczos(ens, lo_to_split=None)
+        lanc_real.ignore_harmonic = False
+        lanc_real.ignore_v3 = False
+        lanc_real.ignore_v4 = False
+        lanc_real.use_wigner = True
+        lanc_real.mode = DL.MODE_FAST_SERIAL
+        lanc_real.init(use_symmetries=True)
+        lanc_real.prepare_unpolarized_raman(index=index)
+        
+        # Q-space
+        qlanc = QL.QSpaceLanczos(ens, lo_to_split=None)
+        qlanc.ignore_harmonic = False
+        qlanc.ignore_v3 = False
+        qlanc.ignore_v4 = False
+        qlanc.init(use_symmetries=True)
+        qlanc.prepare_unpolarized_raman(index=index)
+        
+        mod_real = lanc_real.perturbation_modulus
+        mod_qspace = qlanc.perturbation_modulus
+        
+        if verbose:
+            print(f"Real-space perturbation_modulus = {mod_real:.12e}")
+            print(f"Q-space perturbation_modulus    = {mod_qspace:.12e}")
+            print(f"Relative difference = {abs(mod_real - mod_qspace) / max(mod_real, mod_qspace):.2e}")
+        
+        # The modulus should agree within 1e-10 relative tolerance
+        rtol = 1e-10
+        atol = 1e-14
+        np.testing.assert_allclose(mod_qspace, mod_real, rtol=rtol, atol=atol,
+                                   err_msg=f"prepare_unpolarized_raman index {index} modulus mismatch")
+    
+    if verbose:
+        print("\n=== All prepare_unpolarized_raman tests passed ===")
+
+
+if __name__ == "__main__":
+    # Run the tests with verbose output
+    test_raman_perturbation_modulus(verbose=True)
+    test_unpolarized_raman_modulus(verbose=True)
+    print("\nAll tests passed.")
\ No newline at end of file