From 3b9777652a48795423ec565d080a64b82d66eda2 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 26 Oct 2023 19:03:13 -0700
Subject: [PATCH 01/44] using family information as linearopertator

---
 glmnet/base.py | 23 +++--------------
 glmnet/cox.py  | 47 +++++++++++++++++++++++++++++++++--
 glmnet/glm.py  | 67 +++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 104 insertions(+), 33 deletions(-)

diff --git a/glmnet/base.py b/glmnet/base.py
index 950ca0d..e9f7e94 100644
--- a/glmnet/base.py
+++ b/glmnet/base.py
@@ -126,27 +126,10 @@ def quadratic_form(self,
             G_sum = n
             
         else:
-            G = np.asarray(G)
-            if G.ndim == 2:
-                if np.linalg.norm(G-G.T)/np.linalg.norm(G) > 1e-3:
-                    warnings.warn('G should be symmetric, using (G+G.T)/2')
-                    G = (G + G.T) / 2
-                GX = G @ X_R
-                X1_block = self.X.T @ G.sum(0)
-            elif G.ndim == 1:
-                if not np.all(G >= 0):
-                    raise ValueError('weights should be non-negative')
-                if not scipy.sparse.issparse(self.X):
-                    GX = G[:,None] * X_R
-                else:
-                    GX = scipy.sparse.diags(G) @ X_R
-                X1_block = self.X.T @ G
-            else:
-                raise ValueError("G should be 1-dim (treated as diagonal) or 2-dim") 
-            if scipy.sparse.issparse(GX):
-                GX = GX.toarray()
+            GX = G @ X_R
+            G1 = G @ np.ones(G.shape[0])
             XX_block = self.X.T @ GX
-            G_sum = G.sum()
+            G_sum = G1.sum()
             
         if scipy.sparse.issparse(XX_block):
             XX_block = XX_block.toarray()
diff --git a/glmnet/cox.py b/glmnet/cox.py
index 7d55ee0..643c1c0 100644
--- a/glmnet/cox.py
+++ b/glmnet/cox.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pandas as pd
 
+from scipy.stats import norm as normal_dbn
+
 from sklearn.utils import check_X_y
 from sklearn.base import BaseEstimator
 
@@ -41,7 +43,8 @@ def update(self,
             self.link_parameter = self.linear_predictor
         else:
             self.link_parameter = self.linear_predictor + offset
-
+        self.mean_parameter = self.link_parameter
+        
         # shorthand
         self.mu = self.link_parameter
         self.eta = self.linear_predictor
@@ -121,6 +124,7 @@ def null_fit(self,
     def get_null_deviance(self,
                           y,
                           sample_weight,
+                          offset, # ignored for Cox
                           fit_intercept):
         mu0 = self.null_fit(y, sample_weight, fit_intercept)
         return mu0, self.deviance(y, mu0, sample_weight)
@@ -153,6 +157,11 @@ def get_response_and_weights(self,
 
         return pseudo_response, newton_weights
     
+    def information(self,
+                    state,
+                    sample_weight):
+        return self._coxdev.information(state.link_parameter,
+                                        sample_weight)
 
 @dataclass
 class CoxLM(GLM):
@@ -181,6 +190,41 @@ def _check(self,
                          check=check,
                          multi_output=True)
 
+    def _summarize(self,
+                   exclude,
+                   dispersion,
+                   sample_weight,
+                   X_shape):
+
+        # IRLS used normalized weights,
+        # this unnormalizes them...
+
+        unscaled_precision_ = self.design_.quadratic_form(self._information)
+        
+        keep = np.ones(unscaled_precision_.shape[0]-1, bool)
+        if exclude is not []:
+            keep[exclude] = 0
+        keep = np.hstack([self.fit_intercept, keep]).astype(bool)
+        covariance_ = dispersion * np.linalg.inv(unscaled_precision_[keep][:,keep])
+
+        SE = np.sqrt(np.diag(covariance_)) 
+        index = self.feature_names_in_
+        if self.fit_intercept:
+            coef = np.hstack([self.intercept_, self.coef_])
+            T = np.hstack([self.intercept_ / SE[0], self.coef_ / SE[1:]])
+            index = ['intercept'] + index
+        else:
+            coef = self.coef_
+            T = self.coef_ / SE
+
+        summary_ = pd.DataFrame({'coef':coef,
+                                 'std err': SE,
+                                 'z': T,
+                                 'P>|z|': 2 * normal_dbn.sf(np.fabs(T))},
+                                index=index)
+        return covariance_, summary_
+
+
 @dataclass
 class RegCoxLM(RegGLM):
     
@@ -236,7 +280,6 @@ def _get_family_spec(self,
     def _get_initial_state(self,
                            X,
                            y,
-                           sample_weight,
                            exclude):
 
         n, p = X.shape
diff --git a/glmnet/glm.py b/glmnet/glm.py
index 5be19c6..278e308 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -10,6 +10,7 @@
 import pandas as pd
 
 import scipy.sparse
+from scipy.sparse.linalg import LinearOperator
 from scipy.stats import norm as normal_dbn
 from scipy.stats import t as t_dbn
 
@@ -174,6 +175,46 @@ def _accuracy_score(y, yhat, sample_weight): # for binary data classifying at p=
 
         return scorers_
 
+    def information(self,
+                    state,
+                    sample_weight=None):
+
+        family = self.base
+
+        # some checks for NAs/zeros
+        varmu = family.variance(state.mu)
+        if np.any(np.isnan(varmu)): raise ValueError("NAs in V(mu)")
+
+        if np.any(varmu == 0): raise ValueError("0s in V(mu)")
+
+        dmu_deta = family.link.inverse_deriv(state.link_parameter)
+        if np.any(np.isnan(dmu_deta)): raise ValueError("NAs in d(mu)/d(eta)")
+
+        W = dmu_deta**2 / varmu
+        if sample_weight is not None:
+            W *= sample_weight
+            
+        n = W.shape[0]
+        W = W.reshape(-1)
+        return DiagonalOperator(W)
+
+@dataclass
+class DiagonalOperator(LinearOperator):
+
+    weights: np.ndarray
+
+    def __post_init__(self):
+        self.weights = np.asarray(self.weights).reshape(-1)
+        n = self.weights.shape[0]
+        self.shape = (n, n)
+
+    def _matvec(self, arg):
+        return self.weights * arg.reshape(-1)
+
+    def _adjoint(self, arg):
+        return self._matvec(arg)
+    
+
 @add_dataclass_docstring
 @dataclass
 class GLMControl(object):
@@ -452,17 +493,20 @@ def obj_function(response,
         (converged,
          boundary,
          state,
-         self._final_weights) = IRLS(regularizer,
-                                     self._family,
-                                     design,
-                                     response,
-                                     offset,
-                                     normed_sample_weight,
-                                     state,
-                                     obj_function,
-                                     self.control)
-
+         _) = IRLS(regularizer,
+                   self._family,
+                   design,
+                   response,
+                   offset,
+                   normed_sample_weight,
+                   state,
+                   obj_function,
+                   self.control)
 
+        if self.summarize:
+            self._information = self._family.information(state,
+                                                         sample_weight)
+            
         # checks on convergence and fitted values
         if not converged:
             if self.control.logging: logging.debug("Fitting IRLS: algorithm did not converge")
@@ -622,7 +666,8 @@ def _summarize(self,
 
         # IRLS used normalized weights,
         # this unnormalizes them...
-        unscaled_precision_ = self.design_.quadratic_form(self._final_weights * sample_weight.sum()) 
+
+        unscaled_precision_ = self.design_.quadratic_form(self._information)
         
         keep = np.ones(unscaled_precision_.shape[0]-1, bool)
         if exclude is not []:

From c3feb6c974f7afb0f288cbb1938c3c6feb424ed5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 26 Oct 2023 19:27:41 -0700
Subject: [PATCH 02/44] initial commit of inference

---
 glmnet/inference.py | 1088 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1088 insertions(+)
 create mode 100644 glmnet/inference.py

diff --git a/glmnet/inference.py b/glmnet/inference.py
new file mode 100644
index 0000000..b56312d
--- /dev/null
+++ b/glmnet/inference.py
@@ -0,0 +1,1088 @@
+"""
+This module contains the core code needed for post selection
+inference based on affine selection procedures as
+described in the papers `Kac Rice`_, `Spacings`_, `covTest`_
+and `post selection LASSO`_.
+
+.. _covTest: http://arxiv.org/abs/1301.7161
+.. _Kac Rice: http://arxiv.org/abs/1308.3020
+.. _Spacings: http://arxiv.org/abs/1401.3889
+.. _post selection LASSO: http://arxiv.org/abs/1311.6238
+.. _sample carving: http://arxiv.org/abs/1410.2597
+
+"""
+
+from warnings import warn
+from copy import copy
+
+import numpy as np
+
+from .discrete_family import discrete_family
+from mpmath import mp
+
+WARNINGS = False
+
+class constraints(object):
+
+    r"""
+    This class is the core object for affine selection procedures.
+    It is meant to describe sets of the form $C$
+    where
+
+    .. math::
+
+       C = \left\{z: Az\leq b \right \}
+
+    Its main purpose is to consider slices through $C$
+    and the conditional distribution of a Gaussian $N(\mu,\Sigma)$
+    restricted to such slices.
+
+    Notes
+    -----
+
+    In this parameterization, the parameter `self.mean` corresponds
+    to the *reference measure* that is being truncated. It is not the
+    mean of the truncated Gaussian.
+
+    >>> positive = constraints(-np.identity(2), np.zeros(2))
+    >>> Y = np.array([3, 4.4])
+    >>> eta = np.array([1, 1], np.float)
+    >>> list(positive.interval(eta, Y))  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    [4.62...,  10.17...]
+    >>> positive.pivot(eta, Y) # doctest: +ELLIPSIS 
+    5.187...-07
+    >>> list(positive.bounds(eta, Y)) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    [1.399..., 7.4..., inf, 1.414...]  
+    >>> 
+
+    """
+
+    def __init__(self, 
+                 linear_part,
+                 offset,
+                 covariance=None,
+                 mean=None,
+                 rank=None):
+        r"""
+        Create a new inequality. 
+
+        Parameters
+        ----------
+
+        linear_part : np.float((q,p))
+            The linear part, $A$ of the affine constraint
+            $\{z:Az \leq b\}$. 
+
+        offset: np.float(q)
+            The offset part, $b$ of the affine constraint
+            $\{z:Az \leq b\}$. 
+
+        covariance : np.float((p,p))
+            Covariance matrix of Gaussian distribution to be 
+            truncated. Defaults to `np.identity(self.dim)`.
+
+        mean : np.float(p)
+            Mean vector of Gaussian distribution to be 
+            truncated. Defaults to `np.zeros(self.dim)`.
+
+        rank : int
+            If not None, this should specify
+            the rank of the covariance matrix. Defaults
+            to self.dim.
+
+        """
+
+        self.linear_part, self.offset = \
+            linear_part, np.asarray(offset)
+        
+        if self.linear_part.ndim == 2:
+            self.dim = self.linear_part.shape[1]
+        else:
+            self.dim = self.linear_part.shape[0]
+
+        if rank is None:
+            self.rank = self.dim
+        else:
+            self.rank = rank
+
+        if covariance is None:
+            covariance = np.identity(self.dim)
+        self.covariance = covariance
+
+        if mean is None:
+            mean = np.zeros(self.dim)
+        self.mean = mean
+
+    def __copy__(self):
+        r"""
+        A copy of the constraints.
+
+        Also copies _sqrt_cov, _sqrt_inv if attributes are present.
+        """
+        con = constraints(copy(self.linear_part),
+                          copy(self.offset),
+                          mean=copy(self.mean),
+                          covariance=copy(self.covariance),
+                          rank=self.rank)
+        if hasattr(self, "_sqrt_cov"):
+            con._sqrt_cov = self._sqrt_cov.copy()
+            con._sqrt_inv = self._sqrt_inv.copy()
+            con._rowspace = self._rowspace.copy()
+        return con
+
+    def __call__(self, Y, tol=1.e-3):
+        r"""
+        Check whether Y satisfies the linear
+        inequality constraints.
+        >>> A = np.array([[1., -1.], [1., -1.]])
+        >>> B = np.array([1., 1.])
+        >>> con = constraints(A, B)
+        >>> Y = np.array([-1., 1.])
+        >>> con(Y)
+        True
+        """
+        V1 = self.value(Y)
+        return np.all(V1 < tol * np.fabs(V1).max(0))
+
+    def value(self, Y):
+        r"""
+        Compute $\max(Ay-b)$.
+        """
+        return (self.linear_part.dot(Y) - self.offset).max()
+
+    def conditional(self,
+                    linear_part,
+                    value,
+                    rank=None):
+        """
+        Return an equivalent constraint 
+        after having conditioned on a linear equality.
+        
+        Let the inequality constraints be specified by
+        `(A,b)` and the equality constraints be specified
+        by `(C,d)`. We form equivalent inequality constraints by 
+        considering the residual
+
+        .. math::
+           
+           AY - E(AY|CY=d)
+
+        Parameters
+        ----------
+
+        linear_part : np.float((k,q))
+             Linear part of equality constraint, `C` above.
+
+        value : np.float(k)
+             Value of equality constraint, `b` above.
+
+        rank : int
+            If not None, this should specify
+            the rank of `linear_part`. Defaults
+            to `min(k,q)`.
+
+        Returns
+        -------
+
+        conditional_con : `constraints`
+             Affine constraints having applied equality constraint.
+
+        """
+
+        S = self.covariance
+        C, d = linear_part, value
+
+        M1 = S.dot(C.T)
+        M2 = C.dot(M1)
+
+        if M2.shape:
+            M2i = np.linalg.pinv(M2)
+            delta_cov = M1.dot(M2i.dot(M1.T))
+            delta_mean = M1.dot(M2i.dot(C.dot(self.mean) - d))
+        else:
+            delta_cov = np.multiply.outer(M1, M1) / M2
+            delta_mean = M1 * (C.dot(self.mean) - d) / M2
+
+        if rank is None:
+            if len(linear_part.shape) == 2:
+                rank = min(linear_part.shape)
+            else:
+                rank = 1
+
+        return constraints(self.linear_part,
+                           self.offset,
+                           covariance=self.covariance - delta_cov,
+                           mean=self.mean - delta_mean,
+                           rank=self.rank - rank)
+
+    def bounds(self, direction_of_interest, Y):
+        r"""
+        For a realization $Y$ of the random variable $N(\mu,\Sigma)$
+        truncated to $C$ specified by `self.constraints` compute
+        the slice of the inequality constraints in a 
+        given direction $\eta$.
+
+        Parameters
+        ----------
+
+        direction_of_interest: np.float
+            A direction $\eta$ for which we may want to form 
+            selection intervals or a test.
+
+        Y : np.float
+            A realization of $N(\mu,\Sigma)$ where 
+            $\Sigma$ is `self.covariance`.
+
+        Returns
+        -------
+
+        L : np.float
+            Lower truncation bound.
+
+        Z : np.float
+            The observed $\eta^TY$
+
+        U : np.float
+            Upper truncation bound.
+
+        S : np.float
+            Standard deviation of $\eta^TY$.
+
+        
+        """
+        return interval_constraints(self.linear_part,
+                                    self.offset,
+                                    self.covariance,
+                                    Y,
+                                    direction_of_interest)
+
+    def pivot(self, 
+              direction_of_interest, 
+              Y,
+              null_value=None,
+              alternative='greater'):
+        r"""
+        For a realization $Y$ of the random variable $N(\mu,\Sigma)$
+        truncated to $C$ specified by `self.constraints` compute
+        the slice of the inequality constraints in a 
+        given direction $\eta$ and test whether 
+        $\eta^T\mu$ is greater then 0, less than 0 or equal to 0.
+
+        Parameters
+        ----------
+
+        direction_of_interest: np.float
+            A direction $\eta$ for which we may want to form 
+            selection intervals or a test.
+
+        Y : np.float
+            A realization of $N(0,\Sigma)$ where 
+            $\Sigma$ is `self.covariance`.
+
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+
+        Returns
+        -------
+
+        P : np.float
+            $p$-value of corresponding test.
+
+        Notes
+        -----
+
+        All of the tests are based on the exact pivot $F$ given
+        by the truncated Gaussian distribution for the
+        given direction $\eta$. If the alternative is 'greater'
+        then we return $1-F$; if it is 'less' we return $F$
+        and if it is 'twosided' we return $2 \min(F,1-F)$.
+
+        """
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+        L, Z, U, S = self.bounds(direction_of_interest, Y)
+
+        if null_value is None:
+            meanZ = (direction_of_interest * self.mean).sum()
+        else:
+            meanZ = null_value
+
+        P = truncnorm_cdf((Z-meanZ)/S, (L-meanZ)/S, (U-meanZ)/S)
+
+        if alternative == 'greater':
+            return 1 - P
+        elif alternative == 'less':
+            return P
+        else:
+            return max(2 * min(P, 1-P), 0)
+
+    def interval(self, direction_of_interest, Y,
+                 alpha=0.05, UMAU=False):
+        r"""
+        For a realization $Y$ of the random variable $N(\mu,\Sigma)$
+        truncated to $C$ specified by `self.constraints` compute
+        the slice of the inequality constraints in a 
+        given direction $\eta$ and test whether 
+        $\eta^T\mu$ is greater then 0, less than 0 or equal to 0.
+        
+        Parameters
+        ----------
+
+        direction_of_interest: np.float
+
+            A direction $\eta$ for which we may want to form 
+            selection intervals or a test.
+
+        Y : np.float
+
+            A realization of $N(0,\Sigma)$ where 
+            $\Sigma$ is `self.covariance`.
+
+        alpha : float
+
+            What level of confidence?
+
+        UMAU : bool
+
+            Use the UMAU intervals?
+
+        Returns
+        -------
+
+        [U,L] : selection interval
+
+        
+        """
+        ## THE DOCUMENTATION IS NOT GOOD ! HAS TO BE CHANGED !
+
+        return selection_interval( \
+            self.linear_part,
+            self.offset,
+            self.covariance,
+            Y,
+            direction_of_interest,
+            alpha=alpha,
+            UMAU=UMAU)
+
+    def covariance_factors(self, force=True):
+        """
+        Factor `self.covariance`,
+        finding a possibly non-square square-root.
+
+        Parameters
+        ----------
+
+        force : bool
+            If True, force a recomputation of
+            the covariance. If not, assumes that
+            covariance has not changed.
+
+        """
+        if not hasattr(self, "_sqrt_cov") or force:
+
+            # original matrix is np.dot(U, (D**2 * U).T)
+
+            U, D = np.linalg.svd(self.covariance)[:2]
+            D = np.sqrt(D[:self.rank])
+            U = U[:,:self.rank]
+        
+            self._sqrt_cov = U * D[None,:]
+            self._sqrt_inv = (U / D[None,:]).T
+            self._rowspace = U
+
+        return self._sqrt_cov, self._sqrt_inv, self._rowspace
+
+    def whiten(self):
+        """
+
+        Return a whitened version of constraints in a different
+        basis, and a change of basis matrix.
+
+        If `self.covariance` is rank deficient, the change-of
+        basis matrix will not be square.
+
+        Returns
+        -------
+
+        inverse_map : callable
+
+        forward_map : callable
+
+        white_con : `constraints`
+
+        """
+        sqrt_cov, sqrt_inv = self.covariance_factors()[:2]
+
+        new_A = self.linear_part.dot(sqrt_cov)
+        den = np.sqrt((new_A**2).sum(1))
+        new_b = self.offset - self.linear_part.dot(self.mean)
+        new_con = constraints(new_A / den[:,None], new_b / den)
+
+        mu = self.mean.copy()
+
+        def inverse_map(Z): 
+            if Z.ndim == 2:
+                return sqrt_cov.dot(Z) + mu[:,None]
+            else:
+                return sqrt_cov.dot(Z) + mu
+
+        forward_map = lambda W: sqrt_inv.dot(W - mu)
+
+        return inverse_map, forward_map, new_con
+
+    def project_rowspace(self, direction):
+        """
+        Project a vector onto rowspace
+        of the covariance.
+        """
+        rowspace = self.covariance_factors()[-1]
+        return rowspace.dot(rowspace.T.dot(direction))
+
+    def solve(self, direction):
+        """
+        Compute the inverse of the covariance
+        times a direction vector.
+        """
+        sqrt_inv = self.covariance_factors()[1]
+        return sqrt_inv.T.dot(sqrt_inv.dot(direction))
+
+def stack(*cons):
+    """
+    Combine constraints into a large constaint
+    by intersection. 
+
+    Parameters
+    ----------
+
+    cons : [`selection.affine.constraints`_]
+         A sequence of constraints.
+
+    Returns
+    -------
+
+    intersection : `selection.affine.constraints`_
+
+    Notes
+    -----
+
+    Resulting constraint will have mean 0 and covariance $I$.
+
+    """
+    ineq, ineq_off = [], []
+    for con in cons:
+        ineq.append(con.linear_part)
+        ineq_off.append(con.offset)
+
+    intersection = constraints(np.vstack(ineq), 
+                               np.hstack(ineq_off))
+    return intersection
+
+def interval_constraints(support_directions, 
+                         support_offsets,
+                         covariance,
+                         observed_data, 
+                         direction_of_interest,
+                         tol = 1.e-4):
+    r"""
+    Given an affine constraint $\{z:Az \leq b \leq \}$ (elementwise)
+    specified with $A$ as `support_directions` and $b$ as
+    `support_offset`, a new direction of interest $\eta$, and
+    an `observed_data` is Gaussian vector $Z \sim N(\mu,\Sigma)$ 
+    with `covariance` matrix $\Sigma$, this
+    function returns $\eta^TZ$ as well as an interval
+    bounding this value. 
+
+    The interval constructed is such that the endpoints are 
+    independent of $\eta^TZ$, hence the $p$-value
+    of `Kac Rice`_
+    can be used to form an exact pivot.
+
+    Parameters
+    ----------
+
+    support_directions : np.float
+         Matrix specifying constraint, $A$.
+
+    support_offsets : np.float
+         Offset in constraint, $b$.
+
+    covariance : np.float
+         Covariance matrix of `observed_data`.
+
+    observed_data : np.float
+         Observations.
+
+    direction_of_interest : np.float
+         Direction in which we're interested for the
+         contrast.
+
+    tol : float
+         Relative tolerance parameter for deciding 
+         sign of $Az-b$.
+
+    Returns
+    -------
+
+    lower_bound : float
+
+    observed : float
+
+    upper_bound : float
+
+    sigma : float
+
+    """
+
+    # shorthand
+    A, b, S, X, w = (support_directions,
+                     support_offsets,
+                     covariance,
+                     observed_data,
+                     direction_of_interest)
+
+    U = A.dot(X) - b
+    if not np.all(U  < tol * np.fabs(U).max()) and WARNINGS:
+        warn('constraints not satisfied: %s' % repr(U))
+
+    Sw = S.dot(w)
+    sigma = np.sqrt((w*Sw).sum())
+    alpha = A.dot(Sw) / sigma**2
+    V = (w*X).sum() # \eta^TZ
+
+    # adding the zero_coords in the denominator ensures that
+    # there are no divide-by-zero errors in RHS
+    # these coords are never used in upper_bound or lower_bound
+
+    zero_coords = alpha == 0
+    RHS = (-U + V * alpha) / (alpha + zero_coords)
+    RHS[zero_coords] = np.nan
+
+    pos_coords = alpha > tol * np.fabs(alpha).max()
+    if np.any(pos_coords):
+        upper_bound = RHS[pos_coords].min()
+    else:
+        upper_bound = np.inf
+    neg_coords = alpha < -tol * np.fabs(alpha).max()
+    if np.any(neg_coords):
+        lower_bound = RHS[neg_coords].max()
+    else:
+        lower_bound = -np.inf
+
+    return lower_bound, V, upper_bound, sigma
+
+def selection_interval(support_directions, 
+                       support_offsets,
+                       covariance,
+                       noisy_observation,
+                       observation,
+                       direction_of_interest,
+                       percent_smoothing,
+                       tol = 1.e-4,
+                       level = 0.90,
+                       UMAU=True):
+    """
+    Given an affine in cone constraint $\{z:Az+b \leq 0\}$ (elementwise)
+    specified with $A$ as `support_directions` and $b$ as
+    `support_offset`, a new direction of interest $\eta$, and
+    `noisy_observation` is Gaussian vector $Z \sim N(\mu,\Sigma)$ 
+    with `covariance` matrix $\Sigma$, this
+    function returns a confidence interval
+    for $\eta^T\mu$.
+
+    Parameters
+    ----------
+
+    support_directions : np.float
+         Matrix specifying constraint, $A$.
+
+    support_offset : np.float
+         Offset in constraint, $b$.
+
+    covariance : np.float
+         Covariance matrix of `observed_data`.
+
+    noisy_observation : np.float
+         Observations.
+
+    observation : np.float
+         Observations.
+
+    direction_of_interest : np.float
+         Direction in which we're interested for the
+         contrast.
+
+    tol : float
+         Relative tolerance parameter for deciding 
+         sign of $Az-b$.
+
+    Returns
+    -------
+
+    confidence_interval : (float, float)
+
+    """
+
+    (lower_bound,
+     noisy_estimate,
+     upper_bound,
+     sigma) = interval_constraints(support_directions, 
+                                   support_offsets,
+                                   covariance,
+                                   noisy_observation,
+                                   direction_of_interest,
+                                   tol=tol)
+
+    grid = np.linspace(lower_bound - 4 * sigma, upper_bound + 4 * sigma, 801)
+    weight = normal_dbn.cdf((upper_bound - grid) / sigma) - normal_dbn.cdf((lower_bound - grid) / sigma)
+    estimate = (direction_of_interest * observation).sum()
+    return discrete_family(grid, weight).equal_tailed_interval(estimate,
+                                                               alpha=1-level)
+
+def find_root(f, y, lb, ub, tol=1e-6):
+    """
+    searches for solution to f(x) = y in (lb, ub), where 
+    f is a monotone decreasing function
+    """       
+    
+    # make sure solution is in range
+    a, b   = lb, ub
+    fa, fb = f(a), f(b)
+    
+    # assume a < b
+    if fa > y and fb > y:
+        while fb > y : 
+            b, fb = b + (b-a), f(b + (b-a))
+    elif fa < y and fb < y:
+        while fa < y : 
+            a, fa = a - (b-a), f(a - (b-a))
+    
+    
+    # determine the necessary number of iterations
+    try:
+        max_iter = int( np.ceil( ( np.log(tol) - np.log(b-a) ) / np.log(0.5) ) )
+    except OverflowError:
+        warnings.warn('root finding failed, returning np.nan')
+        return np.nan
+        
+
+    # bisect (slow but sure) until solution is obtained
+    for _ in range(max_iter):
+        try:
+            c, fc  = (a+b)/2, f((a+b)/2)
+            if fc > y: a = c
+            elif fc < y: b = c
+        except OverflowError:
+            warnings.warn('root finding failed, returning np.nan')
+            return np.nan
+
+    return c
+        
+class discrete_family(object):
+
+    def __init__(self, sufficient_stat, weights, theta=0.):
+        r"""
+        A  discrete 1-dimensional
+        exponential family with reference measure $\sum_j w_j \delta_{X_j}$
+        and sufficient statistic `sufficient_stat`. For any $\theta$, the distribution
+        is
+
+        .. math::
+        
+            P_{\theta} = \sum_{j} e^{\theta X_j - \Lambda(\theta)} w_j \delta_{X_j}
+
+        where
+
+        .. math::
+
+            \Lambda(\theta) = \log \left(\sum_j w_j e^{\theta X_j} \right).
+
+        Parameters
+        ----------
+
+        sufficient_stat : `np.float((n))`
+
+        weights : `np.float(n)`
+
+        Notes
+        -----
+
+        The weights are normalized to sum to 1.
+        """
+        xw = np.array(sorted(zip(sufficient_stat, weights)), np.float)
+        self._x = xw[:,0]
+        self._w = xw[:,1]
+        self._lw = np.log(xw[:,1])
+        self._w /= self._w.sum() # make sure they are a pmf
+        self.n = len(xw)
+        self._theta = np.nan
+        self.theta = theta
+
+    @property
+    def theta(self):
+        """
+        The natural parameter of the family.
+        """
+        return self._theta
+
+    @theta.setter
+    def theta(self, _theta):
+        if _theta != self._theta:
+            _thetaX = _theta * self.sufficient_stat + self._lw
+            _largest = _thetaX.max() - 5 # try to avoid over/under flow, 5 seems arbitrary
+            _exp_thetaX = np.exp(_thetaX - _largest)
+            _prod = _exp_thetaX
+            self._partition = np.sum(_prod)
+            self._pdf = _prod / self._partition
+            self._partition *= np.exp(_largest)
+        self._theta = _theta
+
+    @property
+    def partition(self):
+        r"""
+        Partition function at `self.theta`:
+
+        .. math::
+
+            \sum_j e^{\theta X_j} w_j
+        """
+        if hasattr(self, "_partition"):
+            return self._partition
+
+    @property
+    def sufficient_stat(self):
+        """
+        Sufficient statistics of the exponential family.
+        """
+        return self._x
+
+    @property
+    def weights(self):
+        """
+        Weights of the exponential family.
+        """
+        return self._w
+
+    def pdf(self, theta):
+        r"""
+        Density of $P_{\theta}$ with respect to $P_0$.
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        Returns
+        -------
+
+        pdf : np.float
+        
+        """
+        self.theta = theta # compute partition if necessary
+        return self._pdf
+ 
+    def cdf(self, theta, x=None, gamma=1):
+        r"""
+        The cumulative distribution function of $P_{\theta}$ with
+        weight `gamma` at `x`
+
+        .. math::
+
+            P_{\theta}(X < x) + \gamma * P_{\theta}(X = x)
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        x : float (optional)
+             Where to evaluate CDF.
+
+        gamma : float(optional)
+             Weight given at `x`.
+
+        Returns
+        -------
+
+        cdf : np.float
+
+        """
+        pdf = self.pdf(theta)
+        if x is None:
+            return np.cumsum(pdf) - pdf * (1 - gamma)
+        else:
+            tr = np.sum(pdf * (self.sufficient_stat < x)) 
+            if x in self.sufficient_stat:
+                tr += gamma * np.sum(pdf[np.where(self.sufficient_stat == x)])
+            return tr
+
+    def sf(self, theta, x=None, gamma=0, return_unnorm=False):
+        r"""
+        The complementary cumulative distribution function 
+        (i.e. survival function) of $P_{\theta}$ with
+        weight `gamma` at `x`
+
+        .. math::
+
+            P_{\theta}(X > x) + \gamma * P_{\theta}(X = x)
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        x : float (optional)
+             Where to evaluate SF.
+
+        gamma : float(optional)
+             Weight given at `x`.
+
+        Returns
+        -------
+
+        sf : np.float
+
+        """
+        pdf = self.pdf(theta)
+        if x is None:
+            return np.cumsum(pdf[::-1])[::-1] - pdf * (1 - gamma)
+        else:
+            tr = np.sum(pdf * (self.sufficient_stat > x)) 
+            if x in self.sufficient_stat:
+                tr += gamma * np.sum(pdf[np.where(self.sufficient_stat == x)])
+            return tr
+
+    def E(self, theta, func):
+        r"""
+        Expectation of `func` under $P_{\theta}$
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        func : callable
+             Assumed to be vectorized.
+
+        gamma : float(optional)
+             Weight given at `x`.
+
+        Returns
+        -------
+
+        E : np.float
+
+        """
+        T = np.asarray(func(self.sufficient_stat))
+        pdf_ = self.pdf(theta)
+
+        if T.ndim == 1:
+            return (T * pdf_).sum()
+        else:
+            val = (T * pdf_[:,None]).sum(0)
+            return val
+
+
+    def Var(self, theta, func):
+        r"""
+        Variance of `func` under $P_{\theta}$
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        func : callable
+             Assumed to be vectorized.
+
+        Returns
+        -------
+
+        var : np.float
+
+        """
+
+        mu = self.E(theta, func)
+        return self.E(theta, lambda x: (func(x)-mu)**2)
+        
+    def Cov(self, theta, func1, func2):
+        r"""
+        Covariance of `func1` and `func2` under $P_{\theta}$
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        func1, func2 : callable
+             Assumed to be vectorized.
+
+        Returns
+        -------
+
+        cov : np.float
+
+        """
+
+        mu1 = self.E(theta, func1)
+        mu2 = self.E(theta, func2)
+        return self.E(theta, lambda x: (func1(x)-mu1)*(func2(x)-mu2))
+
+    def equal_tailed_interval(self,
+                              observed,
+                              alpha=0.05,
+                              randomize=True,
+                              auxVar=None,
+                              tol=1e-6):
+        """
+        Form interval by inverting
+        equal-tailed test with $\alpha/2$ in each tail.
+
+        Parameters
+        ----------
+
+        observed : float
+             Observed sufficient statistic.
+
+        alpha : float (optional)
+             Size of two-sided test.
+
+        randomize : bool
+             Perform the randomized test (or conservative test).
+
+        auxVar : [None, float]
+             If randomizing and not None, use this
+             as the random uniform variate.
+
+        Returns
+        -------
+
+        lower, upper : float
+             Limits of confidence interval.
+
+        """
+
+        mu = self.E(self.theta, lambda x: x)
+        sigma  = np.sqrt(self.Var(self.theta, lambda x: x))
+        lb = mu - 20 * sigma
+        ub = mu + 20 * sigma
+        F = lambda th : self.cdf(th, observed)
+        L = find_root(F, 1.0 - 0.5 * alpha, lb, ub)
+        U = find_root(F, 0.5 * alpha, lb, ub)
+        return L, U
+
+    def equal_tailed_test(self,
+                          theta0,
+                          observed,
+                          alpha=0.05):
+        r"""
+        Equal tailed test of H_0:theta=theta_0
+
+        Parameters
+        ----------
+
+        theta0 : float
+             Natural parameter under null hypothesis.
+
+        observed : float
+             Observed sufficient statistic.
+
+        alpha : float (optional)
+             Size of two-sided test.
+
+        randomize : bool
+             Perform the randomized test (or conservative test).
+
+        Returns
+        -------
+
+        decision : np.bool
+             Is the null hypothesis $H_0:\theta=\theta_0$ rejected?
+   
+        Notes
+        -----
+
+        We need an auxiliary uniform variable to carry out the randomized test.
+        Larger auxVar corresponds to x being slightly "larger." It can be passed in,
+        or chosen at random. If randomize=False, we get a conservative test.
+        """
+
+        pval = self.cdf(theta0, observed, gamma=0.5)
+        return min(pval, 1-pval) < alpha
+
+    def MLE(self,
+            observed,
+            initial=0,
+            max_iter=20,
+            tol=1.e-4):
+
+        r"""
+        Compute the maximum likelihood estimator
+        based on observed sufficient statistic `observed`.
+
+        Parameters
+        ----------
+
+        observed : float
+             Observed value of sufficient statistic
+
+        initial : float
+             Starting point for Newton-Raphson
+
+        max_iter : int (optional)
+             Maximum number of Newton-Raphson iterations
+
+        tol : float (optional)
+             Tolerance parameter for stopping, based
+             on relative change in parameter estimate.
+             Iteration stops when the change is smaller
+             than `tol * max(1, np.fabs(cur_estimate))`.
+
+        Returns
+        -------
+
+        theta_hat : float
+             Maximum likelihood estimator.
+   
+        std_err : float
+             Estimated variance of `theta_hat` based
+             on inverse of variance of sufficient
+             statistic at `theta_hat`, i.e. the
+             observed Fisher information.
+
+        """
+
+        cur_est = initial
+
+        def first_two_moments(x):
+            return np.array([x, x**2]).T
+        
+        for i in range(max_iter):
+            cur_moments = self.E(cur_est, first_two_moments) # gradient and
+                                                             # Hessian of CGF
+                                                             # (almost)
+            grad, hessian = (cur_moments[0] - observed, 
+                             cur_moments[1] - cur_moments[0]**2)
+            next_est = cur_est - grad / hessian # newton step
+
+            if np.fabs(next_est - cur_est) < tol * max(1, np.fabs(cur_est)):
+                break
+            cur_est = next_est
+
+            if i == max_iter - 1:
+                warnings.warn('Newton-Raphson failed to converge after %d iterations' % max_iter)
+
+        cur_moments = self.E(cur_est, first_two_moments) # gradient and
+                                                         # Hessian of CGF
+                                                         # (almost)
+        grad, hessian = (cur_moments[0] - observed, 
+                         cur_moments[1] - cur_moments[0]**2)
+
+        return cur_est, 1. / hessian, grad
+

From 6da6b65633374d7eeb98c9221cbe8f95b7f9fc86 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 26 Oct 2023 20:38:14 -0700
Subject: [PATCH 03/44] retain a reference to state

---
 glmnet/glm.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/glmnet/glm.py b/glmnet/glm.py
index 5be19c6..4f3214f 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -473,9 +473,7 @@ def obj_function(response,
                                                state.mean_parameter,
                                                sample_weight) # not the normalized weights!
 
-        if offset is None:
-            offset = np.zeros(y.shape[0])
-
+        self.state_ = state
         self._set_coef_intercept(state)
 
         if (hasattr(self._family, "base") and 

From b34b385700555f7323a27bccba5b8b331f944573 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 26 Oct 2023 22:19:24 -0700
Subject: [PATCH 04/44] WIP: exact smoothed polyhedral

---
 glmnet/inference.py       | 54 ++++++++++++++++++++++++++++++++++++---
 glmnet/regularized_glm.py |  2 +-
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index b56312d..a9d5bc7 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -14,13 +14,59 @@
 
 from warnings import warn
 from copy import copy
-
 import numpy as np
 
-from .discrete_family import discrete_family
-from mpmath import mp
+def lasso_inference(glmnet,
+                    lambda_val,
+                    selection_data,
+                    full_data):
+
+    fixed_lambda = glmnet.fixed_lambda_estimator(lambda_val)
+    X_sel, Y_sel, weight_sel = selection_data
+    fixed_lambda.fit(X_sel, Y_sel, sample_weight=weight_sel)
+    FL = fixed_lambda # shorthand
+    
+    if (not np.all(FL.upper_limits == FL.control.big) or
+        not np.all(FL.lower_limits == -FL.control.big)):
+        raise NotImplementedError('upper/lower limits coming soon')
+
+    active_set = np.nonzero(FL.coef != 0)[0]
+    state = FL.state_
+    information = FL._family.information(state,
+                                         sample_weight)
 
-WARNINGS = False
+    Q_E = FL._design.quadratic_form(information,
+                                    columns=active_set)
+    
+    C_E = np.linalg.inv(Q_E)
+    keep = np.zeros(Q_E.shape[0])
+    penfac = FL.penalty_factors[active_set]
+    sings = np.sign(FL.coef_[active_set])
+    if FL.fit_intercept:
+        penfac = np.hstack([0, penfac])
+        signs = np.hstack([0, signs])
+        keep[0] = 1
+        keep[1 + active_set] = 1
+        Q_E = Q_E[keep]
+        stacked = np.hstack([state.intercept_,
+                             state.coef_[active_set]])
+    else:
+        keep[active_set] = 1
+        Q_E = Q_E[keep]
+        stacked = state.coef_[active_set]
+
+    C_E = np.linalg.inv(Q_E)
+    delta = np.zeros(Q_E.shape[0])
+    delta[1:] = lambda_val * penfac * signs
+    delta = C_E @ delta
+    noisy_mle = stacked + C_E @ neg_score
+
+    penalized = penfac > 0
+    sel_P = -signs[penalized][:,None] * np.eye(C_E.shape[0])[penalized]
+    con = constraints(sel_P,
+                      -signs[penalized] * delta[penalized],
+                      covariance=C_E) # adjust 
+    return con
 
 class constraints(object):
 
diff --git a/glmnet/regularized_glm.py b/glmnet/regularized_glm.py
index 9d1e294..2285e68 100644
--- a/glmnet/regularized_glm.py
+++ b/glmnet/regularized_glm.py
@@ -113,7 +113,7 @@ def newton_step(self,
         
         warm = (cur_state.coef,
                 cur_state.intercept,
-                cur_state.eta) # just X\beta -- doesn't include offset
+                cur_state.linear_parameter) # just X\beta -- doesn't include offset
 
         elnet_fit = self.elnet_estimator.fit(design,
                                              z,

From 8294e7274f4ed7cd18e703dc2b983aaa2003c85f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 26 Oct 2023 22:23:00 -0700
Subject: [PATCH 05/44] WIP: put fixed lambda in inference module

---
 glmnet/glmnet.py    |  2 --
 glmnet/inference.py | 27 ++++++++++++++++-----------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/glmnet/glmnet.py b/glmnet/glmnet.py
index 49621e8..b767ab7 100644
--- a/glmnet/glmnet.py
+++ b/glmnet/glmnet.py
@@ -463,5 +463,3 @@ def _get_initial_state(self,
                 intercept_ = 0
         return GLMState(coef_, intercept_), keep.astype(float)
 
-
-
diff --git a/glmnet/inference.py b/glmnet/inference.py
index a9d5bc7..121c1bf 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -1,14 +1,6 @@
 """
 This module contains the core code needed for post selection
-inference based on affine selection procedures as
-described in the papers `Kac Rice`_, `Spacings`_, `covTest`_
-and `post selection LASSO`_.
-
-.. _covTest: http://arxiv.org/abs/1301.7161
-.. _Kac Rice: http://arxiv.org/abs/1308.3020
-.. _Spacings: http://arxiv.org/abs/1401.3889
-.. _post selection LASSO: http://arxiv.org/abs/1311.6238
-.. _sample carving: http://arxiv.org/abs/1410.2597
+after LASSO.
 
 """
 
@@ -16,12 +8,25 @@
 from copy import copy
 import numpy as np
 
-def lasso_inference(glmnet,
+def fixed_lambda_estimator(glmnet_obj,
+                           lambda_val):
+    check_is_fitted(glmnet_obj, ["coefs_", "feature_names_in_"])
+    estimator = clone(glmnet_obj.reg_glm_est_)
+    estimator.lambda_val = lambda_val
+    coefs, intercepts = glmnet_obj.interpolate_coefs([lambda_val])
+    cls = glmnet_obj.state_.__class__
+    state = cls(coefs[0], intercepts[0])
+    estimator.regularizer_ = glmnet_obj.reg_glm_est_.regularizer_
+    estimator.regularizer_.warm_state = state
+
+    return estimator
+
+def lasso_inference(glmnet_obj,
                     lambda_val,
                     selection_data,
                     full_data):
 
-    fixed_lambda = glmnet.fixed_lambda_estimator(lambda_val)
+    fixed_lambda = fixed_lambda_estimator(glmnet_obj, lambda_val)
     X_sel, Y_sel, weight_sel = selection_data
     fixed_lambda.fit(X_sel, Y_sel, sample_weight=weight_sel)
     FL = fixed_lambda # shorthand

From c1971a85c195b7306581a5d9dd5fd9d1859ff831 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 26 Oct 2023 23:05:40 -0700
Subject: [PATCH 06/44] fitting on selection, refit on GLM; added a method to
 get an LM for a given GLMNet object -- need to do for fastpaths

---
 glmnet/base.py            |  1 +
 glmnet/cox.py             | 11 +++++----
 glmnet/docstrings.py      |  5 +++++
 glmnet/glm.py             |  3 ++-
 glmnet/glmnet.py          |  9 ++++++++
 glmnet/inference.py       | 47 ++++++++++++++++++++++++++-------------
 glmnet/paths/fastnet.py   |  4 ++++
 glmnet/regularized_glm.py |  9 +++++++-
 8 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/glmnet/base.py b/glmnet/base.py
index e9f7e94..534bc5a 100644
--- a/glmnet/base.py
+++ b/glmnet/base.py
@@ -129,6 +129,7 @@ def quadratic_form(self,
             GX = G @ X_R
             G1 = G @ np.ones(G.shape[0])
             XX_block = self.X.T @ GX
+            X1_block = self.X.T @ G1
             G_sum = G1.sum()
             
         if scipy.sparse.issparse(XX_block):
diff --git a/glmnet/cox.py b/glmnet/cox.py
index 643c1c0..5333d72 100644
--- a/glmnet/cox.py
+++ b/glmnet/cox.py
@@ -277,6 +277,12 @@ def _get_family_spec(self,
                              status_id=self.family.status_id,
                              start_id=self.family.start_id)
     
+    def get_LM(self):
+        return CoxLM(family=self.family,
+                     offset_id=self.offset_id,
+                     weight_id=self.weight_id,
+                     response_id=self.response_id)
+
     def _get_initial_state(self,
                            X,
                            y,
@@ -292,10 +298,7 @@ def _get_initial_state(self,
         if keep.sum() > 0:
             X_keep = X[:,keep]
 
-            coxlm = CoxLM(family=self.family,
-                          offset_id=self.offset_id,
-                          weight_id=self.weight_id,
-                          response_id=self.response_id)
+            coxlm = self.get_LM()
             coxlm.fit(X_keep, y)
             coef_[keep] = coxlm.coef_
 
diff --git a/glmnet/docstrings.py b/glmnet/docstrings.py
index 8baf97d..d7e08c5 100644
--- a/glmnet/docstrings.py
+++ b/glmnet/docstrings.py
@@ -12,6 +12,11 @@
 #     Data class containing GLMNet control parameters.
 #     ''',
 
+    'intercept':'''
+intercept:
+    For a Design, is there an intercept?
+    ''',
+    
     'nlambda':'''
 nlambda: int
     Number of values on data-dependent grid of lambda values.
diff --git a/glmnet/glm.py b/glmnet/glm.py
index dda3fd7..620e629 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -503,6 +503,8 @@ def obj_function(response,
                    obj_function,
                    self.control)
 
+        self.state_ = state
+
         if self.summarize:
             self._information = self._family.information(state,
                                                          sample_weight)
@@ -517,7 +519,6 @@ def obj_function(response,
                                                state.mean_parameter,
                                                sample_weight) # not the normalized weights!
 
-        self.state_ = state
         self._set_coef_intercept(state)
 
         if (hasattr(self._family, "base") and 
diff --git a/glmnet/glmnet.py b/glmnet/glmnet.py
index b767ab7..099b186 100644
--- a/glmnet/glmnet.py
+++ b/glmnet/glmnet.py
@@ -184,6 +184,8 @@ def fit(self,
                                   check=False,
                                   fit_null=False)
 
+            self.state_ = self.reg_glm_est_.state_
+
             coefs_.append(self.reg_glm_est_.coef_.copy())
             intercepts_.append(self.reg_glm_est_.intercept_)
             dev_ratios_.append(1 - self.reg_glm_est_.deviance_ / self.null_deviance_)
@@ -463,3 +465,10 @@ def _get_initial_state(self,
                 intercept_ = 0
         return GLMState(coef_, intercept_), keep.astype(float)
 
+    def get_LM(self):
+        return GLM(family=self.family,
+                   fit_intercept=self.fit_intercept,
+                   offset_id=self.offset_id,
+                   weight_id=self.weight_id,
+                   response_id=self.response_id)
+
diff --git a/glmnet/inference.py b/glmnet/inference.py
index 121c1bf..1ba3e5e 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -7,6 +7,10 @@
 from warnings import warn
 from copy import copy
 import numpy as np
+import pandas as pd
+
+from sklearn.utils.validation import check_is_fitted
+from sklearn.base import clone
 
 def fixed_lambda_estimator(glmnet_obj,
                            lambda_val):
@@ -31,46 +35,59 @@ def lasso_inference(glmnet_obj,
     fixed_lambda.fit(X_sel, Y_sel, sample_weight=weight_sel)
     FL = fixed_lambda # shorthand
     
-    if (not np.all(FL.upper_limits == FL.control.big) or
-        not np.all(FL.lower_limits == -FL.control.big)):
+    if (not np.all(FL.upper_limits == np.inf) or
+        not np.all(FL.lower_limits == -np.inf)):
         raise NotImplementedError('upper/lower limits coming soon')
 
-    active_set = np.nonzero(FL.coef != 0)[0]
+    active_set = np.nonzero(FL.coef_ != 0)[0]
     state = FL.state_
     information = FL._family.information(state,
-                                         sample_weight)
+                                         weight_sel)
 
-    Q_E = FL._design.quadratic_form(information,
+    Q_E = FL.design_.quadratic_form(information,
                                     columns=active_set)
-    
-    C_E = np.linalg.inv(Q_E)
-    keep = np.zeros(Q_E.shape[0])
-    penfac = FL.penalty_factors[active_set]
-    sings = np.sign(FL.coef_[active_set])
+    keep = np.zeros(Q_E.shape[0], bool)
+    if hasattr(FL, 'penalty_factors'):
+        penfac = FL.penalty_factors[active_set]
+    else:
+        penfac = np.ones(active_set.shape[0])
+    signs = np.sign(FL.coef_[active_set])
     if FL.fit_intercept:
         penfac = np.hstack([0, penfac])
         signs = np.hstack([0, signs])
         keep[0] = 1
         keep[1 + active_set] = 1
         Q_E = Q_E[keep]
-        stacked = np.hstack([state.intercept_,
-                             state.coef_[active_set]])
+        stacked = np.hstack([state.intercept,
+                             state.coef[active_set]])
     else:
         keep[active_set] = 1
         Q_E = Q_E[keep]
-        stacked = state.coef_[active_set]
+        stacked = state.coef[active_set]
 
     C_E = np.linalg.inv(Q_E)
     delta = np.zeros(Q_E.shape[0])
-    delta[1:] = lambda_val * penfac * signs
+    delta[1:] = lambda_val * penfac[1:] * signs[1:]
     delta = C_E @ delta
-    noisy_mle = stacked + C_E @ neg_score
+    noisy_mle = stacked + C_E @ delta
 
     penalized = penfac > 0
     sel_P = -signs[penalized][:,None] * np.eye(C_E.shape[0])[penalized]
     con = constraints(sel_P,
                       -signs[penalized] * delta[penalized],
                       covariance=C_E) # adjust 
+
+    # fit full model
+
+    X_full, Y_full, weight_full = full_data
+
+    unreg_LM = glmnet_obj.get_LM()
+    unreg_LM.summarize = True
+    unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
+    C_full = unreg_LM.covariance_
+
+    selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_E).sum(), 0, 1)
+    print(selection_proportion)
     return con
 
 class constraints(object):
diff --git a/glmnet/paths/fastnet.py b/glmnet/paths/fastnet.py
index 5246565..e3f8c0f 100644
--- a/glmnet/paths/fastnet.py
+++ b/glmnet/paths/fastnet.py
@@ -11,6 +11,7 @@
 
 from ..base import _get_design
 from ..glmnet import GLMNet
+from ..glm import GLMState
 from ..elnet import (_check_and_set_limits,
                     _check_and_set_vp,
                     _design_wrapper_args)
@@ -128,6 +129,9 @@ def fit(self,
         self.coefs_ = result['coefs']
         self.intercepts_ = result['intercepts']
             
+        self.state_ = GLMState(self.coefs_[-1],
+                               self.intercepts_[-1])
+
         self.lambda_values_ = result['lambda_values']
         nfits = self.lambda_values_.shape[0]
         dev_ratios_ = self._fit['dev'][:nfits]
diff --git a/glmnet/regularized_glm.py b/glmnet/regularized_glm.py
index 2285e68..f21bcb6 100644
--- a/glmnet/regularized_glm.py
+++ b/glmnet/regularized_glm.py
@@ -113,7 +113,7 @@ def newton_step(self,
         
         warm = (cur_state.coef,
                 cur_state.intercept,
-                cur_state.linear_parameter) # just X\beta -- doesn't include offset
+                cur_state.linear_predictor) # just X\beta -- doesn't include offset
 
         elnet_fit = self.elnet_estimator.fit(design,
                                              z,
@@ -141,6 +141,13 @@ class RegGLM(GLM,
 
     control: RegGLMControl = field(default_factory=RegGLMControl)
 
+    def get_LM(self):
+        return GLM(family=self.family,
+                   fit_intercept=self.fit_intercept,
+                   offset_id=self.offset_id,
+                   weight_id=self.weight_id,
+                   response_id=self.response_id)
+
     def _get_regularizer(self,
                          X):
 

From a60a88b8d92cb4a5425438f1b6a49df92ccaba46 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 26 Oct 2023 23:32:50 -0700
Subject: [PATCH 07/44] refit on selected to get better estimate of variance at
 mle, not lasso

---
 glmnet/inference.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 1ba3e5e..13ae587 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -86,7 +86,11 @@ def lasso_inference(glmnet_obj,
     unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
     C_full = unreg_LM.covariance_
 
-    selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_E).sum(), 0, 1)
+    unreg_sel_LM = glmnet_obj.get_LM()
+    unreg_sel_LM.summarize = True
+    unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
+    C_sel = unreg_sel_LM.covariance_
+    selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_sel).sum(), 0, 1)
     print(selection_proportion)
     return con
 

From 2d5c165699187dcd6f0f3397983a1cadfb219c38 Mon Sep 17 00:00:00 2001
From: kevinbfry <15952367+kevinbfry@users.noreply.github.com>
Date: Tue, 31 Oct 2023 14:42:30 -0700
Subject: [PATCH 08/44] lasso_inference runs, but intervals wrong

---
 glmnet/inference.py | 94 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 73 insertions(+), 21 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 13ae587..28a2218 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -4,10 +4,12 @@
 
 """
 
+import warnings
 from warnings import warn
 from copy import copy
 import numpy as np
 import pandas as pd
+from scipy.stats import norm as normal_dbn
 
 from sklearn.utils.validation import check_is_fitted
 from sklearn.base import clone
@@ -28,7 +30,8 @@ def fixed_lambda_estimator(glmnet_obj,
 def lasso_inference(glmnet_obj,
                     lambda_val,
                     selection_data,
-                    full_data):
+                    full_data,
+                    level=.9):
 
     fixed_lambda = fixed_lambda_estimator(glmnet_obj, lambda_val)
     X_sel, Y_sel, weight_sel = selection_data
@@ -43,7 +46,7 @@ def lasso_inference(glmnet_obj,
     state = FL.state_
     information = FL._family.information(state,
                                          weight_sel)
-
+    
     Q_E = FL.design_.quadratic_form(information,
                                     columns=active_set)
     keep = np.zeros(Q_E.shape[0], bool)
@@ -57,16 +60,19 @@ def lasso_inference(glmnet_obj,
         signs = np.hstack([0, signs])
         keep[0] = 1
         keep[1 + active_set] = 1
+        print("fitting intercept")
+        # C_E_active = C_E[keep]
         Q_E = Q_E[keep]
         stacked = np.hstack([state.intercept,
                              state.coef[active_set]])
     else:
         keep[active_set] = 1
-        Q_E = Q_E[keep]
+        # C_E_active = C_E[keep]
+        Q_E = Q_E[keep][:,1:]
         stacked = state.coef[active_set]
 
     C_E = np.linalg.inv(Q_E)
-    delta = np.zeros(Q_E.shape[0])
+    delta = np.zeros(C_E.shape[0])
     delta[1:] = lambda_val * penfac[1:] * signs[1:]
     delta = C_E @ delta
     noisy_mle = stacked + C_E @ delta
@@ -89,10 +95,40 @@ def lasso_inference(glmnet_obj,
     unreg_sel_LM = glmnet_obj.get_LM()
     unreg_sel_LM.summarize = True
     unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
+    print(unreg_sel_LM.summary_)
     C_sel = unreg_sel_LM.covariance_
-    selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_sel).sum(), 0, 1)
-    print(selection_proportion)
-    return con
+    # selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_sel).sum(), 0, 1)
+    
+    ## TODO: will this handle fit_intercept?
+    if FL.fit_intercept:
+        stacked = np.hstack([unreg_LM.intercept_,
+                             unreg_LM.coef_])
+    else:
+        full_mle = unreg_LM.coef_
+
+    ## iterate over coordinates
+    Ls = np.zeros_like(noisy_mle)
+    Us = np.zeros_like(noisy_mle)
+    for i in range(len(noisy_mle)):
+        e_i = np.zeros_like(noisy_mle)
+        e_i[i] = 1.
+        ## call selection_interval and return
+        # print(selection_proportion)
+        L, U = selection_interval(
+            support_directions=con.linear_part,
+            support_offsets=con.offset,
+            covariance_noisy=C_sel,
+            covariance_full=C_full,
+            noisy_observation=noisy_mle,
+            observation=full_mle,
+            direction_of_interest=e_i,
+            # tol=,
+            level=level,
+            # UMAU=,
+        )
+        Ls[i] = L
+        Us[i] = U
+    return (Ls, Us)
 
 class constraints(object):
 
@@ -422,7 +458,7 @@ def interval(self, direction_of_interest, Y,
         Returns
         -------
 
-        [U,L] : selection interval
+        [L,U] : selection interval
 
         
         """
@@ -434,7 +470,7 @@ def interval(self, direction_of_interest, Y,
             self.covariance,
             Y,
             direction_of_interest,
-            alpha=alpha,
+            level=1. - alpha,
             UMAU=UMAU)
 
     def covariance_factors(self, force=True):
@@ -519,6 +555,10 @@ def solve(self, direction):
         sqrt_inv = self.covariance_factors()[1]
         return sqrt_inv.T.dot(sqrt_inv.dot(direction))
 
+
+
+
+
 def stack(*cons):
     """
     Combine constraints into a large constaint
@@ -645,11 +685,12 @@ def interval_constraints(support_directions,
 
 def selection_interval(support_directions, 
                        support_offsets,
-                       covariance,
+                       covariance_noisy,
+                       covariance_full,
                        noisy_observation,
                        observation,
                        direction_of_interest,
-                       percent_smoothing,
+                    #    percent_smoothing, ## sqrt(alpha)
                        tol = 1.e-4,
                        level = 0.90,
                        UMAU=True):
@@ -696,18 +737,29 @@ def selection_interval(support_directions,
     """
 
     (lower_bound,
-     noisy_estimate,
+     _,
      upper_bound,
-     sigma) = interval_constraints(support_directions, 
-                                   support_offsets,
-                                   covariance,
-                                   noisy_observation,
-                                   direction_of_interest,
-                                   tol=tol)
-
+     _) = interval_constraints(support_directions, 
+                                support_offsets,
+                                covariance_noisy,
+                                noisy_observation,
+                                direction_of_interest,
+                                tol=tol)
+    ## lars path lockhart tibs^2 taylor paper
+    sigma = np.sqrt(direction_of_interest.T @ covariance_full @ direction_of_interest)
+    ## sqrt(alpha) is just sigma_noisy - sigma_full
+    smoothing_sigma = np.sqrt(direction_of_interest.T @ covariance_noisy @ direction_of_interest - sigma**2)
     grid = np.linspace(lower_bound - 4 * sigma, upper_bound + 4 * sigma, 801)
-    weight = normal_dbn.cdf((upper_bound - grid) / sigma) - normal_dbn.cdf((lower_bound - grid) / sigma)
+    weight = (
+        normal_dbn.cdf(
+            (upper_bound - grid) / (smoothing_sigma)
+        ) - normal_dbn.cdf(
+            (lower_bound - grid) / (smoothing_sigma)
+        )
+    )
+    weight *= normal_dbn.pdf(grid / sigma)
     estimate = (direction_of_interest * observation).sum()
+    # assert(0==1)
     return discrete_family(grid, weight).equal_tailed_interval(estimate,
                                                                alpha=1-level)
 
@@ -781,7 +833,7 @@ def __init__(self, sufficient_stat, weights, theta=0.):
 
         The weights are normalized to sum to 1.
         """
-        xw = np.array(sorted(zip(sufficient_stat, weights)), np.float)
+        xw = np.array(sorted(zip(sufficient_stat, weights)), float)
         self._x = xw[:,0]
         self._w = xw[:,1]
         self._lw = np.log(xw[:,1])

From d67a1e7f0a4c0744354ddba602db5ecc446a7d3c Mon Sep 17 00:00:00 2001
From: kevinbfry <15952367+kevinbfry@users.noreply.github.com>
Date: Tue, 31 Oct 2023 14:56:35 -0700
Subject: [PATCH 09/44] lasso_inference intervals look plausible

---
 glmnet/inference.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 28a2218..16be87d 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -90,12 +90,12 @@ def lasso_inference(glmnet_obj,
     unreg_LM = glmnet_obj.get_LM()
     unreg_LM.summarize = True
     unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
+    print(unreg_LM.summary_)
     C_full = unreg_LM.covariance_
 
     unreg_sel_LM = glmnet_obj.get_LM()
     unreg_sel_LM.summarize = True
     unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
-    print(unreg_sel_LM.summary_)
     C_sel = unreg_sel_LM.covariance_
     # selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_sel).sum(), 0, 1)
     
@@ -109,12 +109,14 @@ def lasso_inference(glmnet_obj,
     ## iterate over coordinates
     Ls = np.zeros_like(noisy_mle)
     Us = np.zeros_like(noisy_mle)
+    mles = np.zeros_like(noisy_mle)
+    pvals = np.zeros_like(noisy_mle)
     for i in range(len(noisy_mle)):
         e_i = np.zeros_like(noisy_mle)
         e_i[i] = 1.
         ## call selection_interval and return
         # print(selection_proportion)
-        L, U = selection_interval(
+        L, U, mle, p = selection_interval(
             support_directions=con.linear_part,
             support_offsets=con.offset,
             covariance_noisy=C_sel,
@@ -128,7 +130,9 @@ def lasso_inference(glmnet_obj,
         )
         Ls[i] = L
         Us[i] = U
-    return (Ls, Us)
+        mles[i] = mle
+        pvals[i] = p
+    return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us})
 
 class constraints(object):
 
@@ -760,8 +764,17 @@ def selection_interval(support_directions,
     weight *= normal_dbn.pdf(grid / sigma)
     estimate = (direction_of_interest * observation).sum()
     # assert(0==1)
-    return discrete_family(grid, weight).equal_tailed_interval(estimate,
-                                                               alpha=1-level)
+    sel_distr = discrete_family(grid, weight)
+    L, U = sel_distr.equal_tailed_interval(estimate,
+                                            alpha=1-level)
+    mle, _, _ = sel_distr.MLE(estimate)
+    mle *= sigma**2
+    pval = sel_distr.cdf(0, estimate)
+    pval = 2 * min(pval, 1-pval)
+    L *= sigma**2
+    U *= sigma**2
+
+    return L, U, mle, pval
 
 def find_root(f, y, lb, ub, tol=1e-6):
     """

From bd04add0d0c9187437fa75ad6adb8e1014951dbb Mon Sep 17 00:00:00 2001
From: kevinbfry <15952367+kevinbfry@users.noreply.github.com>
Date: Tue, 31 Oct 2023 15:11:24 -0700
Subject: [PATCH 10/44] dispersion=None signature in GLMBase.fit()

---
 glmnet/glm.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/glmnet/glm.py b/glmnet/glm.py
index 620e629..3878ba5 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -407,7 +407,7 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 4 options non sklearn API
-            dispersion=1,
+            dispersion=None,
             check=True,
             fit_null=True):
 
@@ -521,12 +521,14 @@ def obj_function(response,
 
         self._set_coef_intercept(state)
 
-        if (hasattr(self._family, "base") and 
+        if dispersion is not None:
+            self.dispersion_ = dispersion
+        elif (hasattr(self._family, "base") and 
             isinstance(self._family.base, sm_family.Gaussian)): # GLM specific
             # usual estimate of sigma^2
             self.dispersion_ = self.deviance_ / (nobs-nvar-self.fit_intercept) 
         else:
-            self.dispersion_ = dispersion
+            self.dispersion_ = 1
 
         return self
     fit.__doc__ = '''

From edfd5e169e2cdedbd41ca3a221033534c47018fb Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 14 Nov 2023 12:54:21 -0800
Subject: [PATCH 11/44] BF: delta not quite right, fix grid in case of infinite
 limits

---
 glmnet/inference.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 16be87d..543c817 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -55,30 +55,30 @@ def lasso_inference(glmnet_obj,
     else:
         penfac = np.ones(active_set.shape[0])
     signs = np.sign(FL.coef_[active_set])
+
     if FL.fit_intercept:
         penfac = np.hstack([0, penfac])
         signs = np.hstack([0, signs])
         keep[0] = 1
         keep[1 + active_set] = 1
-        print("fitting intercept")
-        # C_E_active = C_E[keep]
         Q_E = Q_E[keep]
         stacked = np.hstack([state.intercept,
                              state.coef[active_set]])
+        delta = np.zeros(Q_E.shape[0])
+        delta[1:] = lambda_val * penfac[1:] * signs[1:]
     else:
         keep[active_set] = 1
         # C_E_active = C_E[keep]
         Q_E = Q_E[keep][:,1:]
         stacked = state.coef[active_set]
+        delta = lambda_val * penfac * signs
 
     C_E = np.linalg.inv(Q_E)
-    delta = np.zeros(C_E.shape[0])
-    delta[1:] = lambda_val * penfac[1:] * signs[1:]
     delta = C_E @ delta
-    noisy_mle = stacked + C_E @ delta
+    noisy_mle = stacked + delta
 
     penalized = penfac > 0
-    sel_P = -signs[penalized][:,None] * np.eye(C_E.shape[0])[penalized]
+    sel_P = -np.diag(signs[penalized]) @ np.eye(C_E.shape[0])[penalized]
     con = constraints(sel_P,
                       -signs[penalized] * delta[penalized],
                       covariance=C_E) # adjust 
@@ -90,7 +90,6 @@ def lasso_inference(glmnet_obj,
     unreg_LM = glmnet_obj.get_LM()
     unreg_LM.summarize = True
     unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
-    print(unreg_LM.summary_)
     C_full = unreg_LM.covariance_
 
     unreg_sel_LM = glmnet_obj.get_LM()
@@ -101,8 +100,8 @@ def lasso_inference(glmnet_obj,
     
     ## TODO: will this handle fit_intercept?
     if FL.fit_intercept:
-        stacked = np.hstack([unreg_LM.intercept_,
-                             unreg_LM.coef_])
+        full_mle = np.hstack([unreg_LM.intercept_,
+                              unreg_LM.coef_])
     else:
         full_mle = unreg_LM.coef_
 
@@ -115,7 +114,6 @@ def lasso_inference(glmnet_obj,
         e_i = np.zeros_like(noisy_mle)
         e_i[i] = 1.
         ## call selection_interval and return
-        # print(selection_proportion)
         L, U, mle, p = selection_interval(
             support_directions=con.linear_part,
             support_offsets=con.offset,
@@ -658,7 +656,7 @@ def interval_constraints(support_directions,
                      direction_of_interest)
 
     U = A.dot(X) - b
-    if not np.all(U  < tol * np.fabs(U).max()) and WARNINGS:
+    if not np.all(U  < tol * np.fabs(U).max()):
         warn('constraints not satisfied: %s' % repr(U))
 
     Sw = S.dot(w)
@@ -753,7 +751,8 @@ def selection_interval(support_directions,
     sigma = np.sqrt(direction_of_interest.T @ covariance_full @ direction_of_interest)
     ## sqrt(alpha) is just sigma_noisy - sigma_full
     smoothing_sigma = np.sqrt(direction_of_interest.T @ covariance_noisy @ direction_of_interest - sigma**2)
-    grid = np.linspace(lower_bound - 4 * sigma, upper_bound + 4 * sigma, 801)
+    estimate = (direction_of_interest * observation).sum()
+    grid = np.linspace(estimate - 10 * sigma, estimate + 10 * sigma, 801)
     weight = (
         normal_dbn.cdf(
             (upper_bound - grid) / (smoothing_sigma)
@@ -762,7 +761,7 @@ def selection_interval(support_directions,
         )
     )
     weight *= normal_dbn.pdf(grid / sigma)
-    estimate = (direction_of_interest * observation).sum()
+
     # assert(0==1)
     sel_distr = discrete_family(grid, weight)
     L, U = sel_distr.equal_tailed_interval(estimate,

From 85c0083e01b17938b52693a15987f1f7f1390c53 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 14 Nov 2023 13:30:14 -0800
Subject: [PATCH 12/44] BF: weight sum factor

---
 glmnet/inference.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 543c817..0922035 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -35,6 +35,9 @@ def lasso_inference(glmnet_obj,
 
     fixed_lambda = fixed_lambda_estimator(glmnet_obj, lambda_val)
     X_sel, Y_sel, weight_sel = selection_data
+    if weight_sel is None:
+        weight_sel = np.ones(X_sel.shape[0])
+        
     fixed_lambda.fit(X_sel, Y_sel, sample_weight=weight_sel)
     FL = fixed_lambda # shorthand
     
@@ -56,6 +59,8 @@ def lasso_inference(glmnet_obj,
         penfac = np.ones(active_set.shape[0])
     signs = np.sign(FL.coef_[active_set])
 
+    # we multiply by weight_sel.sum() due to this factor
+    # appearing in glmnet objective...
     if FL.fit_intercept:
         penfac = np.hstack([0, penfac])
         signs = np.hstack([0, signs])
@@ -65,13 +70,13 @@ def lasso_inference(glmnet_obj,
         stacked = np.hstack([state.intercept,
                              state.coef[active_set]])
         delta = np.zeros(Q_E.shape[0])
-        delta[1:] = lambda_val * penfac[1:] * signs[1:]
+        delta[1:] = weight_sel.sum() * lambda_val * penfac[1:] * signs[1:]
     else:
         keep[active_set] = 1
         # C_E_active = C_E[keep]
         Q_E = Q_E[keep][:,1:]
         stacked = state.coef[active_set]
-        delta = lambda_val * penfac * signs
+        delta = weight_sel.sum() * lambda_val * penfac * signs
 
     C_E = np.linalg.inv(Q_E)
     delta = C_E @ delta
@@ -91,7 +96,6 @@ def lasso_inference(glmnet_obj,
     unreg_LM.summarize = True
     unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
     C_full = unreg_LM.covariance_
-
     unreg_sel_LM = glmnet_obj.get_LM()
     unreg_sel_LM.summarize = True
     unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
@@ -743,16 +747,16 @@ def selection_interval(support_directions,
      upper_bound,
      _) = interval_constraints(support_directions, 
                                 support_offsets,
-                                covariance_noisy,
+                                covariance_full,
                                 noisy_observation,
                                 direction_of_interest,
                                 tol=tol)
     ## lars path lockhart tibs^2 taylor paper
     sigma = np.sqrt(direction_of_interest.T @ covariance_full @ direction_of_interest)
     ## sqrt(alpha) is just sigma_noisy - sigma_full
-    smoothing_sigma = np.sqrt(direction_of_interest.T @ covariance_noisy @ direction_of_interest - sigma**2)
+    smoothing_sigma = np.sqrt(max(direction_of_interest.T @ covariance_noisy @ direction_of_interest - sigma**2, 1e-12 * sigma**2))
     estimate = (direction_of_interest * observation).sum()
-    grid = np.linspace(estimate - 10 * sigma, estimate + 10 * sigma, 801)
+    grid = np.linspace(estimate - 20 * sigma, estimate + 20 * sigma, 801)
     weight = (
         normal_dbn.cdf(
             (upper_bound - grid) / (smoothing_sigma)

From 519ba93b5ced8bd1efe4ace4681e38584cddb337 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 14 Nov 2023 14:51:38 -0800
Subject: [PATCH 13/44] cleanup of Q_E, affine_con

---
 glmnet/inference.py | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 0922035..f8c916a 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -35,6 +35,7 @@ def lasso_inference(glmnet_obj,
 
     fixed_lambda = fixed_lambda_estimator(glmnet_obj, lambda_val)
     X_sel, Y_sel, weight_sel = selection_data
+
     if weight_sel is None:
         weight_sel = np.ones(X_sel.shape[0])
         
@@ -46,13 +47,19 @@ def lasso_inference(glmnet_obj,
         raise NotImplementedError('upper/lower limits coming soon')
 
     active_set = np.nonzero(FL.coef_ != 0)[0]
+
+    # find selection data covariance
+
+    unreg_sel_LM = glmnet_obj.get_LM()
+    unreg_sel_LM.summarize = True
+    unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
+    C_sel = unreg_sel_LM.covariance_
+
     state = FL.state_
     information = FL._family.information(state,
                                          weight_sel)
     
-    Q_E = FL.design_.quadratic_form(information,
-                                    columns=active_set)
-    keep = np.zeros(Q_E.shape[0], bool)
+    keep = np.zeros(FL.design_.shape[1], bool)
     if hasattr(FL, 'penalty_factors'):
         penfac = FL.penalty_factors[active_set]
     else:
@@ -66,27 +73,20 @@ def lasso_inference(glmnet_obj,
         signs = np.hstack([0, signs])
         keep[0] = 1
         keep[1 + active_set] = 1
-        Q_E = Q_E[keep]
         stacked = np.hstack([state.intercept,
                              state.coef[active_set]])
-        delta = np.zeros(Q_E.shape[0])
+        delta = np.zeros(keep.sum())
         delta[1:] = weight_sel.sum() * lambda_val * penfac[1:] * signs[1:]
     else:
         keep[active_set] = 1
-        # C_E_active = C_E[keep]
-        Q_E = Q_E[keep][:,1:]
         stacked = state.coef[active_set]
         delta = weight_sel.sum() * lambda_val * penfac * signs
 
-    C_E = np.linalg.inv(Q_E)
-    delta = C_E @ delta
+    delta = C_sel @ delta
     noisy_mle = stacked + delta
 
     penalized = penfac > 0
-    sel_P = -np.diag(signs[penalized]) @ np.eye(C_E.shape[0])[penalized]
-    con = constraints(sel_P,
-                      -signs[penalized] * delta[penalized],
-                      covariance=C_E) # adjust 
+    sel_P = -np.diag(signs[penalized]) @ np.eye(C_sel.shape[0])[penalized]
 
     # fit full model
 
@@ -96,10 +96,7 @@ def lasso_inference(glmnet_obj,
     unreg_LM.summarize = True
     unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
     C_full = unreg_LM.covariance_
-    unreg_sel_LM = glmnet_obj.get_LM()
-    unreg_sel_LM.summarize = True
-    unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
-    C_sel = unreg_sel_LM.covariance_
+
     # selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_sel).sum(), 0, 1)
     
     ## TODO: will this handle fit_intercept?
@@ -119,16 +116,14 @@ def lasso_inference(glmnet_obj,
         e_i[i] = 1.
         ## call selection_interval and return
         L, U, mle, p = selection_interval(
-            support_directions=con.linear_part,
-            support_offsets=con.offset,
+            support_directions=sel_P,
+            support_offsets=-signs[penalized] * delta[penalized], 
             covariance_noisy=C_sel,
             covariance_full=C_full,
             noisy_observation=noisy_mle,
             observation=full_mle,
             direction_of_interest=e_i,
-            # tol=,
             level=level,
-            # UMAU=,
         )
         Ls[i] = L
         Us[i] = U

From 35a2ba3ec79a2f4e222ea90101d66d967599dd23 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 15 Nov 2023 09:30:13 -0800
Subject: [PATCH 14/44] adding Lee pvalues, dependency on mpmath as well

---
 glmnet/inference.py | 119 ++++++++++++++++++++++++++++++++++++--------
 pyproject.toml      |   1 +
 2 files changed, 99 insertions(+), 21 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index f8c916a..c2145ca 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -11,6 +11,9 @@
 import pandas as pd
 from scipy.stats import norm as normal_dbn
 
+import mpmath as mp
+mp.dps = 80
+
 from sklearn.utils.validation import check_is_fitted
 from sklearn.base import clone
 
@@ -691,7 +694,6 @@ def selection_interval(support_directions,
                        noisy_observation,
                        observation,
                        direction_of_interest,
-                    #    percent_smoothing, ## sqrt(alpha)
                        tol = 1.e-4,
                        level = 0.90,
                        UMAU=True):
@@ -749,31 +751,106 @@ def selection_interval(support_directions,
     ## lars path lockhart tibs^2 taylor paper
     sigma = np.sqrt(direction_of_interest.T @ covariance_full @ direction_of_interest)
     ## sqrt(alpha) is just sigma_noisy - sigma_full
-    smoothing_sigma = np.sqrt(max(direction_of_interest.T @ covariance_noisy @ direction_of_interest - sigma**2, 1e-12 * sigma**2))
+    smoothing_sigma = np.sqrt(max(direction_of_interest.T @ covariance_noisy @ direction_of_interest - sigma**2, 0))
     estimate = (direction_of_interest * observation).sum()
-    grid = np.linspace(estimate - 20 * sigma, estimate + 20 * sigma, 801)
-    weight = (
-        normal_dbn.cdf(
-            (upper_bound - grid) / (smoothing_sigma)
-        ) - normal_dbn.cdf(
-            (lower_bound - grid) / (smoothing_sigma)
+
+    if smoothing_sigma > 1e-6 * sigma:
+
+        grid = np.linspace(estimate - 20 * sigma, estimate + 20 * sigma, 801)
+        weight = (
+            normal_dbn.cdf(
+                (upper_bound - grid) / (smoothing_sigma)
+            ) - normal_dbn.cdf(
+                (lower_bound - grid) / (smoothing_sigma)
+            )
         )
-    )
-    weight *= normal_dbn.pdf(grid / sigma)
-
-    # assert(0==1)
-    sel_distr = discrete_family(grid, weight)
-    L, U = sel_distr.equal_tailed_interval(estimate,
-                                            alpha=1-level)
-    mle, _, _ = sel_distr.MLE(estimate)
-    mle *= sigma**2
-    pval = sel_distr.cdf(0, estimate)
-    pval = 2 * min(pval, 1-pval)
-    L *= sigma**2
-    U *= sigma**2
+        weight *= normal_dbn.pdf(grid / sigma)
+
+        # assert(0==1)
+        sel_distr = discrete_family(grid, weight)
+        L, U = sel_distr.equal_tailed_interval(estimate,
+                                                alpha=1-level)
+        mle, _, _ = sel_distr.MLE(estimate)
+        mle *= sigma**2
+        pval = sel_distr.cdf(0, estimate)
+        pval = 2 * min(pval, 1-pval)
+        L *= sigma**2
+        U *= sigma**2
+    else:
+
+        lb = estimate - 20 * sigma
+        ub = estimate + 20 * sigma
+
+        if estimate < lower_bound or estimate > upper_bound:
+            warn('Constraints not satisfied: returning [-np.inf, np.inf]')
+            return -np.inf, np.inf, np.nan, np.nan
+        
+        def F(theta):
+            
+            Z = (estimate - theta) / sigma
+            Z_L = (lower_bound - theta) / sigma
+            Z_U = (upper_bound - theta) / sigma
+            num = norm_interval(Z_L, Z)
+            den = norm_interval(Z_L, Z_U)
+            if Z_L > 0 and den < 1e-10:
+                C = np.fabs(Z_L)
+                D = Z-Z_L
+                cdf = 1-np.exp(-C*D)
+            elif Z_U < 0 and den < 1e-10:
+                C = np.fabs(Z_U)
+                D = Z_U-Z
+                if C*D < 0:
+                    raise ValueError
+                cdf = np.exp(-C*D)
+            else:
+                cdf = num / den
+            return cdf
+            
+        
+        pval = F(0)
+        pval = 2 * min(pval, 1-pval)
+
+        lb = lower_bound - 20 * sigma
+        ub = upper_bound + 20 * sigma
+
+        alpha = 0.5 * (1 - level)
+        L = find_root(F, 1.0 - 0.5 * alpha, lb, ub)
+        if np.isnan(L):
+            L = -np.inf
+        U = find_root(F, 0.5 * alpha, lb, ub)
+        if np.isnan(U):
+            U = np.inf
+
+        mle = np.nan
+        
 
     return L, U, mle, pval
 
+def norm_interval(lower, upper):
+    r"""
+    A multiprecision evaluation of
+
+    .. math::
+
+        \Phi(U) - \Phi(L)
+
+    Parameters
+    ----------
+
+    lower : float
+        The lower limit $L$
+
+    upper : float
+        The upper limit $U$
+
+    """
+    #cdf = normal_dbn.cdf
+    cdf = mp.ncdf
+    if lower > 0 and upper > 0:
+        return cdf(-lower) - cdf(-upper)
+    else:
+        return cdf(upper) - cdf(lower)
+
 def find_root(f, y, lb, ub, tol=1e-6):
     """
     searches for solution to f(x) = y in (lb, ub), where 
diff --git a/pyproject.toml b/pyproject.toml
index b3b5e36..c6ddb3a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,7 @@ requires = ["setuptools>=42",
             "matplotlib>=3.3.3",
             "versioneer",
 	    "statsmodels",
+	    "mpmath",
 	    "coxdev"]
 build-backend = "setuptools.build_meta"
 

From d821c945739424a06c0b483dffb0b9e81cfec6dd Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 15 Nov 2023 09:31:14 -0800
Subject: [PATCH 15/44] removing debug assertion

---
 glmnet/inference.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index c2145ca..433f702 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -766,7 +766,6 @@ def selection_interval(support_directions,
         )
         weight *= normal_dbn.pdf(grid / sigma)
 
-        # assert(0==1)
         sel_distr = discrete_family(grid, weight)
         L, U = sel_distr.equal_tailed_interval(estimate,
                                                 alpha=1-level)

From d75094a1cafa7493040e816163b7fb1f16942c38 Mon Sep 17 00:00:00 2001
From: kevinbfry <15952367+kevinbfry@users.noreply.github.com>
Date: Wed, 15 Nov 2023 13:24:44 -0800
Subject: [PATCH 16/44] added bootstrap inf

---
 glmnet/inference.py | 110 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 433f702..2451a1b 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -134,6 +134,116 @@ def lasso_inference(glmnet_obj,
         pvals[i] = p
     return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us})
 
+def lasso_bootstrap_inference(glmnet_obj,
+                              lambda_val,
+                              selection_data, ## X = \Sigma^{-1/2}, Y = \Sigma^{-1/2}\hat\beta
+                              full_data,
+                            #   C_full, ## bootstrap variance
+                              alpha,
+                              level=.9):
+
+    fixed_lambda = fixed_lambda_estimator(glmnet_obj, lambda_val)
+    X_sel, Y_sel, weight_sel = selection_data
+
+    if weight_sel is None:
+        weight_sel = np.ones(X_sel.shape[0])
+        
+    fixed_lambda.fit(X_sel, Y_sel, sample_weight=weight_sel)
+    FL = fixed_lambda # shorthand
+    
+    if (not np.all(FL.upper_limits == np.inf) or
+        not np.all(FL.lower_limits == -np.inf)):
+        raise NotImplementedError('upper/lower limits coming soon')
+
+    active_set = np.nonzero(FL.coef_ != 0)[0]
+
+    # find selection data covariance
+
+    # unreg_sel_LM = glmnet_obj.get_LM()
+    # unreg_sel_LM.summarize = True
+    # unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
+    # C_sel = unreg_sel_LM.covariance_
+    C_full = np.eye(len(active_set))
+    C_sel = (1 + alpha) * C_full
+
+    state = FL.state_
+    information = FL._family.information(state,
+                                         weight_sel)
+    
+    keep = np.zeros(FL.design_.shape[1], bool)
+    if hasattr(FL, 'penalty_factors'):
+        penfac = FL.penalty_factors[active_set]
+    else:
+        penfac = np.ones(active_set.shape[0])
+    signs = np.sign(FL.coef_[active_set])
+
+    # we multiply by weight_sel.sum() due to this factor
+    # appearing in glmnet objective...
+    if FL.fit_intercept:
+        penfac = np.hstack([0, penfac])
+        signs = np.hstack([0, signs])
+        keep[0] = 1
+        keep[1 + active_set] = 1
+        stacked = np.hstack([state.intercept,
+                             state.coef[active_set]])
+        delta = np.zeros(keep.sum())
+        delta[1:] = weight_sel.sum() * lambda_val * penfac[1:] * signs[1:]
+    else:
+        keep[active_set] = 1
+        stacked = state.coef[active_set]
+        delta = weight_sel.sum() * lambda_val * penfac * signs
+
+    delta = C_sel @ delta
+    noisy_mle = stacked + delta
+
+    penalized = penfac > 0
+    sel_P = -np.diag(signs[penalized]) @ np.eye(C_sel.shape[0])[penalized]
+
+    # fit full model
+
+    X_full, Y_full, weight_full = full_data
+
+    unreg_LM = glmnet_obj.get_LM()
+    unreg_LM.summarize = True
+    unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
+    # C_full = unreg_LM.covariance_
+
+    # selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_sel).sum(), 0, 1)
+    
+    ## TODO: will this handle fit_intercept?
+    if FL.fit_intercept:
+        full_mle = np.hstack([unreg_LM.intercept_,
+                              unreg_LM.coef_])
+    else:
+        full_mle = unreg_LM.coef_
+
+    ## iterate over coordinates
+    Ls = np.zeros_like(noisy_mle)
+    Us = np.zeros_like(noisy_mle)
+    mles = np.zeros_like(noisy_mle)
+    pvals = np.zeros_like(noisy_mle)
+    for i in range(len(noisy_mle)):
+        e_i = np.zeros_like(noisy_mle)
+        e_i[i] = 1.
+        ## call selection_interval and return
+        L, U, mle, p = selection_interval(
+            support_directions=sel_P,
+            support_offsets=-signs[penalized] * delta[penalized], 
+            covariance_noisy=C_sel,
+            covariance_full=C_full,
+            noisy_observation=noisy_mle,
+            observation=full_mle,
+            direction_of_interest=e_i,
+            level=level,
+        )
+        Ls[i] = L
+        Us[i] = U
+        mles[i] = mle
+        pvals[i] = p
+        
+    return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us})
+
+
 class constraints(object):
 
     r"""

From 74ae5f4b940e0f841e6692de9cb54ec3108894ff Mon Sep 17 00:00:00 2001
From: kevinbfry <15952367+kevinbfry@users.noreply.github.com>
Date: Wed, 15 Nov 2023 18:14:06 -0800
Subject: [PATCH 17/44] fixing bootstrap inf

---
 glmnet/inference.py | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 2451a1b..ac9a2fb 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -10,6 +10,7 @@
 import numpy as np
 import pandas as pd
 from scipy.stats import norm as normal_dbn
+import statsmodels.api as sm
 
 import mpmath as mp
 mp.dps = 80
@@ -81,7 +82,8 @@ def lasso_inference(glmnet_obj,
         delta = np.zeros(keep.sum())
         delta[1:] = weight_sel.sum() * lambda_val * penfac[1:] * signs[1:]
     else:
-        keep[active_set] = 1
+        # keep[active_set] = 1
+        keep[1 + active_set] = 1
         stacked = state.coef[active_set]
         delta = weight_sel.sum() * lambda_val * penfac * signs
 
@@ -132,17 +134,24 @@ def lasso_inference(glmnet_obj,
         Us[i] = U
         mles[i] = mle
         pvals[i] = p
-    return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us})
+    
+    idx = (active_set).tolist()
+    if FL.fit_intercept:
+        idx = [-1] + idx
+    return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
 
 def lasso_bootstrap_inference(glmnet_obj,
                               lambda_val,
-                              selection_data, ## X = \Sigma^{-1/2}, Y = \Sigma^{-1/2}\hat\beta
+                              selection_data, ## X = \Sigma^{-1/2}, Y = \Sigma^{-1/2}\hat\beta^{(b)}
                               full_data,
-                            #   C_full, ## bootstrap variance
+                              C_full, ## bootstrap variance
                               alpha,
                               level=.9):
 
     fixed_lambda = fixed_lambda_estimator(glmnet_obj, lambda_val)
+    fixed_lambda.fit_intercept = False
+    gaussian = sm.families.Gaussian()
+    fixed_lambda.family = gaussian
     X_sel, Y_sel, weight_sel = selection_data
 
     if weight_sel is None:
@@ -163,8 +172,7 @@ def lasso_bootstrap_inference(glmnet_obj,
     # unreg_sel_LM.summarize = True
     # unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
     # C_sel = unreg_sel_LM.covariance_
-    C_full = np.eye(len(active_set))
-    C_sel = (1 + alpha) * C_full
+    # C_full = np.eye(len(active_set))
 
     state = FL.state_
     information = FL._family.information(state,
@@ -189,10 +197,14 @@ def lasso_bootstrap_inference(glmnet_obj,
         delta = np.zeros(keep.sum())
         delta[1:] = weight_sel.sum() * lambda_val * penfac[1:] * signs[1:]
     else:
-        keep[active_set] = 1
+        keep[1 + active_set] = 1
         stacked = state.coef[active_set]
         delta = weight_sel.sum() * lambda_val * penfac * signs
 
+    C_full_ = C_full.copy()
+    C_full = C_full[keep[1:]][:,keep[1:]]
+    C_sel = (1 + alpha) * C_full
+
     delta = C_sel @ delta
     noisy_mle = stacked + delta
 
@@ -225,6 +237,7 @@ def lasso_bootstrap_inference(glmnet_obj,
     for i in range(len(noisy_mle)):
         e_i = np.zeros_like(noisy_mle)
         e_i[i] = 1.
+        # e_i = np.linalg.pinv(X_sel) @ e_i
         ## call selection_interval and return
         L, U, mle, p = selection_interval(
             support_directions=sel_P,
@@ -240,9 +253,11 @@ def lasso_bootstrap_inference(glmnet_obj,
         Us[i] = U
         mles[i] = mle
         pvals[i] = p
-        
-    return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us})
 
+    idx = (active_set).tolist()
+    if FL.fit_intercept:
+        idx = [-1] + idx
+    return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
 
 class constraints(object):
 

From 25b0a449f916858c8dfce587babc2047ea8092ca Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 10 Jan 2024 16:50:42 -0800
Subject: [PATCH 18/44] WIP: selectinf, trying to match onestep and OLS
 estimator

---
 glmnet/glm.py             | 15 ++++++--
 glmnet/glmnet.py          |  7 +++-
 glmnet/inference.py       | 77 ++++++++++++++++++++++++++++-----------
 glmnet/regularized_glm.py | 11 +++++-
 4 files changed, 82 insertions(+), 28 deletions(-)

diff --git a/glmnet/glm.py b/glmnet/glm.py
index 30a4be5..a658700 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -109,6 +109,7 @@ def null_fit(self,
                              self,
                              offset,
                              None)
+
         else:
             state = GLMState(np.zeros(1), 0)
             state.link_parameter = offset
@@ -160,7 +161,7 @@ def get_response_and_weights(self,
 
         return pseudo_response, newton_weights
         
-    def default_scorers(self):
+    def _default_scorers(self):
 
         fam_name = self.base.__class__.__name__
 
@@ -383,7 +384,7 @@ class GLMBase(BaseEstimator,
               GLMBaseSpec):
 
     def _get_regularizer(self,
-                         X):
+                         nvars=None):
         return GLMRegularizer(fit_intercept=self.fit_intercept)
 
     # no standardization for GLM
@@ -415,6 +416,7 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 4 options non sklearn API
+            warm_state=None,
             dispersion=1,
             check=True,
             fit_null=True):
@@ -454,9 +456,12 @@ def fit(self,
         # the regularizer stores the warm start
 
         if regularizer is None:
-            regularizer = self._get_regularizer(X)
+            regularizer = self._get_regularizer(nvars=design.X.shape[0])
         self.regularizer_ = regularizer
 
+        if warm_state is not None:
+            self.regularizer_.warm_state = warm_state
+            
         if fit_null or not hasattr(self.regularizer_, 'warm_state'):
             (null_state,
              self.null_deviance_) = self._family.get_null_deviance(
@@ -642,6 +647,7 @@ def fit(self,
             y,
             sample_weight=None,
             regularizer=None,             # last 4 options non sklearn API
+            warm_state=None,
             dispersion=1,
             check=True):
 
@@ -649,6 +655,7 @@ def fit(self,
                     y,
                     sample_weight=sample_weight,
                     regularizer=regularizer,
+                    warm_state=warm_state,
                     dispersion=dispersion,
                     check=check)
 
@@ -736,6 +743,7 @@ def fit(self,
             y,
             sample_weight=None,
             regularizer=None,             # last 4 options non sklearn API
+            warm_state=None,
             dispersion=1,
             check=True):
 
@@ -750,6 +758,7 @@ def fit(self,
                            y,
                            sample_weight=sample_weight,
                            regularizer=regularizer,             # last 4 options non sklearn API
+                           warm_state=warm_state,
                            dispersion=dispersion,
                            check=check)
 
diff --git a/glmnet/glmnet.py b/glmnet/glmnet.py
index 0ed19aa..c640df6 100644
--- a/glmnet/glmnet.py
+++ b/glmnet/glmnet.py
@@ -89,6 +89,7 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 3 options non sklearn API
+            warm_state=None,
             interpolation_grid=None):
 
         if not hasattr(self, "_family"):
@@ -124,7 +125,11 @@ def fit(self,
                                exclude=self.exclude
                                )
 
-        self.reg_glm_est_.fit(X, y, None, fit_null=False) 
+        self.reg_glm_est_.fit(X,
+                              y,
+                              None,
+                              fit_null=False,
+                              warm_state=warm_state) 
         regularizer_ = self.reg_glm_est_.regularizer_
 
         state, keep_ = self._get_initial_state(X,
diff --git a/glmnet/inference.py b/glmnet/inference.py
index ac9a2fb..182d43b 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -21,15 +21,26 @@
 def fixed_lambda_estimator(glmnet_obj,
                            lambda_val):
     check_is_fitted(glmnet_obj, ["coefs_", "feature_names_in_"])
-    estimator = clone(glmnet_obj.reg_glm_est_)
-    estimator.lambda_val = lambda_val
+    estimator = glmnet_obj.regularized_estimator(
+                           lambda_val=lambda_val,
+                           family=glmnet_obj.family,
+                           alpha=glmnet_obj.alpha,
+                           penalty_factor=glmnet_obj.penalty_factor,
+                           lower_limits=glmnet_obj.lower_limits,
+                           upper_limits=glmnet_obj.upper_limits,
+                           fit_intercept=glmnet_obj.fit_intercept,
+                           standardize=glmnet_obj.standardize,
+                           control=glmnet_obj.control,
+                           offset_id=glmnet_obj.offset_id,
+                           weight_id=glmnet_obj.weight_id,            
+                           response_id=glmnet_obj.response_id,
+                           exclude=glmnet_obj.exclude
+                           )
+
     coefs, intercepts = glmnet_obj.interpolate_coefs([lambda_val])
     cls = glmnet_obj.state_.__class__
     state = cls(coefs[0], intercepts[0])
-    estimator.regularizer_ = glmnet_obj.reg_glm_est_.regularizer_
-    estimator.regularizer_.warm_state = state
-
-    return estimator
+    return estimator, state
 
 def lasso_inference(glmnet_obj,
                     lambda_val,
@@ -37,17 +48,21 @@ def lasso_inference(glmnet_obj,
                     full_data,
                     level=.9):
 
-    fixed_lambda = fixed_lambda_estimator(glmnet_obj, lambda_val)
+    fixed_lambda, warm_state = fixed_lambda_estimator(glmnet_obj, lambda_val)
     X_sel, Y_sel, weight_sel = selection_data
 
     if weight_sel is None:
         weight_sel = np.ones(X_sel.shape[0])
         
-    fixed_lambda.fit(X_sel, Y_sel, sample_weight=weight_sel)
+    fixed_lambda.fit(X_sel,
+                     Y_sel,
+                     sample_weight=weight_sel,
+                     warm_state=warm_state)
+
     FL = fixed_lambda # shorthand
     
-    if (not np.all(FL.upper_limits == np.inf) or
-        not np.all(FL.lower_limits == -np.inf)):
+    if (not np.all(FL.upper_limits >= FL.control.big) or
+        not np.all(FL.lower_limits <= -FL.control.big)):
         raise NotImplementedError('upper/lower limits coming soon')
 
     active_set = np.nonzero(FL.coef_ != 0)[0]
@@ -59,10 +74,6 @@ def lasso_inference(glmnet_obj,
     unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
     C_sel = unreg_sel_LM.covariance_
 
-    state = FL.state_
-    information = FL._family.information(state,
-                                         weight_sel)
-    
     keep = np.zeros(FL.design_.shape[1], bool)
     if hasattr(FL, 'penalty_factors'):
         penfac = FL.penalty_factors[active_set]
@@ -77,24 +88,33 @@ def lasso_inference(glmnet_obj,
         signs = np.hstack([0, signs])
         keep[0] = 1
         keep[1 + active_set] = 1
-        stacked = np.hstack([state.intercept,
-                             state.coef[active_set]])
-        delta = np.zeros(keep.sum())
-        delta[1:] = weight_sel.sum() * lambda_val * penfac[1:] * signs[1:]
+        stacked = np.hstack([FL.intercept_,
+                             FL.coef_[active_set]])
+        delta = weight_sel.sum() * lambda_val * penfac * signs
+        print(penfac)
     else:
         # keep[active_set] = 1
         keep[1 + active_set] = 1
-        stacked = state.coef[active_set]
+        stacked = FL.coef_[active_set]
         delta = weight_sel.sum() * lambda_val * penfac * signs
 
-    delta = C_sel @ delta
+    print(active_set, 'huh')
+    
+    D = np.column_stack([np.ones(X_sel.shape[0]), X_sel[:,active_set]])
+    other_mle = np.linalg.pinv(D) @ Y_sel
+    A_sel = C_sel / unreg_sel_LM.dispersion_
+    print(np.linalg.norm(A_sel - np.linalg.inv(unreg_sel_LM.design_.quadratic_form(unreg_sel_LM._information))), 'normie')
+    delta = A_sel @ delta
     noisy_mle = stacked + delta
 
     penalized = penfac > 0
-    sel_P = -np.diag(signs[penalized]) @ np.eye(C_sel.shape[0])[penalized]
+    sel_P = -np.diag(signs[penalized]) @ np.eye(A_sel.shape[0])[penalized]
 
     # fit full model
 
+    subgrad1 = X_sel[:,active_set].T @ (Y_sel - FL.design_ @ np.hstack([FL.intercept_, FL.coef_]))
+    print(subgrad1, 'subgrad1')
+
     X_full, Y_full, weight_full = full_data
 
     unreg_LM = glmnet_obj.get_LM()
@@ -111,6 +131,12 @@ def lasso_inference(glmnet_obj,
     else:
         full_mle = unreg_LM.coef_
 
+    print(other_mle, 'other')
+    print(noisy_mle, 'noisy')
+    print(full_mle, 'full')
+
+    stop
+    
     ## iterate over coordinates
     Ls = np.zeros_like(noisy_mle)
     Us = np.zeros_like(noisy_mle)
@@ -783,6 +809,8 @@ def interval_constraints(support_directions,
                      direction_of_interest)
 
     U = A.dot(X) - b
+    print(U, np.fabs(U).max()* tol, b, '?')
+
     if not np.all(U  < tol * np.fabs(U).max()):
         warn('constraints not satisfied: %s' % repr(U))
 
@@ -810,6 +838,7 @@ def interval_constraints(support_directions,
     else:
         lower_bound = -np.inf
 
+    print(lower_bound, V, upper_bound, sigma)
     return lower_bound, V, upper_bound, sigma
 
 def selection_interval(support_directions, 
@@ -865,7 +894,7 @@ def selection_interval(support_directions,
     """
 
     (lower_bound,
-     _,
+     V,
      upper_bound,
      _) = interval_constraints(support_directions, 
                                 support_offsets,
@@ -878,7 +907,10 @@ def selection_interval(support_directions,
     ## sqrt(alpha) is just sigma_noisy - sigma_full
     smoothing_sigma = np.sqrt(max(direction_of_interest.T @ covariance_noisy @ direction_of_interest - sigma**2, 0))
     estimate = (direction_of_interest * observation).sum()
+    noisy_estimate = (direction_of_interest * noisy_observation).sum()
 
+    print(V, estimate, noisy_estimate, 'huh')
+    
     if smoothing_sigma > 1e-6 * sigma:
 
         grid = np.linspace(estimate - 20 * sigma, estimate + 20 * sigma, 801)
@@ -906,6 +938,7 @@ def selection_interval(support_directions,
         ub = estimate + 20 * sigma
 
         if estimate < lower_bound or estimate > upper_bound:
+            print(estimate, lower_bound, upper_bound)
             warn('Constraints not satisfied: returning [-np.inf, np.inf]')
             return -np.inf, np.inf, np.nan, np.nan
         
diff --git a/glmnet/regularized_glm.py b/glmnet/regularized_glm.py
index f21bcb6..3cda87a 100644
--- a/glmnet/regularized_glm.py
+++ b/glmnet/regularized_glm.py
@@ -149,17 +149,21 @@ def get_LM(self):
                    response_id=self.response_id)
 
     def _get_regularizer(self,
-                         X):
+                         nvars=None):
 
         # self.design_ will have been set by now
 
+        if nvars is None:
+            check_is_fitted(self, ["design_"])
+            nvars = self.design_.X.shape[1]
+
         return ElNetRegularizer(lambda_val=self.lambda_val,
                                 alpha=self.alpha,
                                 penalty_factor=self.penalty_factor,
                                 lower_limits=self.lower_limits * self.design_.scaling_,
                                 upper_limits=self.upper_limits * self.design_.scaling_,
                                 fit_intercept=self.fit_intercept,
-                                nvars=X.shape[1],
+                                nvars=nvars,
                                 control=self.control,
                                 exclude=self.exclude)
 
@@ -177,6 +181,7 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 3 options non sklearn API
+            warm_state=None,
             check=True,
             fit_null=True):
 
@@ -184,6 +189,7 @@ def fit(self,
                     y,
                     sample_weight=sample_weight,
                     regularizer=regularizer,
+                    warm_state=warm_state,
                     dispersion=1,
                     check=check)
 
@@ -220,6 +226,7 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 4 options non sklearn API
+            warm_state=None,
             dispersion=1,
             check=True):
 

From de7db402ca3f6568b1aa3745564dd4da21fb89a7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 18 Jan 2024 12:22:59 -0800
Subject: [PATCH 19/44] some pyproject.toml work

---
 .gitattributes     |   1 +
 glmnet/_version.py | 127 +++++++++++++++++++++++++++++----------------
 pyproject.toml     |   8 +++
 setup.py           |   5 +-
 4 files changed, 95 insertions(+), 46 deletions(-)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..8edfacf
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+glmnet/_version.py export-subst
diff --git a/glmnet/_version.py b/glmnet/_version.py
index 9b01ea2..3f737f4 100644
--- a/glmnet/_version.py
+++ b/glmnet/_version.py
@@ -5,8 +5,9 @@
 # directories (produced by setup.py build) will contain a much shorter file
 # that just contains the computed version number.
 
-# This file is released into the public domain. Generated by
-# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer)
+# This file is released into the public domain.
+# Generated by versioneer-0.29
+# https://github.com/python-versioneer/python-versioneer
 
 """Git implementation of _version.py."""
 
@@ -15,10 +16,11 @@
 import re
 import subprocess
 import sys
-from typing import Callable, Dict
+from typing import Any, Callable, Dict, List, Optional, Tuple
+import functools
 
 
-def get_keywords():
+def get_keywords() -> Dict[str, str]:
     """Get the keywords needed to look up the version information."""
     # these strings will be replaced by git during git-archive.
     # setup.py/versioneer.py will grep for the variable names, so they must
@@ -34,17 +36,24 @@ def get_keywords():
 class VersioneerConfig:
     """Container for Versioneer configuration parameters."""
 
+    VCS: str
+    style: str
+    tag_prefix: str
+    parentdir_prefix: str
+    versionfile_source: str
+    verbose: bool
 
-def get_config():
+
+def get_config() -> VersioneerConfig:
     """Create, populate and return the VersioneerConfig() object."""
     # these strings are filled in when 'setup.py versioneer' creates
     # _version.py
     cfg = VersioneerConfig()
     cfg.VCS = "git"
     cfg.style = "pep440"
-    cfg.tag_prefix = ""
-    cfg.parentdir_prefix = "ISLP-"
-    cfg.versionfile_source = "ISLP/_version.py"
+    cfg.tag_prefix = "v"
+    cfg.parentdir_prefix = "glmnet-"
+    cfg.versionfile_source = "glmnet/_version.py"
     cfg.verbose = False
     return cfg
 
@@ -57,9 +66,9 @@ class NotThisMethod(Exception):
 HANDLERS: Dict[str, Dict[str, Callable]] = {}
 
 
-def register_vcs_handler(vcs, method):  # decorator
+def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
     """Create decorator to mark a method as the handler of a VCS."""
-    def decorate(f):
+    def decorate(f: Callable) -> Callable:
         """Store f in HANDLERS[vcs][method]."""
         if vcs not in HANDLERS:
             HANDLERS[vcs] = {}
@@ -68,11 +77,25 @@ def decorate(f):
     return decorate
 
 
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
-                env=None):
+def run_command(
+    commands: List[str],
+    args: List[str],
+    cwd: Optional[str] = None,
+    verbose: bool = False,
+    hide_stderr: bool = False,
+    env: Optional[Dict[str, str]] = None,
+) -> Tuple[Optional[str], Optional[int]]:
     """Call the given command(s)."""
     assert isinstance(commands, list)
     process = None
+
+    popen_kwargs: Dict[str, Any] = {}
+    if sys.platform == "win32":
+        # This hides the console window if pythonw.exe is used
+        startupinfo = subprocess.STARTUPINFO()
+        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+        popen_kwargs["startupinfo"] = startupinfo
+
     for command in commands:
         try:
             dispcmd = str([command] + args)
@@ -80,10 +103,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
             process = subprocess.Popen([command] + args, cwd=cwd, env=env,
                                        stdout=subprocess.PIPE,
                                        stderr=(subprocess.PIPE if hide_stderr
-                                               else None))
+                                               else None), **popen_kwargs)
             break
-        except OSError:
-            e = sys.exc_info()[1]
+        except OSError as e:
             if e.errno == errno.ENOENT:
                 continue
             if verbose:
@@ -103,7 +125,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
     return stdout, process.returncode
 
 
-def versions_from_parentdir(parentdir_prefix, root, verbose):
+def versions_from_parentdir(
+    parentdir_prefix: str,
+    root: str,
+    verbose: bool,
+) -> Dict[str, Any]:
     """Try to determine the version from the parent directory name.
 
     Source tarballs conventionally unpack into a directory that includes both
@@ -128,13 +154,13 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
 
 
 @register_vcs_handler("git", "get_keywords")
-def git_get_keywords(versionfile_abs):
+def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
     """Extract version information from the given file."""
     # the code embedded in _version.py can just fetch the value of these
     # keywords. When used from setup.py, we don't want to import _version.py,
     # so we do it with a regexp instead. This function is not used from
     # _version.py.
-    keywords = {}
+    keywords: Dict[str, str] = {}
     try:
         with open(versionfile_abs, "r") as fobj:
             for line in fobj:
@@ -156,7 +182,11 @@ def git_get_keywords(versionfile_abs):
 
 
 @register_vcs_handler("git", "keywords")
-def git_versions_from_keywords(keywords, tag_prefix, verbose):
+def git_versions_from_keywords(
+    keywords: Dict[str, str],
+    tag_prefix: str,
+    verbose: bool,
+) -> Dict[str, Any]:
     """Get version information from git keywords."""
     if "refnames" not in keywords:
         raise NotThisMethod("Short version file found")
@@ -220,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
 
 
 @register_vcs_handler("git", "pieces_from_vcs")
-def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
+def git_pieces_from_vcs(
+    tag_prefix: str,
+    root: str,
+    verbose: bool,
+    runner: Callable = run_command
+) -> Dict[str, Any]:
     """Get version from 'git describe' in the root of the source tree.
 
     This only gets called if the git-archive 'subst' keywords were *not*
@@ -228,13 +263,18 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
     version string, meaning we're inside a checked out source tree.
     """
     GITS = ["git"]
-    TAG_PREFIX_REGEX = "*"
     if sys.platform == "win32":
         GITS = ["git.cmd", "git.exe"]
-        TAG_PREFIX_REGEX = r"\*"
+
+    # GIT_DIR can interfere with correct operation of Versioneer.
+    # It may be intended to be passed to the Versioneer-versioned project,
+    # but that should not change where we get our version from.
+    env = os.environ.copy()
+    env.pop("GIT_DIR", None)
+    runner = functools.partial(runner, env=env)
 
     _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
-                   hide_stderr=True)
+                   hide_stderr=not verbose)
     if rc != 0:
         if verbose:
             print("Directory %s not under git control" % root)
@@ -242,11 +282,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
 
     # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
     # if there isn't one, this yields HEX[-dirty] (no NUM)
-    describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty",
-                                     "--always", "--long",
-                                     "--match",
-                                     "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)],
-                              cwd=root)
+    describe_out, rc = runner(GITS, [
+        "describe", "--tags", "--dirty", "--always", "--long",
+        "--match", f"{tag_prefix}[[:digit:]]*"
+    ], cwd=root)
     # --long was added in git-1.5.5
     if describe_out is None:
         raise NotThisMethod("'git describe' failed")
@@ -256,7 +295,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
         raise NotThisMethod("'git rev-parse' failed")
     full_out = full_out.strip()
 
-    pieces = {}
+    pieces: Dict[str, Any] = {}
     pieces["long"] = full_out
     pieces["short"] = full_out[:7]  # maybe improved later
     pieces["error"] = None
@@ -335,8 +374,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
     else:
         # HEX: no tags
         pieces["closest-tag"] = None
-        count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
-        pieces["distance"] = int(count_out)  # total number of commits
+        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+        pieces["distance"] = len(out.split())  # total number of commits
 
     # commit date: see ISO-8601 comment in git_versions_from_keywords()
     date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
@@ -348,14 +387,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
     return pieces
 
 
-def plus_or_dot(pieces):
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
     """Return a + if we don't already have one, else return a ."""
     if "+" in pieces.get("closest-tag", ""):
         return "."
     return "+"
 
 
-def render_pep440(pieces):
+def render_pep440(pieces: Dict[str, Any]) -> str:
     """Build up version string, with post-release "local version identifier".
 
     Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
@@ -380,7 +419,7 @@ def render_pep440(pieces):
     return rendered
 
 
-def render_pep440_branch(pieces):
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
     """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
 
     The ".dev0" means not master branch. Note that .dev0 sorts backwards
@@ -410,7 +449,7 @@ def render_pep440_branch(pieces):
     return rendered
 
 
-def pep440_split_post(ver):
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
     """Split pep440 version string at the post-release segment.
 
     Returns the release segments before the post-release and the
@@ -420,7 +459,7 @@ def pep440_split_post(ver):
     return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
 
 
-def render_pep440_pre(pieces):
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
     """TAG[.postN.devDISTANCE] -- No -dirty.
 
     Exceptions:
@@ -432,7 +471,7 @@ def render_pep440_pre(pieces):
             tag_version, post_version = pep440_split_post(pieces["closest-tag"])
             rendered = tag_version
             if post_version is not None:
-                rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"])
+                rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
             else:
                 rendered += ".post0.dev%d" % (pieces["distance"])
         else:
@@ -444,7 +483,7 @@ def render_pep440_pre(pieces):
     return rendered
 
 
-def render_pep440_post(pieces):
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
     """TAG[.postDISTANCE[.dev0]+gHEX] .
 
     The ".dev0" means dirty. Note that .dev0 sorts backwards
@@ -471,7 +510,7 @@ def render_pep440_post(pieces):
     return rendered
 
 
-def render_pep440_post_branch(pieces):
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
     """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
 
     The ".dev0" means not master branch.
@@ -500,7 +539,7 @@ def render_pep440_post_branch(pieces):
     return rendered
 
 
-def render_pep440_old(pieces):
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
     """TAG[.postDISTANCE[.dev0]] .
 
     The ".dev0" means dirty.
@@ -522,7 +561,7 @@ def render_pep440_old(pieces):
     return rendered
 
 
-def render_git_describe(pieces):
+def render_git_describe(pieces: Dict[str, Any]) -> str:
     """TAG[-DISTANCE-gHEX][-dirty].
 
     Like 'git describe --tags --dirty --always'.
@@ -542,7 +581,7 @@ def render_git_describe(pieces):
     return rendered
 
 
-def render_git_describe_long(pieces):
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
     """TAG-DISTANCE-gHEX[-dirty].
 
     Like 'git describe --tags --dirty --always -long'.
@@ -562,7 +601,7 @@ def render_git_describe_long(pieces):
     return rendered
 
 
-def render(pieces, style):
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
     """Render the given version pieces into the requested style."""
     if pieces["error"]:
         return {"version": "unknown",
@@ -598,7 +637,7 @@ def render(pieces, style):
             "date": pieces.get("date")}
 
 
-def get_versions():
+def get_versions() -> Dict[str, Any]:
     """Get version information or return default if unable to do so."""
     # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
     # __file__, we can work backwards from there to the root. Some
diff --git a/pyproject.toml b/pyproject.toml
index 978dbe5..a6be16e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,3 +42,11 @@ dynamic = ['version']
 [project.license]
 file = 'LICENSE.txt'
 
+[tool.versioneer]
+VCS = "git"
+style = "pep440"
+versionfile_source = "glmnet/_version.py"
+versionfile_build = "glmnet/_version.py"
+tag_prefix = "v"
+parentdir_prefix = "glmnet-"
+
diff --git a/setup.py b/setup.py
index e4ca64a..2f42b2c 100644
--- a/setup.py
+++ b/setup.py
@@ -43,9 +43,10 @@
                                                    'gaussnet',
                                                    'multigaussnet']]
 
+long_description = open('README.md', 'rt', encoding='utf-8').read()
+
 def main(**extra_args):
     setup(name='glmnet',
-          url="http://github.org/intro-stat-learning/glmnetpy",
           version=versioneer.get_version(),
           packages = ['glmnet',
                       'glmnet.paths'],
@@ -54,7 +55,7 @@ def main(**extra_args):
           include_package_data=True,
           data_files=[],
           scripts=[],
-          #long_description=long_description,
+          long_description=long_description,
           long_description_content_type=long_description_content_type,
           cmdclass = cmdclass,
           **extra_args

From aade7e61ea6a8aabe1cec8a5b56747ff5c561ddc Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 25 Jan 2024 14:48:33 -0800
Subject: [PATCH 20/44] WIP: inference

---
 tests/flex/__init__.py  | 0
 tests/paths/__init__.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/flex/__init__.py
 create mode 100644 tests/paths/__init__.py

diff --git a/tests/flex/__init__.py b/tests/flex/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/paths/__init__.py b/tests/paths/__init__.py
new file mode 100644
index 0000000..e69de29

From af85169fadd127dad95995fe7f249f879d21d967 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 25 Jan 2024 14:51:35 -0800
Subject: [PATCH 21/44] rename _check to get_data_arrays

---
 glmnet/cox.py                 | 21 ++++++++++++---------
 glmnet/glm.py                 | 13 +++++++------
 glmnet/glmnet.py              | 14 +++++++-------
 glmnet/paths/fastnet.py       |  7 ++++---
 glmnet/paths/fishnet.py       |  7 +++++--
 glmnet/paths/lognet.py        |  7 +++++--
 glmnet/paths/multiclassnet.py |  6 +++---
 glmnet/paths/multigaussnet.py | 11 ++++++++---
 8 files changed, 51 insertions(+), 35 deletions(-)

diff --git a/glmnet/cox.py b/glmnet/cox.py
index 444572e..a23a459 100644
--- a/glmnet/cox.py
+++ b/glmnet/cox.py
@@ -190,10 +190,10 @@ def _get_family_spec(self,
                              status_id=self.family.status_id,
                              start_id=self.family.start_id)
 
-    def _check(self,
-               X,
-               y,
-               check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
         return _get_data(self,
                          X,
                          y,
@@ -243,7 +243,10 @@ class RegCoxLM(RegGLM):
     
     fit_intercept: Literal[False] = False
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
         return _get_data(self,
                          X,
                          y,
@@ -268,10 +271,10 @@ class CoxNet(GLMNet):
     fit_intercept: Literal[False] = False
     regularized_estimator: BaseEstimator = RegCoxLM
     
-    def _check(self,
-               X,
-               y,
-               check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
         return _get_data(self,
                          X,
                          y,
diff --git a/glmnet/glm.py b/glmnet/glm.py
index a658700..3873cbc 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -403,7 +403,7 @@ def _get_family_spec(self,
         elif isinstance(self.family, GLMFamilySpec):
             return self.family
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self, X, y, check=True):
         return _get_data(self,
                          X,
                          y,
@@ -431,7 +431,7 @@ def fit(self,
         if not hasattr(self, "_family"):
             self._family = self._get_family_spec(y)
 
-        X, y, response, offset, weight = self._check(X, y, check=check)
+        X, y, response, offset, weight = self.get_data_arrays(X, y, check=check)
 
         sample_weight = weight
         self.sample_weight_ = normed_sample_weight = sample_weight / sample_weight.sum()
@@ -456,7 +456,7 @@ def fit(self,
         # the regularizer stores the warm start
 
         if regularizer is None:
-            regularizer = self._get_regularizer(nvars=design.X.shape[0])
+            regularizer = self._get_regularizer(nvars=design.X.shape[1])
         self.regularizer_ = regularizer
 
         if warm_state is not None:
@@ -542,6 +542,7 @@ def obj_function(response,
         else:
             self.dispersion_ = dispersion
 
+        self.state_ = state
         return self
     fit.__doc__ = '''
 Fit a GLM.
@@ -659,7 +660,7 @@ def fit(self,
                     dispersion=dispersion,
                     check=check)
 
-        weight = self._check(X, y, check=False)[-1]
+        weight = self.get_data_arrays(X, y, check=False)[-1]
             
         if self.summarize:
             self.covariance_, self.summary_ = self._summarize(self.exclude,
@@ -728,9 +729,9 @@ class BinomialGLM(ClassifierMixin, GLM):
 
     family: sm_family.Family = field(default_factory=sm_family.Binomial)
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self, X, y, check=True):
 
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+        X, y, response, offset, weight = super().get_data_arrays(X, y, check=check)
         encoder = LabelEncoder()
         labels = np.asfortranarray(encoder.fit_transform(response))
         self.classes_ = encoder.classes_
diff --git a/glmnet/glmnet.py b/glmnet/glmnet.py
index c640df6..7a2ed39 100644
--- a/glmnet/glmnet.py
+++ b/glmnet/glmnet.py
@@ -66,10 +66,10 @@ class GLMNetSpec(object):
 class GLMNet(BaseEstimator,
              GLMNetSpec):
 
-    def _check(self,
-               X,
-               y,
-               check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
         return _get_data(self,
                          X,
                          y,
@@ -95,7 +95,7 @@ def fit(self,
         if not hasattr(self, "_family"):
             self._family = self._get_family_spec(y)
 
-        X, y, response, offset, weight = self._check(X, y)
+        X, y, response, offset, weight = self.get_data_arrays(X, y)
 
         if isinstance(X, pd.DataFrame):
             self.feature_names_in_ = list(X.columns)
@@ -315,7 +315,7 @@ def cross_validation_path(self,
         # truncate to the size we got
         predictions = predictions[:,:self.lambda_values_.shape[0]]
 
-        response, offset, weight = self._check(X, y, check=False)[2:]
+        response, offset, weight = self.get_data_arrays(X, y, check=False)[2:]
 
         # adjust for offset
         # because predictions are just X\beta
@@ -464,7 +464,7 @@ def _get_initial_state(self,
             intercept_ = glm.intercept_
         else:
             if self.fit_intercept:
-                response, offset, weight = self._check(X, y, check=False)[2:]
+                response, offset, weight = self.get_data_arrays(X, y, check=False)[2:]
                 state0 = self._family.null_fit(response,
                                                weight,
                                                offset,
diff --git a/glmnet/paths/fastnet.py b/glmnet/paths/fastnet.py
index e3f8c0f..248336f 100644
--- a/glmnet/paths/fastnet.py
+++ b/glmnet/paths/fastnet.py
@@ -62,7 +62,7 @@ def fit(self,
         else:
             self.feature_names_in_ = ['X{}'.format(i) for i in range(X.shape[1])]
 
-        X, y, response, offset, weight = self._check(X, y)
+        X, y, response, offset, weight = self.get_data_arrays(X, y)
 
         if not scipy.sparse.issparse(X):
             X = np.asfortranarray(X)
@@ -129,8 +129,9 @@ def fit(self,
         self.coefs_ = result['coefs']
         self.intercepts_ = result['intercepts']
             
-        self.state_ = GLMState(self.coefs_[-1],
-                               self.intercepts_[-1])
+        if self.coefs_.ndim == 1:
+            self.state_ = GLMState(self.coefs_[-1],
+                                   self.intercepts_[-1])
 
         self.lambda_values_ = result['lambda_values']
         nfits = self.lambda_values_.shape[0]
diff --git a/glmnet/paths/fishnet.py b/glmnet/paths/fishnet.py
index e638c6f..da28123 100644
--- a/glmnet/paths/fishnet.py
+++ b/glmnet/paths/fishnet.py
@@ -28,9 +28,12 @@ class FishNet(FastNetMixin):
     def __post_init__(self):
         self._family = GLMFamilySpec(base=sm_family.Poisson())
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
 
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+        X, y, response, offset, weight = super().get_data_arrays(X, y, check=check)
         if np.any(response < 0):
             raise ValueError("negative responses encountered;  not permitted for Poisson family")
         response = np.asarray(response, float).copy()
diff --git a/glmnet/paths/lognet.py b/glmnet/paths/lognet.py
index bdd4dfd..b783ec1 100644
--- a/glmnet/paths/lognet.py
+++ b/glmnet/paths/lognet.py
@@ -41,9 +41,12 @@ def _extract_fits(self,
         V = super()._extract_fits(X_shape, response_shape)
         return V
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
 
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+        X, y, response, offset, weight = super().get_data_arrays(X, y, check=check)
         encoder = LabelEncoder()
         labels = np.asfortranarray(encoder.fit_transform(response))
         self.classes_ = encoder.classes_
diff --git a/glmnet/paths/multiclassnet.py b/glmnet/paths/multiclassnet.py
index 676831c..05486e4 100644
--- a/glmnet/paths/multiclassnet.py
+++ b/glmnet/paths/multiclassnet.py
@@ -26,7 +26,7 @@
 @dataclass
 class MultiClassFamily(object):
 
-    def default_scorers(self):
+    def _default_scorers(self):
 
         return [accuracy_scorer,
                 misclass_scorer,
@@ -68,9 +68,9 @@ def _offset_predictions(self,
         value = exp_value / exp_value.sum(-1)[:,:,None]
         return value
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self, X, y, check=True):
 
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+        X, y, response, offset, weight = super().get_data_arrays(X, y, check=check)
         encoder = OneHotEncoder(sparse_output=False)
         y_onehot = np.asfortranarray(encoder.fit_transform(response.reshape((-1,1))))
         self.categories_ = encoder.categories_[0]
diff --git a/glmnet/paths/multigaussnet.py b/glmnet/paths/multigaussnet.py
index 7f30c38..b2a3943 100644
--- a/glmnet/paths/multigaussnet.py
+++ b/glmnet/paths/multigaussnet.py
@@ -25,7 +25,7 @@
 @dataclass
 class MultiClassFamily(object):
 
-    def default_scorers(self):
+    def _default_scorers(self):
         return [mse_scorer, mae_scorer]
 
 @dataclass
@@ -49,8 +49,13 @@ def _extract_fits(self,
         self._fit['dev'] = self._fit['rsq']
         return super()._extract_fits(X_shape, response_shape)
 
-    def _check(self, X, y, check=True):
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
+        X, y, response, offset, weight = super().get_data_arrays(X,
+                                                                 y,
+                                                                 check=check)
         return X, y, np.asfortranarray(response), offset, weight
 
     def _wrapper_args(self,

From 302e8b2ddd210aa5139f6a65c7badefdcd42e1a8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 25 Jan 2024 17:53:09 -0800
Subject: [PATCH 22/44] WIP: inference

---
 glmnet/inference.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 182d43b..c42b33b 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -50,7 +50,10 @@ def lasso_inference(glmnet_obj,
 
     fixed_lambda, warm_state = fixed_lambda_estimator(glmnet_obj, lambda_val)
     X_sel, Y_sel, weight_sel = selection_data
-
+    X_sel, Y_sel, _, _, weight_sel = fixed_lambda.get_data_arrays(X_sel,
+                                                                  Y_sel)
+    Y_sel = np.asarray(Y_sel)
+    
     if weight_sel is None:
         weight_sel = np.ones(X_sel.shape[0])
         

From e07bf4647804b70cb412436e6e24be8ff55e1a02 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 26 Jan 2024 14:57:01 -0800
Subject: [PATCH 23/44] using score of family, seems to have good coverage
 under global null

---
 glmnet/cox.py           |  3 ++-
 glmnet/glm.py           |  1 +
 glmnet/inference.py     | 26 ++++++------------
 tests/test_inference.py | 58 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 19 deletions(-)
 create mode 100644 tests/test_inference.py

diff --git a/glmnet/cox.py b/glmnet/cox.py
index a23a459..10335dd 100644
--- a/glmnet/cox.py
+++ b/glmnet/cox.py
@@ -57,11 +57,12 @@ def logl_score(self,
                    family,
                    y,
                    sample_weight):
+
         link_parameter = self.link_parameter
         family._result = family._coxdev(link_parameter,
                                         sample_weight)
         # the gradient is the gradient of the deviance
-        # we want deviance of the log-likelihood
+        # we want gradient of the log-likelihood
         return - family._result.gradient / 2
 
 @dataclass
diff --git a/glmnet/glm.py b/glmnet/glm.py
index 3873cbc..9992509 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -305,6 +305,7 @@ def logl_score(self,
         dmu_deta = family.link.inverse_deriv(self.link_parameter)
         
         # compute working residual
+        y = np.asarray(y).reshape(-1)
         r = (y - self.mu) 
         return sample_weight * r * dmu_deta / varmu 
 
diff --git a/glmnet/inference.py b/glmnet/inference.py
index c42b33b..df36cb7 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -52,8 +52,6 @@ def lasso_inference(glmnet_obj,
     X_sel, Y_sel, weight_sel = selection_data
     X_sel, Y_sel, _, _, weight_sel = fixed_lambda.get_data_arrays(X_sel,
                                                                   Y_sel)
-    Y_sel = np.asarray(Y_sel)
-    
     if weight_sel is None:
         weight_sel = np.ones(X_sel.shape[0])
         
@@ -94,19 +92,16 @@ def lasso_inference(glmnet_obj,
         stacked = np.hstack([FL.intercept_,
                              FL.coef_[active_set]])
         delta = weight_sel.sum() * lambda_val * penfac * signs
-        print(penfac)
     else:
         # keep[active_set] = 1
         keep[1 + active_set] = 1
         stacked = FL.coef_[active_set]
         delta = weight_sel.sum() * lambda_val * penfac * signs
 
-    print(active_set, 'huh')
-    
     D = np.column_stack([np.ones(X_sel.shape[0]), X_sel[:,active_set]])
     other_mle = np.linalg.pinv(D) @ Y_sel
     A_sel = C_sel / unreg_sel_LM.dispersion_
-    print(np.linalg.norm(A_sel - np.linalg.inv(unreg_sel_LM.design_.quadratic_form(unreg_sel_LM._information))), 'normie')
+
     delta = A_sel @ delta
     noisy_mle = stacked + delta
 
@@ -115,8 +110,10 @@ def lasso_inference(glmnet_obj,
 
     # fit full model
 
-    subgrad1 = X_sel[:,active_set].T @ (Y_sel - FL.design_ @ np.hstack([FL.intercept_, FL.coef_]))
-    print(subgrad1, 'subgrad1')
+    Y_sel = np.asarray(Y_sel)
+
+    # X'(Y-X\hat{\beta}) (appropriately weighted)
+    subgrad1 = FL.design_.T @ FL.state_.logl_score(FL._family, Y_sel, sample_weight=weight_sel)
 
     X_full, Y_full, weight_full = full_data
 
@@ -134,12 +131,6 @@ def lasso_inference(glmnet_obj,
     else:
         full_mle = unreg_LM.coef_
 
-    print(other_mle, 'other')
-    print(noisy_mle, 'noisy')
-    print(full_mle, 'full')
-
-    stop
-    
     ## iterate over coordinates
     Ls = np.zeros_like(noisy_mle)
     Us = np.zeros_like(noisy_mle)
@@ -812,7 +803,7 @@ def interval_constraints(support_directions,
                      direction_of_interest)
 
     U = A.dot(X) - b
-    print(U, np.fabs(U).max()* tol, b, '?')
+    #print(U, np.fabs(U).max()* tol, b, '?')
 
     if not np.all(U  < tol * np.fabs(U).max()):
         warn('constraints not satisfied: %s' % repr(U))
@@ -841,7 +832,6 @@ def interval_constraints(support_directions,
     else:
         lower_bound = -np.inf
 
-    print(lower_bound, V, upper_bound, sigma)
     return lower_bound, V, upper_bound, sigma
 
 def selection_interval(support_directions, 
@@ -912,7 +902,7 @@ def selection_interval(support_directions,
     estimate = (direction_of_interest * observation).sum()
     noisy_estimate = (direction_of_interest * noisy_observation).sum()
 
-    print(V, estimate, noisy_estimate, 'huh')
+    #print(V, estimate, noisy_estimate, 'huh')
     
     if smoothing_sigma > 1e-6 * sigma:
 
@@ -941,7 +931,7 @@ def selection_interval(support_directions,
         ub = estimate + 20 * sigma
 
         if estimate < lower_bound or estimate > upper_bound:
-            print(estimate, lower_bound, upper_bound)
+            #print(estimate, lower_bound, upper_bound)
             warn('Constraints not satisfied: returning [-np.inf, np.inf]')
             return -np.inf, np.inf, np.nan, np.nan
         
diff --git a/tests/test_inference.py b/tests/test_inference.py
new file mode 100644
index 0000000..95f4a19
--- /dev/null
+++ b/tests/test_inference.py
@@ -0,0 +1,58 @@
+import pytest
+
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+
+from glmnet import GLMNet
+from glmnet.inference import (fixed_lambda_estimator,
+                              lasso_inference)
+@pytest.mark.parametrize('n', [500])
+@pytest.mark.parametrize('p', [103])
+@pytest.mark.parametrize('fit_intercept', [True, False])
+def test_inference(n, p, fit_intercept):
+    run_inference(n, p, fit_intercept)
+
+def run_inference(n, p, fit_intercept):
+
+    rng = np.random.default_rng(0)
+    X = rng.standard_normal((n, p))
+    Y = np.random.standard_normal(n) * 2
+    Df = pd.DataFrame({'response':Y})
+
+    fam = sm.families.Gaussian()
+    GN = GLMNet(response_id='response',
+                family=fam,
+                fit_intercept=fit_intercept)
+    GN.fit(X, Df)
+    return lasso_inference(GN, 
+                           GN.lambda_values_[10],
+                           (X[:50], Df.iloc[:50], None),
+                           (X, Df, None))
+
+# ncover = 0
+# nsel = 0
+# niter = 1000
+# for i in range(niter):
+#     GN = GLMNet(response_id='response',
+#                 family=fam,
+#                 control=GNcontrol,
+#                 fit_intercept=False,
+#              )
+    
+#     X = rng.standard_normal((n, p))
+#     Y = rng.standard_normal(n) * 1
+#     Df = pd.DataFrame({'response':Y})
+#     GN.fit(X, Df)
+#     sel_slice = slice(0, 4*n//5)
+#     res = lasso_inference(GN, 
+#                           GN.lambda_values_[10],
+#                           (X[sel_slice], Df.iloc[sel_slice], None),
+#                           (X, Df, None),
+#                           level=0.8)
+
+#     ncover += ((res['lower'] < 0) & (res['upper'] > 0)).sum()
+#     nsel += res.shape[0]
+
+# ncover / nsel
+

From 68180f65e07093c08ec877d4da497b8acdd8e054 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 12 Apr 2024 12:01:28 -0700
Subject: [PATCH 24/44] BF: quadratic form not computed correctly because X was
 modified in place when not sparse, added option for the implied Q of
 transformed or raw

---
 glmnet/base.py       |  65 ++++++++++++++++++---------
 tests/test_design.py | 102 ++++++++++++++++++++++---------------------
 2 files changed, 98 insertions(+), 69 deletions(-)

diff --git a/glmnet/base.py b/glmnet/base.py
index 257fbe0..2517071 100644
--- a/glmnet/base.py
+++ b/glmnet/base.py
@@ -95,8 +95,15 @@ def _rmatvec(self, r):
 
     def quadratic_form(self,
                        G=None,
-                       columns=None):
+                       columns=None,
+                       transformed=False):
         '''
+        if transformed is False: compute
+
+        [1 X]'G[1 X[:,E]]
+
+        if transformed is True: compute
+
         A'GA[:,E]
 
         where A is the effective matrix
@@ -104,6 +111,7 @@ def quadratic_form(self,
         [1, XS^{-1} - 1 (xm/xs)']
 
         and E is a subset of columns
+
         '''
 
         # A is effective matrix
@@ -112,6 +120,9 @@ def quadratic_form(self,
         # A'G1 = S^{-1}(X' - xm 1')G1 = S^{-1}X'G1 - S^{-1}xm 1'G1
         # A'GA = S^{-1}X'GXS^{-1} - S^{-1}X'G1 xm/xs' - xm/xs 1'GXS^{-1} + xm/xs 1'G1 (xm/xs)'
         
+        # or, the reverse
+        # X'GX = SA'GAS + X'G1xm' + xm1'GX - xm1'G1xm'
+
         n, p = self.shape[0], self.shape[1] - 1
         
         if columns is None:
@@ -122,35 +133,49 @@ def quadratic_form(self,
 
         if G is None:
             XX_block = self.X.T @ X_R # have to assume this is not too expensive
+            if scipy.sparse.issparse(self.X): 
+                XX_block = XX_block.toarray()
             X1_block = self.X.sum(0) # X'1
             G_sum = n
-            
+
         else:
             GX = G @ X_R
             G1 = G @ np.ones(G.shape[0])
             XX_block = self.X.T @ GX
             X1_block = self.X.T @ G1
             G_sum = G1.sum()
-            
-        if scipy.sparse.issparse(XX_block):
-            XX_block = XX_block.toarray()
-            
-        # correct XX_block for standardize
-        
+
         xm, xs = self.centers_, self.scaling_
+        if scipy.sparse.issparse(self.X): # in this case X has not been transformed 
 
-        if columns is not None:
-            XX_block -= (np.multiply.outer(X1_block, xm[columns]) + np.multiply.outer(xm, X1_block[columns]))
-            XX_block += np.multiply.outer(xm, xm[columns]) * G_sum
-            XX_block /= np.multiply.outer(xs, xs[columns])
-        else:
-            XX_block -= (np.multiply.outer(X1_block, xm) + np.multiply.outer(xm, X1_block))
-            XX_block += np.multiply.outer(xm, xm) * G_sum
-            XX_block /= np.multiply.outer(xs, xs)
+            # correct XX_block for standardize
+
+            if transformed:
+                if columns is not None:
+                    XX_block -= (np.multiply.outer(X1_block, xm[columns]) + np.multiply.outer(xm, X1_block[columns]))
+                    XX_block += np.multiply.outer(xm, xm[columns]) * G_sum
+                    XX_block /= np.multiply.outer(xs, xs[columns])
+                else:
+                    XX_block -= (np.multiply.outer(X1_block, xm) + np.multiply.outer(xm, X1_block))
+                    XX_block += np.multiply.outer(xm, xm) * G_sum
+                    XX_block /= np.multiply.outer(xs, xs)
+
+                X1_block -= G_sum * xm
+                X1_block /= xs
+
+        else: # X will already have been transformed, so
+              # we only have to undo it if transformed=False
+
+            # or, the reverse
+            # X'GX = SA'GAS + X'G1xm' + xm1'GX - xm1'G1xm'
+
+            if not transformed:
+                XX_block *= np.multiply.outer(xs, xs[columns])
+                X1_block *= xs
+                XX_block += (np.multiply.outer(X1_block, xm[columns]) + np.multiply.outer(xm, X1_block[columns]))
+                XX_block += np.multiply.outer(xm, xm[columns]) * G_sum
+                X1_block += G_sum * xm
 
-        X1_block -= G_sum * xm
-        X1_block /= xs
-        
         Q = np.zeros((XX_block.shape[0] + 1,
                       XX_block.shape[1] + 1))
         Q[1:,1:] = XX_block
@@ -160,7 +185,7 @@ def quadratic_form(self,
         else:
             Q[0,1:] = X1_block
         Q[0,0] = G_sum
-        
+
         return Q
 
 @add_dataclass_docstring
diff --git a/tests/test_design.py b/tests/test_design.py
index af6c895..98357f2 100644
--- a/tests/test_design.py
+++ b/tests/test_design.py
@@ -86,55 +86,59 @@ def test_design(X, weights, standardize, intercept):
     assert np.allclose(design.T @ V_l, design_s.T @ V_l)
     
 
-# @pytest.mark.parametrize('X', [rng.standard_normal((n, p)),
-#                                scipy.sparse.csc_array(rng.standard_normal((n, p)))] )
-# @pytest.mark.parametrize('weights', [np.ones(n), rng.uniform(1, 2, size=(n,))])
-# @pytest.mark.parametrize('standardize', [True, False])
-# @pytest.mark.parametrize('intercept', [True, False])
-# @pytest.mark.parametrize('gls', [None,
-#                                  rng.uniform(1, 2, size=(n,)),
-#                                  rng.uniform(1, 2, size=(n,n))])
-# def test_quadratic_form(X, weights, standardize, intercept, gls):
-#     """
-#     test the linear / adjoint maps of Design from an np.ndarray and a scipy.sparse.csc_array
-#     """
+@pytest.mark.parametrize('X', [rng.standard_normal((n, p)),
+                               scipy.sparse.csc_array(rng.standard_normal((n, p)))] )
+@pytest.mark.parametrize('weights', [np.ones(n), rng.uniform(1, 2, size=(n,))])
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+@pytest.mark.parametrize('gls', [None,
+                                 np.diag(rng.uniform(1, 2, size=(n,))),
+                                 rng.uniform(1, 2, size=(n,n))])
+def test_quadratic_form(X, weights, standardize, intercept, gls):
+    """
+    test the linear / adjoint maps of Design from an np.ndarray and a scipy.sparse.csc_array
+    """
     
-#     if gls is not None and gls.ndim == 2:
-#         gls = (gls + gls.T) / 2
-#     X_s = scipy.sparse.csc_array(X)
-#     design = Design(X, W, standardize=stand)
-#     design_s = Design(X_s, W, standardize=stand)
-
-#     Q_s = design_s.quadratic_form(G=gls, columns=columns)
-#     Q = design.quadratic_form(G=gls, columns=columns)
-
-#     X_eff = scipy.sparse.csc_array(X).toarray()
-#     if stand:
-#         xm = (X_eff * W[:,None]).sum(0) / W.sum()
-#         x2 = (X_eff**2 * W[:,None]).sum(0) / W.sum()
-#         xs = np.sqrt(x2 - xm**2)
-#     else:
-#         xm = np.zeros(p)
-#         xs = np.ones(p)
-
-#     X_eff = X_eff / xs[None,:] - np.multiply.outer(np.ones(n), xm / xs)
-#     X_eff = np.concatenate([np.ones((n,1)), X_eff], axis=1)
-
-#     if columns is not None:
-#         columns = np.array(columns)
-#         columns += 1
-#         columns = np.hstack([[0], columns])
-
-#     if gls is None:
-#         Q_eff = X_eff.T @ X_eff
-#     elif gls.ndim == 1:
-#         Q_eff = X_eff.T @ (gls[:, None] * X_eff)
-#     else:
-#         Q_eff = X_eff.T @ gls @ X_eff
-#     if columns is not None:
-#         Q_eff = Q_eff[:, columns]
-#     assert np.allclose(Q, Q_eff)  # calculation is done correctly
-
-#     assert np.allclose(Q_s, Q) # sparse and dense agree
+    if gls is not None and gls.ndim == 2:
+        gls = (gls + gls.T) / 2
+
+    X_s = scipy.sparse.csc_array(X)
+    W = weights
+    design = Design(X, W, standardize=standardize)
+    design_s = Design(X_s, W, standardize=standardize)
+
+    columns = [0,1,3]
+    Q_s = design_s.quadratic_form(G=gls, columns=columns, transformed=standardize)
+    Q = design.quadratic_form(G=gls, columns=columns, transformed=standardize)
+
+    X_eff = scipy.sparse.csc_array(X).toarray()
+
+    if standardize:
+        xm = (X_eff * W[:,None]).sum(0) / W.sum()
+        x2 = (X_eff**2 * W[:,None]).sum(0) / W.sum()
+        xs = np.sqrt(x2 - xm**2)
+    else:
+        xm = np.zeros(p)
+        xs = np.ones(p)
+
+    X_eff = X_eff / xs[None,:] - np.multiply.outer(np.ones(n), xm / xs)
+    X_eff = np.concatenate([np.ones((n,1)), X_eff], axis=1)
+
+    if columns is not None:
+        columns = np.array(columns)
+        columns += 1
+        columns = np.hstack([[0], columns])
+
+    if gls is None:
+        Q_eff = X_eff.T @ X_eff
+    elif gls.ndim == 1:
+        Q_eff = X_eff.T @ (gls[:, None] * X_eff)
+    else:
+        Q_eff = X_eff.T @ gls @ X_eff
+    if columns is not None:
+        Q_eff = Q_eff[:, columns]
+    assert np.allclose(Q, Q_eff)  # calculation is done correctly
+
+    assert np.allclose(Q_s, Q) # sparse and dense agree
 
         

From c1eab7336f93821388dddfc448874c6b5da95ea1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 12 Apr 2024 15:07:28 -0700
Subject: [PATCH 25/44] data split inference working well (slightly better
 without standardizing)

---
 glmnet/cox.py           |   3 +-
 glmnet/glm.py           |   4 +-
 glmnet/glmnet.py        |   2 +-
 glmnet/inference.py     | 352 ++++++++++++----------------------------
 tests/test_inference.py | 143 ++++++++++++----
 5 files changed, 215 insertions(+), 289 deletions(-)

diff --git a/glmnet/cox.py b/glmnet/cox.py
index 10335dd..4256850 100644
--- a/glmnet/cox.py
+++ b/glmnet/cox.py
@@ -213,7 +213,8 @@ def _summarize(self,
         # IRLS used normalized weights,
         # this unnormalizes them...
 
-        unscaled_precision_ = self.design_.quadratic_form(self._information)
+        unscaled_precision_ = self.design_.quadratic_form(self._information,
+                                                          transformed=False)
         
         keep = np.ones(unscaled_precision_.shape[0]-1, bool)
         if exclude is not []:
diff --git a/glmnet/glm.py b/glmnet/glm.py
index 9992509..1d28c94 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -669,7 +669,6 @@ def fit(self,
                                                               weight,
                                                               X.shape)
 
-
         else:
             self.summary_ = self.covariance_ = None
             
@@ -684,7 +683,8 @@ def _summarize(self,
         # IRLS used normalized weights,
         # this unnormalizes them...
 
-        unscaled_precision_ = self.design_.quadratic_form(self._information)
+        unscaled_precision_ = self.design_.quadratic_form(self._information,
+                                                          transformed=False)
         
         keep = np.ones(unscaled_precision_.shape[0]-1, bool)
         if exclude is not []:
diff --git a/glmnet/glmnet.py b/glmnet/glmnet.py
index 7a2ed39..e51236a 100644
--- a/glmnet/glmnet.py
+++ b/glmnet/glmnet.py
@@ -475,7 +475,7 @@ def _get_initial_state(self,
                 intercept_ = 0
         return GLMState(coef_, intercept_), keep.astype(float)
 
-    def get_LM(self):
+    def get_GLM(self):
         return GLM(family=self.family,
                    fit_intercept=self.fit_intercept,
                    offset_id=self.offset_id,
diff --git a/glmnet/inference.py b/glmnet/inference.py
index df36cb7..0193c41 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -18,6 +18,8 @@
 from sklearn.utils.validation import check_is_fitted
 from sklearn.base import clone
 
+from .base import _get_design
+
 def fixed_lambda_estimator(glmnet_obj,
                            lambda_val):
     check_is_fitted(glmnet_obj, ["coefs_", "feature_names_in_"])
@@ -54,7 +56,7 @@ def lasso_inference(glmnet_obj,
                                                                   Y_sel)
     if weight_sel is None:
         weight_sel = np.ones(X_sel.shape[0])
-        
+
     fixed_lambda.fit(X_sel,
                      Y_sel,
                      sample_weight=weight_sel,
@@ -68,137 +70,51 @@ def lasso_inference(glmnet_obj,
 
     active_set = np.nonzero(FL.coef_ != 0)[0]
 
-    # find selection data covariance
+    # fit unpenalized model on selection data
 
-    unreg_sel_LM = glmnet_obj.get_LM()
-    unreg_sel_LM.summarize = True
-    unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
-    C_sel = unreg_sel_LM.covariance_
+    unreg_sel_GLM = glmnet_obj.get_GLM()
 
-    keep = np.zeros(FL.design_.shape[1], bool)
-    if hasattr(FL, 'penalty_factors'):
-        penfac = FL.penalty_factors[active_set]
-    else:
-        penfac = np.ones(active_set.shape[0])
-    signs = np.sign(FL.coef_[active_set])
+    unreg_sel_GLM.summarize = True
+    unreg_sel_GLM.fit(X_sel[:,active_set],
+                      Y_sel,
+                      sample_weight=weight_sel)
 
-    # we multiply by weight_sel.sum() due to this factor
-    # appearing in glmnet objective...
-    if FL.fit_intercept:
-        penfac = np.hstack([0, penfac])
-        signs = np.hstack([0, signs])
-        keep[0] = 1
-        keep[1 + active_set] = 1
-        stacked = np.hstack([FL.intercept_,
-                             FL.coef_[active_set]])
-        delta = weight_sel.sum() * lambda_val * penfac * signs
-    else:
-        # keep[active_set] = 1
-        keep[1 + active_set] = 1
-        stacked = FL.coef_[active_set]
-        delta = weight_sel.sum() * lambda_val * penfac * signs
+    # quadratic approximation up to scaling and a factor of weight_sel.sum()
 
-    D = np.column_stack([np.ones(X_sel.shape[0]), X_sel[:,active_set]])
-    other_mle = np.linalg.pinv(D) @ Y_sel
-    A_sel = C_sel / unreg_sel_LM.dispersion_
-
-    delta = A_sel @ delta
-    noisy_mle = stacked + delta
-
-    penalized = penfac > 0
-    sel_P = -np.diag(signs[penalized]) @ np.eye(A_sel.shape[0])[penalized]
-
-    # fit full model
-
-    Y_sel = np.asarray(Y_sel)
-
-    # X'(Y-X\hat{\beta}) (appropriately weighted)
-    subgrad1 = FL.design_.T @ FL.state_.logl_score(FL._family, Y_sel, sample_weight=weight_sel)
+    design_sel = _get_design(X_sel[:,active_set],
+                             weight_sel,
+                             standardize=glmnet_obj.standardize,
+                             intercept=glmnet_obj.fit_intercept)
+    scaling_sel = design_sel.scaling_
+    P_sel = design_sel.quadratic_form(unreg_sel_GLM._information,
+                                      transformed=True)
+    Q_sel = np.linalg.inv(P_sel)
+    if not FL.fit_intercept:
+        Q_sel = Q_sel[1:,1:]
+        
+    # fit unpenalized model on full data
 
     X_full, Y_full, weight_full = full_data
-
-    unreg_LM = glmnet_obj.get_LM()
-    unreg_LM.summarize = True
-    unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
-    C_full = unreg_LM.covariance_
-
-    # selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_sel).sum(), 0, 1)
-    
-    ## TODO: will this handle fit_intercept?
-    if FL.fit_intercept:
-        full_mle = np.hstack([unreg_LM.intercept_,
-                              unreg_LM.coef_])
-    else:
-        full_mle = unreg_LM.coef_
-
-    ## iterate over coordinates
-    Ls = np.zeros_like(noisy_mle)
-    Us = np.zeros_like(noisy_mle)
-    mles = np.zeros_like(noisy_mle)
-    pvals = np.zeros_like(noisy_mle)
-    for i in range(len(noisy_mle)):
-        e_i = np.zeros_like(noisy_mle)
-        e_i[i] = 1.
-        ## call selection_interval and return
-        L, U, mle, p = selection_interval(
-            support_directions=sel_P,
-            support_offsets=-signs[penalized] * delta[penalized], 
-            covariance_noisy=C_sel,
-            covariance_full=C_full,
-            noisy_observation=noisy_mle,
-            observation=full_mle,
-            direction_of_interest=e_i,
-            level=level,
-        )
-        Ls[i] = L
-        Us[i] = U
-        mles[i] = mle
-        pvals[i] = p
-    
-    idx = (active_set).tolist()
-    if FL.fit_intercept:
-        idx = [-1] + idx
-    return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
-
-def lasso_bootstrap_inference(glmnet_obj,
-                              lambda_val,
-                              selection_data, ## X = \Sigma^{-1/2}, Y = \Sigma^{-1/2}\hat\beta^{(b)}
-                              full_data,
-                              C_full, ## bootstrap variance
-                              alpha,
-                              level=.9):
-
-    fixed_lambda = fixed_lambda_estimator(glmnet_obj, lambda_val)
-    fixed_lambda.fit_intercept = False
-    gaussian = sm.families.Gaussian()
-    fixed_lambda.family = gaussian
-    X_sel, Y_sel, weight_sel = selection_data
-
-    if weight_sel is None:
-        weight_sel = np.ones(X_sel.shape[0])
+    if weight_full is None:
+        weight_full = np.ones(X_full.shape[0])
         
-    fixed_lambda.fit(X_sel, Y_sel, sample_weight=weight_sel)
-    FL = fixed_lambda # shorthand
-    
-    if (not np.all(FL.upper_limits == np.inf) or
-        not np.all(FL.lower_limits == -np.inf)):
-        raise NotImplementedError('upper/lower limits coming soon')
-
-    active_set = np.nonzero(FL.coef_ != 0)[0]
-
-    # find selection data covariance
-
-    # unreg_sel_LM = glmnet_obj.get_LM()
-    # unreg_sel_LM.summarize = True
-    # unreg_sel_LM.fit(X_sel[:,active_set], Y_sel, sample_weight=weight_sel)
-    # C_sel = unreg_sel_LM.covariance_
-    # C_full = np.eye(len(active_set))
+    unreg_GLM = glmnet_obj.get_GLM()
+    unreg_GLM.summarize = True
+    unreg_GLM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
+
+    # quadratic approximation
+
+    design_full = _get_design(X_full[:,active_set],
+                              weight_full,
+                              standardize=glmnet_obj.standardize,
+                              intercept=glmnet_obj.fit_intercept)
+    scaling_full = design_full.scaling_
+    P_full = design_full.quadratic_form(unreg_GLM._information,
+                                        transformed=True)
+    Q_full = np.linalg.inv(P_full)
+    if not FL.fit_intercept:
+        Q_full = Q_full[1:,1:]
 
-    state = FL.state_
-    information = FL._family.information(state,
-                                         weight_sel)
-    
-    keep = np.zeros(FL.design_.shape[1], bool)
     if hasattr(FL, 'penalty_factors'):
         penfac = FL.penalty_factors[active_set]
     else:
@@ -207,76 +123,77 @@ def lasso_bootstrap_inference(glmnet_obj,
 
     # we multiply by weight_sel.sum() due to this factor
     # appearing in glmnet objective...
+
     if FL.fit_intercept:
+        # correct the scaling
         penfac = np.hstack([0, penfac])
         signs = np.hstack([0, signs])
-        keep[0] = 1
-        keep[1 + active_set] = 1
-        stacked = np.hstack([state.intercept,
-                             state.coef[active_set]])
-        delta = np.zeros(keep.sum())
-        delta[1:] = weight_sel.sum() * lambda_val * penfac[1:] * signs[1:]
+        stacked = np.hstack([FL.state_.intercept,
+                             FL.state_.coef[active_set]])
     else:
-        keep[1 + active_set] = 1
-        stacked = state.coef[active_set]
-        delta = weight_sel.sum() * lambda_val * penfac * signs
+        stacked = FL.state_.coef[active_set]
 
-    C_full_ = C_full.copy()
-    C_full = C_full[keep[1:]][:,keep[1:]]
-    C_sel = (1 + alpha) * C_full
+    delta = lambda_val * penfac * signs
 
-    delta = C_sel @ delta
-    noisy_mle = stacked + delta
+    # remember loss of glmnet is normalized by sum of weights
+    # when taking newton step adjust by weight_sel.sum()
+    
+    delta = Q_sel @ delta * weight_sel.sum() 
+    noisy_mle = stacked + delta # unitless scale
+
+    transform_to_scaled = np.diag(np.hstack([1, 1/scaling_sel]))
+    transform_to_scaled[0,1:] = -design_sel.centers_/scaling_sel
+    DEVEL = False
+    if DEVEL: # compare to coefs and intercept on scales with units
+        scaled_mle = transform_to_scaled @ noisy_mle
+        #scaled_mle[1:] /= scaling_sel
+        #scaled_mle[0] = scaled_mle[0] - (scaled_mle[1:] * design_sel.centers_).sum()
+        assert( np.allclose(scaled_mle, np.hstack([unreg_sel_GLM.intercept_, unreg_sel_GLM.coef_])))
 
     penalized = penfac > 0
-    sel_P = -np.diag(signs[penalized]) @ np.eye(C_sel.shape[0])[penalized]
-
-    # fit full model
-
-    X_full, Y_full, weight_full = full_data
-
-    unreg_LM = glmnet_obj.get_LM()
-    unreg_LM.summarize = True
-    unreg_LM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
-    # C_full = unreg_LM.covariance_
+    sel_P = -np.diag(signs[penalized]) @ np.eye(Q_sel.shape[0])[penalized]
+    assert (np.all(sel_P @ noisy_mle < -signs[penalized] * delta[penalized]))
 
-    # selection_proportion = np.clip(np.diag(C_full).sum() / np.diag(C_sel).sum(), 0, 1)
-    
     ## TODO: will this handle fit_intercept?
     if FL.fit_intercept:
-        full_mle = np.hstack([unreg_LM.intercept_,
-                              unreg_LM.coef_])
+        full_mle = np.hstack([unreg_GLM.state_.intercept,
+                              unreg_GLM.state_.coef])
     else:
-        full_mle = unreg_LM.coef_
+        full_mle = unreg_GLM.state_.coef
 
     ## iterate over coordinates
     Ls = np.zeros_like(noisy_mle)
     Us = np.zeros_like(noisy_mle)
     mles = np.zeros_like(noisy_mle)
     pvals = np.zeros_like(noisy_mle)
-    for i in range(len(noisy_mle)):
-        e_i = np.zeros_like(noisy_mle)
-        e_i[i] = 1.
-        # e_i = np.linalg.pinv(X_sel) @ e_i
+
+    if FL.fit_intercept:
+        transform_to_scaled = np.diag(np.hstack([1, 1/scaling_full]))
+        transform_to_scaled[0,1:] = -design_full.centers_/scaling_full
+    else:
+        transform_to_scaled = np.diag(1/scaling_full)
+
+    for i in range(transform_to_scaled.shape[0]):
         ## call selection_interval and return
         L, U, mle, p = selection_interval(
             support_directions=sel_P,
             support_offsets=-signs[penalized] * delta[penalized], 
-            covariance_noisy=C_sel,
-            covariance_full=C_full,
+            Q_noisy=Q_sel,
+            Q_full=Q_full,
             noisy_observation=noisy_mle,
             observation=full_mle,
-            direction_of_interest=e_i,
+            direction_of_interest=transform_to_scaled[i],
             level=level,
+            dispersion=unreg_GLM.dispersion_
         )
         Ls[i] = L
         Us[i] = U
         mles[i] = mle
         pvals[i] = p
-
+    
     idx = (active_set).tolist()
     if FL.fit_intercept:
-        idx = [-1] + idx
+        idx = ['intercept'] + idx
     return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
 
 class constraints(object):
@@ -407,71 +324,6 @@ def value(self, Y):
         """
         return (self.linear_part.dot(Y) - self.offset).max()
 
-    def conditional(self,
-                    linear_part,
-                    value,
-                    rank=None):
-        """
-        Return an equivalent constraint 
-        after having conditioned on a linear equality.
-        
-        Let the inequality constraints be specified by
-        `(A,b)` and the equality constraints be specified
-        by `(C,d)`. We form equivalent inequality constraints by 
-        considering the residual
-
-        .. math::
-           
-           AY - E(AY|CY=d)
-
-        Parameters
-        ----------
-
-        linear_part : np.float((k,q))
-             Linear part of equality constraint, `C` above.
-
-        value : np.float(k)
-             Value of equality constraint, `b` above.
-
-        rank : int
-            If not None, this should specify
-            the rank of `linear_part`. Defaults
-            to `min(k,q)`.
-
-        Returns
-        -------
-
-        conditional_con : `constraints`
-             Affine constraints having applied equality constraint.
-
-        """
-
-        S = self.covariance
-        C, d = linear_part, value
-
-        M1 = S.dot(C.T)
-        M2 = C.dot(M1)
-
-        if M2.shape:
-            M2i = np.linalg.pinv(M2)
-            delta_cov = M1.dot(M2i.dot(M1.T))
-            delta_mean = M1.dot(M2i.dot(C.dot(self.mean) - d))
-        else:
-            delta_cov = np.multiply.outer(M1, M1) / M2
-            delta_mean = M1 * (C.dot(self.mean) - d) / M2
-
-        if rank is None:
-            if len(linear_part.shape) == 2:
-                rank = min(linear_part.shape)
-            else:
-                rank = 1
-
-        return constraints(self.linear_part,
-                           self.offset,
-                           covariance=self.covariance - delta_cov,
-                           mean=self.mean - delta_mean,
-                           rank=self.rank - rank)
-
     def bounds(self, direction_of_interest, Y):
         r"""
         For a realization $Y$ of the random variable $N(\mu,\Sigma)$
@@ -803,9 +655,9 @@ def interval_constraints(support_directions,
                      direction_of_interest)
 
     U = A.dot(X) - b
-    #print(U, np.fabs(U).max()* tol, b, '?')
 
     if not np.all(U  < tol * np.fabs(U).max()):
+        stop
         warn('constraints not satisfied: %s' % repr(U))
 
     Sw = S.dot(w)
@@ -836,13 +688,14 @@ def interval_constraints(support_directions,
 
 def selection_interval(support_directions, 
                        support_offsets,
-                       covariance_noisy,
-                       covariance_full,
+                       Q_noisy,
+                       Q_full,
                        noisy_observation,
                        observation,
                        direction_of_interest,
                        tol = 1.e-4,
                        level = 0.90,
+                       dispersion=1,
                        UMAU=True):
     """
     Given an affine in cone constraint $\{z:Az+b \leq 0\}$ (elementwise)
@@ -890,23 +743,24 @@ def selection_interval(support_directions,
      V,
      upper_bound,
      _) = interval_constraints(support_directions, 
-                                support_offsets,
-                                covariance_full,
-                                noisy_observation,
-                                direction_of_interest,
-                                tol=tol)
+                               support_offsets,
+                               Q_full,
+                               noisy_observation,
+                               direction_of_interest,
+                               tol=tol)
+
     ## lars path lockhart tibs^2 taylor paper
-    sigma = np.sqrt(direction_of_interest.T @ covariance_full @ direction_of_interest)
+    sigma = np.sqrt(direction_of_interest.T @ Q_full @ direction_of_interest * dispersion)
     ## sqrt(alpha) is just sigma_noisy - sigma_full
-    smoothing_sigma = np.sqrt(max(direction_of_interest.T @ covariance_noisy @ direction_of_interest - sigma**2, 0))
+    noisy_sigma = np.sqrt(direction_of_interest.T @ Q_noisy @ direction_of_interest * dispersion)
+    smoothing_sigma = np.sqrt(max(noisy_sigma**2 - sigma**2, 0))
+    
     estimate = (direction_of_interest * observation).sum()
     noisy_estimate = (direction_of_interest * noisy_observation).sum()
 
-    #print(V, estimate, noisy_estimate, 'huh')
-    
     if smoothing_sigma > 1e-6 * sigma:
 
-        grid = np.linspace(estimate - 20 * sigma, estimate + 20 * sigma, 801)
+        grid = np.linspace(estimate - 10 * sigma, estimate + 10 * sigma, 2001)
         weight = (
             normal_dbn.cdf(
                 (upper_bound - grid) / (smoothing_sigma)
@@ -914,24 +768,23 @@ def selection_interval(support_directions,
                 (lower_bound - grid) / (smoothing_sigma)
             )
         )
-        weight *= normal_dbn.pdf(grid / sigma)
+        weight *= normal_dbn.pdf((grid - estimate) / sigma)
 
         sel_distr = discrete_family(grid, weight)
         L, U = sel_distr.equal_tailed_interval(estimate,
-                                                alpha=1-level)
+                                               alpha=1-level)
         mle, _, _ = sel_distr.MLE(estimate)
-        mle *= sigma**2
-        pval = sel_distr.cdf(0, estimate)
+        mle *= sigma**2; mle += estimate
+        pval = sel_distr.cdf(-estimate / sigma**2, estimate)
         pval = 2 * min(pval, 1-pval)
-        L *= sigma**2
-        U *= sigma**2
+        L *= sigma**2; L += estimate
+        U *= sigma**2; U += estimate
     else:
 
         lb = estimate - 20 * sigma
         ub = estimate + 20 * sigma
 
-        if estimate < lower_bound or estimate > upper_bound:
-            #print(estimate, lower_bound, upper_bound)
+        if noisy_estimate < lower_bound or noisy_estimate > upper_bound:
             warn('Constraints not satisfied: returning [-np.inf, np.inf]')
             return -np.inf, np.inf, np.nan, np.nan
         
@@ -972,7 +825,6 @@ def F(theta):
             U = np.inf
 
         mle = np.nan
-        
 
     return L, U, mle, pval
 
@@ -1072,9 +924,11 @@ def __init__(self, sufficient_stat, weights, theta=0.):
         The weights are normalized to sum to 1.
         """
         xw = np.array(sorted(zip(sufficient_stat, weights)), float)
+        xw[:,1] = np.maximum(xw[:,1], 1e-40)
         self._x = xw[:,0]
         self._w = xw[:,1]
         self._lw = np.log(xw[:,1])
+        
         self._w /= self._w.sum() # make sure they are a pmf
         self.n = len(xw)
         self._theta = np.nan
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 95f4a19..a37851c 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -10,49 +10,120 @@
 @pytest.mark.parametrize('n', [500])
 @pytest.mark.parametrize('p', [103])
 @pytest.mark.parametrize('fit_intercept', [True, False])
-def test_inference(n, p, fit_intercept):
-    run_inference(n, p, fit_intercept)
+@pytest.mark.parametrize('standardize', [True, False])
+def test_inference(n,
+                   p,
+                   fit_intercept,
+                   standardize,
+                   ntrial=10):
+    # run a few times to be sure KKT conditions not violated
 
-def run_inference(n, p, fit_intercept):
+    for _ in range(ntrial):
+        run_inference(n,
+                      p,
+                      fit_intercept,
+                      standardize)
 
-    rng = np.random.default_rng(0)
+def run_inference(n,
+                  p,
+                  fit_intercept,
+                  standardize,
+                  rng=None,
+                  alt=False,
+                  s=3,
+                  prop=0.8):
+
+    if rng is None:
+        rng = np.random.default_rng(0)
     X = rng.standard_normal((n, p))
-    Y = np.random.standard_normal(n) * 2
+    D = np.linspace(1, p, p) / p + 1
+    X *= D[None,:]
+    Y = rng.standard_normal(n) * 2
+    beta = np.zeros(p)
+    if alt:
+        beta[:s] = rng.standard_normal(s)
+    Y += X @ beta
     Df = pd.DataFrame({'response':Y})
 
     fam = sm.families.Gaussian()
     GN = GLMNet(response_id='response',
                 family=fam,
-                fit_intercept=fit_intercept)
+                fit_intercept=fit_intercept,
+                standardize=standardize)
     GN.fit(X, Df)
-    return lasso_inference(GN, 
-                           GN.lambda_values_[10],
-                           (X[:50], Df.iloc[:50], None),
-                           (X, Df, None))
-
-# ncover = 0
-# nsel = 0
-# niter = 1000
-# for i in range(niter):
-#     GN = GLMNet(response_id='response',
-#                 family=fam,
-#                 control=GNcontrol,
-#                 fit_intercept=False,
-#              )
-    
-#     X = rng.standard_normal((n, p))
-#     Y = rng.standard_normal(n) * 1
-#     Df = pd.DataFrame({'response':Y})
-#     GN.fit(X, Df)
-#     sel_slice = slice(0, 4*n//5)
-#     res = lasso_inference(GN, 
-#                           GN.lambda_values_[10],
-#                           (X[sel_slice], Df.iloc[sel_slice], None),
-#                           (X, Df, None),
-#                           level=0.8)
-
-#     ncover += ((res['lower'] < 0) & (res['upper'] > 0)).sum()
-#     nsel += res.shape[0]
-
-# ncover / nsel
+    m = int(prop*n)
+
+    df = lasso_inference(GN, 
+                         GN.lambda_values_[10],
+                         (X[:m], Df.iloc[:m], None),
+                         (X, Df, None))
+    if fit_intercept:
+        active_set = np.array(df.index[1:]).astype(int)
+    else:
+        active_set = np.array(df.index).astype(int)
+    X_sel = X[:,active_set]
+
+    if fit_intercept:
+        X_sel = np.column_stack([np.ones(X_sel.shape[0]), X_sel])
+    targets = np.linalg.pinv(X_sel) @ X @ beta
+
+    df['target'] = targets
+    return df
+
+
+def main_null(fit_intercept=True,
+              standardize=True,
+              n=2000,
+              p=50,
+              ntrial=500,
+              rng=None):
+
+    ncover = 0
+    nsel = 0
+
+    dfs = []
+
+    rng = np.random.default_rng(0)
+    for i in range(ntrial):
+        df = run_inference(n,
+                           p,
+                           fit_intercept,
+                           standardize,
+                           rng=rng)
+        dfs.append(df)
+        all_df = pd.concat(dfs)
+        ncover += ((all_df['lower'] < 0) & (all_df['upper'] > 0)).sum()
+        nsel += all_df.shape[0]
+
+        print('cover:', ncover / nsel, 'typeI:', (all_df['pval'] < 0.05).mean())
+
+
+def main_alt(fit_intercept=True,
+              standardize=True,
+              n=2000,
+              p=50,
+              ntrial=500,
+              rng=None):
+
+    ncover = 0
+    nsel = 0
+
+    dfs = []
+
+    rng = np.random.default_rng(0)
+    for i in range(ntrial):
+        df = run_inference(n,
+                           p,
+                           fit_intercept,
+                           standardize,
+                           rng=rng,
+                           alt=True)
+        dfs.append(df)
+        all_df = pd.concat(dfs)
+        ncover += ((all_df['lower'] < all_df['target']) & (all_df['upper'] > all_df['target'])).sum()
+        nsel += all_df.shape[0]
+
+        print('cover:', ncover / nsel)
+    return all_df
 
+        

From f1ef8ca7efb050c06c9c07905682e6b45836ff9f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 12 Apr 2024 15:24:04 -0700
Subject: [PATCH 26/44] getting rid of some unused code

---
 glmnet/inference.py     | 399 +---------------------------------------
 tests/test_inference.py |   2 +-
 2 files changed, 4 insertions(+), 397 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 0193c41..36ee070 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -196,400 +196,6 @@ def lasso_inference(glmnet_obj,
         idx = ['intercept'] + idx
     return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
 
-class constraints(object):
-
-    r"""
-    This class is the core object for affine selection procedures.
-    It is meant to describe sets of the form $C$
-    where
-
-    .. math::
-
-       C = \left\{z: Az\leq b \right \}
-
-    Its main purpose is to consider slices through $C$
-    and the conditional distribution of a Gaussian $N(\mu,\Sigma)$
-    restricted to such slices.
-
-    Notes
-    -----
-
-    In this parameterization, the parameter `self.mean` corresponds
-    to the *reference measure* that is being truncated. It is not the
-    mean of the truncated Gaussian.
-
-    >>> positive = constraints(-np.identity(2), np.zeros(2))
-    >>> Y = np.array([3, 4.4])
-    >>> eta = np.array([1, 1], np.float)
-    >>> list(positive.interval(eta, Y))  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-    [4.62...,  10.17...]
-    >>> positive.pivot(eta, Y) # doctest: +ELLIPSIS 
-    5.187...-07
-    >>> list(positive.bounds(eta, Y)) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-    [1.399..., 7.4..., inf, 1.414...]  
-    >>> 
-
-    """
-
-    def __init__(self, 
-                 linear_part,
-                 offset,
-                 covariance=None,
-                 mean=None,
-                 rank=None):
-        r"""
-        Create a new inequality. 
-
-        Parameters
-        ----------
-
-        linear_part : np.float((q,p))
-            The linear part, $A$ of the affine constraint
-            $\{z:Az \leq b\}$. 
-
-        offset: np.float(q)
-            The offset part, $b$ of the affine constraint
-            $\{z:Az \leq b\}$. 
-
-        covariance : np.float((p,p))
-            Covariance matrix of Gaussian distribution to be 
-            truncated. Defaults to `np.identity(self.dim)`.
-
-        mean : np.float(p)
-            Mean vector of Gaussian distribution to be 
-            truncated. Defaults to `np.zeros(self.dim)`.
-
-        rank : int
-            If not None, this should specify
-            the rank of the covariance matrix. Defaults
-            to self.dim.
-
-        """
-
-        self.linear_part, self.offset = \
-            linear_part, np.asarray(offset)
-        
-        if self.linear_part.ndim == 2:
-            self.dim = self.linear_part.shape[1]
-        else:
-            self.dim = self.linear_part.shape[0]
-
-        if rank is None:
-            self.rank = self.dim
-        else:
-            self.rank = rank
-
-        if covariance is None:
-            covariance = np.identity(self.dim)
-        self.covariance = covariance
-
-        if mean is None:
-            mean = np.zeros(self.dim)
-        self.mean = mean
-
-    def __copy__(self):
-        r"""
-        A copy of the constraints.
-
-        Also copies _sqrt_cov, _sqrt_inv if attributes are present.
-        """
-        con = constraints(copy(self.linear_part),
-                          copy(self.offset),
-                          mean=copy(self.mean),
-                          covariance=copy(self.covariance),
-                          rank=self.rank)
-        if hasattr(self, "_sqrt_cov"):
-            con._sqrt_cov = self._sqrt_cov.copy()
-            con._sqrt_inv = self._sqrt_inv.copy()
-            con._rowspace = self._rowspace.copy()
-        return con
-
-    def __call__(self, Y, tol=1.e-3):
-        r"""
-        Check whether Y satisfies the linear
-        inequality constraints.
-        >>> A = np.array([[1., -1.], [1., -1.]])
-        >>> B = np.array([1., 1.])
-        >>> con = constraints(A, B)
-        >>> Y = np.array([-1., 1.])
-        >>> con(Y)
-        True
-        """
-        V1 = self.value(Y)
-        return np.all(V1 < tol * np.fabs(V1).max(0))
-
-    def value(self, Y):
-        r"""
-        Compute $\max(Ay-b)$.
-        """
-        return (self.linear_part.dot(Y) - self.offset).max()
-
-    def bounds(self, direction_of_interest, Y):
-        r"""
-        For a realization $Y$ of the random variable $N(\mu,\Sigma)$
-        truncated to $C$ specified by `self.constraints` compute
-        the slice of the inequality constraints in a 
-        given direction $\eta$.
-
-        Parameters
-        ----------
-
-        direction_of_interest: np.float
-            A direction $\eta$ for which we may want to form 
-            selection intervals or a test.
-
-        Y : np.float
-            A realization of $N(\mu,\Sigma)$ where 
-            $\Sigma$ is `self.covariance`.
-
-        Returns
-        -------
-
-        L : np.float
-            Lower truncation bound.
-
-        Z : np.float
-            The observed $\eta^TY$
-
-        U : np.float
-            Upper truncation bound.
-
-        S : np.float
-            Standard deviation of $\eta^TY$.
-
-        
-        """
-        return interval_constraints(self.linear_part,
-                                    self.offset,
-                                    self.covariance,
-                                    Y,
-                                    direction_of_interest)
-
-    def pivot(self, 
-              direction_of_interest, 
-              Y,
-              null_value=None,
-              alternative='greater'):
-        r"""
-        For a realization $Y$ of the random variable $N(\mu,\Sigma)$
-        truncated to $C$ specified by `self.constraints` compute
-        the slice of the inequality constraints in a 
-        given direction $\eta$ and test whether 
-        $\eta^T\mu$ is greater then 0, less than 0 or equal to 0.
-
-        Parameters
-        ----------
-
-        direction_of_interest: np.float
-            A direction $\eta$ for which we may want to form 
-            selection intervals or a test.
-
-        Y : np.float
-            A realization of $N(0,\Sigma)$ where 
-            $\Sigma$ is `self.covariance`.
-
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-
-        Returns
-        -------
-
-        P : np.float
-            $p$-value of corresponding test.
-
-        Notes
-        -----
-
-        All of the tests are based on the exact pivot $F$ given
-        by the truncated Gaussian distribution for the
-        given direction $\eta$. If the alternative is 'greater'
-        then we return $1-F$; if it is 'less' we return $F$
-        and if it is 'twosided' we return $2 \min(F,1-F)$.
-
-        """
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-        L, Z, U, S = self.bounds(direction_of_interest, Y)
-
-        if null_value is None:
-            meanZ = (direction_of_interest * self.mean).sum()
-        else:
-            meanZ = null_value
-
-        P = truncnorm_cdf((Z-meanZ)/S, (L-meanZ)/S, (U-meanZ)/S)
-
-        if alternative == 'greater':
-            return 1 - P
-        elif alternative == 'less':
-            return P
-        else:
-            return max(2 * min(P, 1-P), 0)
-
-    def interval(self, direction_of_interest, Y,
-                 alpha=0.05, UMAU=False):
-        r"""
-        For a realization $Y$ of the random variable $N(\mu,\Sigma)$
-        truncated to $C$ specified by `self.constraints` compute
-        the slice of the inequality constraints in a 
-        given direction $\eta$ and test whether 
-        $\eta^T\mu$ is greater then 0, less than 0 or equal to 0.
-        
-        Parameters
-        ----------
-
-        direction_of_interest: np.float
-
-            A direction $\eta$ for which we may want to form 
-            selection intervals or a test.
-
-        Y : np.float
-
-            A realization of $N(0,\Sigma)$ where 
-            $\Sigma$ is `self.covariance`.
-
-        alpha : float
-
-            What level of confidence?
-
-        UMAU : bool
-
-            Use the UMAU intervals?
-
-        Returns
-        -------
-
-        [L,U] : selection interval
-
-        
-        """
-        ## THE DOCUMENTATION IS NOT GOOD ! HAS TO BE CHANGED !
-
-        return selection_interval( \
-            self.linear_part,
-            self.offset,
-            self.covariance,
-            Y,
-            direction_of_interest,
-            level=1. - alpha,
-            UMAU=UMAU)
-
-    def covariance_factors(self, force=True):
-        """
-        Factor `self.covariance`,
-        finding a possibly non-square square-root.
-
-        Parameters
-        ----------
-
-        force : bool
-            If True, force a recomputation of
-            the covariance. If not, assumes that
-            covariance has not changed.
-
-        """
-        if not hasattr(self, "_sqrt_cov") or force:
-
-            # original matrix is np.dot(U, (D**2 * U).T)
-
-            U, D = np.linalg.svd(self.covariance)[:2]
-            D = np.sqrt(D[:self.rank])
-            U = U[:,:self.rank]
-        
-            self._sqrt_cov = U * D[None,:]
-            self._sqrt_inv = (U / D[None,:]).T
-            self._rowspace = U
-
-        return self._sqrt_cov, self._sqrt_inv, self._rowspace
-
-    def whiten(self):
-        """
-
-        Return a whitened version of constraints in a different
-        basis, and a change of basis matrix.
-
-        If `self.covariance` is rank deficient, the change-of
-        basis matrix will not be square.
-
-        Returns
-        -------
-
-        inverse_map : callable
-
-        forward_map : callable
-
-        white_con : `constraints`
-
-        """
-        sqrt_cov, sqrt_inv = self.covariance_factors()[:2]
-
-        new_A = self.linear_part.dot(sqrt_cov)
-        den = np.sqrt((new_A**2).sum(1))
-        new_b = self.offset - self.linear_part.dot(self.mean)
-        new_con = constraints(new_A / den[:,None], new_b / den)
-
-        mu = self.mean.copy()
-
-        def inverse_map(Z): 
-            if Z.ndim == 2:
-                return sqrt_cov.dot(Z) + mu[:,None]
-            else:
-                return sqrt_cov.dot(Z) + mu
-
-        forward_map = lambda W: sqrt_inv.dot(W - mu)
-
-        return inverse_map, forward_map, new_con
-
-    def project_rowspace(self, direction):
-        """
-        Project a vector onto rowspace
-        of the covariance.
-        """
-        rowspace = self.covariance_factors()[-1]
-        return rowspace.dot(rowspace.T.dot(direction))
-
-    def solve(self, direction):
-        """
-        Compute the inverse of the covariance
-        times a direction vector.
-        """
-        sqrt_inv = self.covariance_factors()[1]
-        return sqrt_inv.T.dot(sqrt_inv.dot(direction))
-
-
-
-
-
-def stack(*cons):
-    """
-    Combine constraints into a large constaint
-    by intersection. 
-
-    Parameters
-    ----------
-
-    cons : [`selection.affine.constraints`_]
-         A sequence of constraints.
-
-    Returns
-    -------
-
-    intersection : `selection.affine.constraints`_
-
-    Notes
-    -----
-
-    Resulting constraint will have mean 0 and covariance $I$.
-
-    """
-    ineq, ineq_off = [], []
-    for con in cons:
-        ineq.append(con.linear_part)
-        ineq_off.append(con.offset)
-
-    intersection = constraints(np.vstack(ineq), 
-                               np.hstack(ineq_off))
-    return intersection
 
 def interval_constraints(support_directions, 
                          support_offsets,
@@ -780,11 +386,12 @@ def selection_interval(support_directions,
         L *= sigma**2; L += estimate
         U *= sigma**2; U += estimate
     else:
-
+        warnings.warn('assuming data for selection is same as for inference -- using hard selection')
+        
         lb = estimate - 20 * sigma
         ub = estimate + 20 * sigma
 
-        if noisy_estimate < lower_bound or noisy_estimate > upper_bound:
+        if estimate < lower_bound or estimate > upper_bound:
             warn('Constraints not satisfied: returning [-np.inf, np.inf]')
             return -np.inf, np.inf, np.nan, np.nan
         
diff --git a/tests/test_inference.py b/tests/test_inference.py
index a37851c..1a6b768 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -41,7 +41,7 @@ def run_inference(n,
     Y = rng.standard_normal(n) * 2
     beta = np.zeros(p)
     if alt:
-        beta[:s] = rng.standard_normal(s)
+        beta[rng.choice(p, s, replace=False)] = rng.standard_normal(s) / 2
     Y += X @ beta
     Df = pd.DataFrame({'response':Y})
 

From 8d771119ec5f524a4ce4b7c4f2bb09825b3b4e76 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 12 Apr 2024 16:44:33 -0700
Subject: [PATCH 27/44] BF: GLM coefs are on original scale, need to transform
 to unitless for comparison to selection estimates

---
 glmnet/inference.py     | 35 ++++++++++++--------------------
 tests/test_inference.py | 45 +++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 36ee070..a1b11ad 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -121,9 +121,6 @@ def lasso_inference(glmnet_obj,
         penfac = np.ones(active_set.shape[0])
     signs = np.sign(FL.coef_[active_set])
 
-    # we multiply by weight_sel.sum() due to this factor
-    # appearing in glmnet objective...
-
     if FL.fit_intercept:
         # correct the scaling
         penfac = np.hstack([0, penfac])
@@ -141,25 +138,19 @@ def lasso_inference(glmnet_obj,
     delta = Q_sel @ delta * weight_sel.sum() 
     noisy_mle = stacked + delta # unitless scale
 
-    transform_to_scaled = np.diag(np.hstack([1, 1/scaling_sel]))
-    transform_to_scaled[0,1:] = -design_sel.centers_/scaling_sel
-    DEVEL = False
-    if DEVEL: # compare to coefs and intercept on scales with units
-        scaled_mle = transform_to_scaled @ noisy_mle
-        #scaled_mle[1:] /= scaling_sel
-        #scaled_mle[0] = scaled_mle[0] - (scaled_mle[1:] * design_sel.centers_).sum()
-        assert( np.allclose(scaled_mle, np.hstack([unreg_sel_GLM.intercept_, unreg_sel_GLM.coef_])))
-
     penalized = penfac > 0
     sel_P = -np.diag(signs[penalized]) @ np.eye(Q_sel.shape[0])[penalized]
     assert (np.all(sel_P @ noisy_mle < -signs[penalized] * delta[penalized]))
 
-    ## TODO: will this handle fit_intercept?
+    ## the GLM's coef and intercept are on the original scale
+    ## we transform them here to the (typically) unitless "standardized" scale
     if FL.fit_intercept:
-        full_mle = np.hstack([unreg_GLM.state_.intercept,
-                              unreg_GLM.state_.coef])
+        intercept = unreg_GLM.state_.intercept + (unreg_GLM.state_.coef * design_full.centers_).sum()
+        coef = unreg_GLM.state_.coef * scaling_full
+        full_mle = np.hstack([intercept,
+                              coef])
     else:
-        full_mle = unreg_GLM.state_.coef
+        full_mle = unreg_GLM.state_.coef * scaling_full
 
     ## iterate over coordinates
     Ls = np.zeros_like(noisy_mle)
@@ -168,12 +159,12 @@ def lasso_inference(glmnet_obj,
     pvals = np.zeros_like(noisy_mle)
 
     if FL.fit_intercept:
-        transform_to_scaled = np.diag(np.hstack([1, 1/scaling_full]))
-        transform_to_scaled[0,1:] = -design_full.centers_/scaling_full
+        transform_to_original = np.diag(np.hstack([1, 1/scaling_full]))
+        transform_to_original[0,1:] = -design_full.centers_/scaling_full
     else:
-        transform_to_scaled = np.diag(1/scaling_full)
+        transform_to_original = np.diag(1/scaling_full)
 
-    for i in range(transform_to_scaled.shape[0]):
+    for i in range(transform_to_original.shape[0]):
         ## call selection_interval and return
         L, U, mle, p = selection_interval(
             support_directions=sel_P,
@@ -182,7 +173,7 @@ def lasso_inference(glmnet_obj,
             Q_full=Q_full,
             noisy_observation=noisy_mle,
             observation=full_mle,
-            direction_of_interest=transform_to_scaled[i],
+            direction_of_interest=transform_to_original[i],
             level=level,
             dispersion=unreg_GLM.dispersion_
         )
@@ -350,7 +341,7 @@ def selection_interval(support_directions,
      upper_bound,
      _) = interval_constraints(support_directions, 
                                support_offsets,
-                               Q_full,
+                               Q_noisy,
                                noisy_observation,
                                direction_of_interest,
                                tol=tol)
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 1a6b768..1ed06b1 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -31,25 +31,36 @@ def run_inference(n,
                   rng=None,
                   alt=False,
                   s=3,
-                  prop=0.8):
+                  prop=0.8,
+                  family='gaussian'):
 
     if rng is None:
         rng = np.random.default_rng(0)
     X = rng.standard_normal((n, p))
-    D = np.linspace(1, p, p) / p + 1
+    D = np.linspace(1, p, p) / p + 0.2
     X *= D[None,:]
     Y = rng.standard_normal(n) * 2
     beta = np.zeros(p)
     if alt:
-        beta[rng.choice(p, s, replace=False)] = rng.standard_normal(s) / 2
-    Y += X @ beta
-    Df = pd.DataFrame({'response':Y})
-
-    fam = sm.families.Gaussian()
+        subs = rng.choice(p, s, replace=False)
+        beta[subs] = rng.standard_normal(s) 
+    mu = X @ beta
+    Y += mu
+
+    if family == 'gaussian':
+        fam = sm.families.Gaussian()
+    elif family == 'probit':
+        fam = sm.families.Binomial(link=sm.families.links.Probit())
+        Y = (mu + rng.standard_normal(n)) > 0
+    else:
+        raise ValueError('only testing "gaussian" and "probit"')
+    
     GN = GLMNet(response_id='response',
                 family=fam,
                 fit_intercept=fit_intercept,
                 standardize=standardize)
+    Df = pd.DataFrame({'response':Y})
+
     GN.fit(X, Df)
     m = int(prop*n)
 
@@ -65,9 +76,13 @@ def run_inference(n,
 
     if fit_intercept:
         X_sel = np.column_stack([np.ones(X_sel.shape[0]), X_sel])
-    targets = np.linalg.pinv(X_sel) @ X @ beta
+    targets = np.linalg.pinv(X_sel) @ mu
 
     df['target'] = targets
+    if alt:
+        if family == 'probit' and not set(subs).issubset(active_set):
+            df['target'] *= np.nan
+            
     return df
 
 
@@ -76,7 +91,8 @@ def main_null(fit_intercept=True,
               n=2000,
               p=50,
               ntrial=500,
-              rng=None):
+              rng=None,
+              family='gaussian'):
 
     ncover = 0
     nsel = 0
@@ -89,7 +105,8 @@ def main_null(fit_intercept=True,
                            p,
                            fit_intercept,
                            standardize,
-                           rng=rng)
+                           rng=rng,
+                           family=family)
         dfs.append(df)
         all_df = pd.concat(dfs)
         ncover += ((all_df['lower'] < 0) & (all_df['upper'] > 0)).sum()
@@ -100,10 +117,11 @@ def main_null(fit_intercept=True,
 
 def main_alt(fit_intercept=True,
               standardize=True,
-              n=2000,
+              n=200,
               p=50,
               ntrial=500,
-              rng=None):
+              rng=None,
+              family='gaussian'):
 
     ncover = 0
     nsel = 0
@@ -117,9 +135,10 @@ def main_alt(fit_intercept=True,
                            fit_intercept,
                            standardize,
                            rng=rng,
+                           family=family,
                            alt=True)
         dfs.append(df)
-        all_df = pd.concat(dfs)
+        all_df = pd.concat(dfs).dropna()
         ncover += ((all_df['lower'] < all_df['target']) & (all_df['upper'] > all_df['target'])).sum()
         nsel += all_df.shape[0]
 

From a5f466916d511c9a08e7bdf445fc8695cbad7b50 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 2 May 2024 16:06:03 -0700
Subject: [PATCH 28/44] BF: misspelled penalty_factor

---
 glmnet/inference.py     |  5 +++--
 tests/test_inference.py | 13 +++++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index a1b11ad..1804e97 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -22,6 +22,7 @@
 
 def fixed_lambda_estimator(glmnet_obj,
                            lambda_val):
+
     check_is_fitted(glmnet_obj, ["coefs_", "feature_names_in_"])
     estimator = glmnet_obj.regularized_estimator(
                            lambda_val=lambda_val,
@@ -115,10 +116,10 @@ def lasso_inference(glmnet_obj,
     if not FL.fit_intercept:
         Q_full = Q_full[1:,1:]
 
-    if hasattr(FL, 'penalty_factors'):
-        penfac = FL.penalty_factors[active_set]
+        penfac = FL.penalty_factor[active_set]
     else:
         penfac = np.ones(active_set.shape[0])
+
     signs = np.sign(FL.coef_[active_set])
 
     if FL.fit_intercept:
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 1ed06b1..cb8856e 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -32,6 +32,7 @@ def run_inference(n,
                   alt=False,
                   s=3,
                   prop=0.8,
+                  penalty_facs=True,
                   family='gaussian'):
 
     if rng is None:
@@ -55,17 +56,25 @@ def run_inference(n,
     else:
         raise ValueError('only testing "gaussian" and "probit"')
     
+    if penalty_facs:
+        penalty_factor = np.ones(X.shape[1])
+        penalty_factor[:10] = 0.5
+    else:
+        penalty_factor = None
+
     GN = GLMNet(response_id='response',
                 family=fam,
                 fit_intercept=fit_intercept,
-                standardize=standardize)
+                standardize=standardize,
+                penalty_factor=penalty_factor)
+
     Df = pd.DataFrame({'response':Y})
 
     GN.fit(X, Df)
     m = int(prop*n)
 
     df = lasso_inference(GN, 
-                         GN.lambda_values_[10],
+                         GN.lambda_values_[min(10, GN.lambda_values_.shape[0]-1)],
                          (X[:m], Df.iloc[:m], None),
                          (X, Df, None))
     if fit_intercept:

From 207c189e9183b3c73a2f2c6568d51df24c837bb1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 13 Jun 2024 11:31:52 -0700
Subject: [PATCH 29/44] WIP: trying to get active and nonactive constraint as
 dataclass

---
 glmnet/glmnet.py        |  25 ++++
 glmnet/inference.py     | 305 ++++++++++++++++++++++------------------
 tests/test_inference.py |  10 +-
 3 files changed, 196 insertions(+), 144 deletions(-)

diff --git a/glmnet/glmnet.py b/glmnet/glmnet.py
index e51236a..fdd3ff1 100644
--- a/glmnet/glmnet.py
+++ b/glmnet/glmnet.py
@@ -482,3 +482,28 @@ def get_GLM(self):
                    weight_id=self.weight_id,
                    response_id=self.response_id)
 
+    def get_fixed_lambda(self,
+                         lambda_val):
+
+        check_is_fitted(self, ["coefs_", "feature_names_in_"])
+
+        estimator = self.regularized_estimator(
+                               lambda_val=lambda_val,
+                               family=self.family,
+                               alpha=self.alpha,
+                               penalty_factor=self.penalty_factor,
+                               lower_limits=self.lower_limits,
+                               upper_limits=self.upper_limits,
+                               fit_intercept=self.fit_intercept,
+                               standardize=self.standardize,
+                               control=self.control,
+                               offset_id=self.offset_id,
+                               weight_id=self.weight_id,            
+                               response_id=self.response_id,
+                               exclude=self.exclude
+                               )
+
+        coefs, intercepts = self.interpolate_coefs([lambda_val])
+        cls = self.state_.__class__
+        state = cls(coefs[0], intercepts[0])
+        return estimator, state
diff --git a/glmnet/inference.py b/glmnet/inference.py
index 1804e97..69b0a6d 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -7,7 +7,9 @@
 import warnings
 from warnings import warn
 from copy import copy
+
 import numpy as np
+import scipy.sparse
 import pandas as pd
 from scipy.stats import norm as normal_dbn
 import statsmodels.api as sm
@@ -15,35 +17,112 @@
 import mpmath as mp
 mp.dps = 80
 
-from sklearn.utils.validation import check_is_fitted
-from sklearn.base import clone
-
 from .base import _get_design
 
-def fixed_lambda_estimator(glmnet_obj,
-                           lambda_val):
-
-    check_is_fitted(glmnet_obj, ["coefs_", "feature_names_in_"])
-    estimator = glmnet_obj.regularized_estimator(
-                           lambda_val=lambda_val,
-                           family=glmnet_obj.family,
-                           alpha=glmnet_obj.alpha,
-                           penalty_factor=glmnet_obj.penalty_factor,
-                           lower_limits=glmnet_obj.lower_limits,
-                           upper_limits=glmnet_obj.upper_limits,
-                           fit_intercept=glmnet_obj.fit_intercept,
-                           standardize=glmnet_obj.standardize,
-                           control=glmnet_obj.control,
-                           offset_id=glmnet_obj.offset_id,
-                           weight_id=glmnet_obj.weight_id,            
-                           response_id=glmnet_obj.response_id,
-                           exclude=glmnet_obj.exclude
-                           )
-
-    coefs, intercepts = glmnet_obj.interpolate_coefs([lambda_val])
-    cls = glmnet_obj.state_.__class__
-    state = cls(coefs[0], intercepts[0])
-    return estimator, state
+from dataclasses import dataclass
+from typing import Optional
+
+@dataclass
+class AffineConstraint(object):
+
+    linear: np.ndarray
+    offset: np.ndarray
+    observed: np.ndarray
+    cov_selected: Optional[np.ndarray] = None # covariance with coefs in selected model
+
+    def __post_init__(self):
+
+        if not all (self.linear @ self.observed - self.offset <= 0):
+            raise ValueError('constraint not satisfied for observed data')
+    
+    def interval_constraints(self,
+                             target,
+                             gamma,
+                             tol = 1.e-4):
+        r"""
+        Given an affine constraint $\{z:Az \leq b \leq \}$ (elementwise)
+        specified with $A$ as `support_directions` and $b$ as
+        `support_offset`, a new direction of interest $\eta$, and
+        an `observed_data` is Gaussian vector $Z \sim N(\mu,\Sigma)$ 
+        with `covariance` matrix $\Sigma$, this
+        function returns $\eta^TZ$ as well as an interval
+        bounding this value. 
+
+        The interval constructed is such that the endpoints are 
+        independent of $\eta^TZ$, hence the $p$-value
+        of `Kac Rice`_
+        can be used to form an exact pivot.
+
+        Parameters
+        ----------
+
+        support_directions : np.float
+             Matrix specifying constraint, $A$.
+
+        support_offsets : np.float
+             Offset in constraint, $b$.
+
+        covariance : np.float
+             Covariance matrix of `observed_data`.
+
+        observed_data : np.float
+             Observations.
+
+        direction_of_interest : np.float
+             Direction in which we're interested for the
+             contrast.
+
+        tol : float
+             Relative tolerance parameter for deciding 
+             sign of $Az-b$.
+
+        Returns
+        -------
+
+        lower_bound : float
+
+        observed : float
+
+        upper_bound : float
+
+        sigma : float
+
+        """
+
+        # shorthand
+        A, b, D = (self.linear,
+                   self.offset,
+                   self.observed)
+
+        N = D - gamma * target
+
+        U = A.dot(N) - b
+        V = A @ gamma
+        
+        # Inequalities are now U + V @ target <= 0
+        
+        # adding the zero_coords in the denominator ensures that
+        # there are no divide-by-zero errors in RHS
+        # these coords are never used in upper_bound or lower_bound
+
+        zero_coords = V == 0
+        RHS = -U / (V + zero_coords)
+        RHS[zero_coords] = np.nan
+        pos_coords = V > tol * np.fabs(V).max()
+        neg_coords = V < -tol * np.fabs(V).max()
+
+        if np.any(pos_coords):
+            upper_bound = RHS[pos_coords].min()
+        else:
+            upper_bound = np.inf
+
+        if np.any(neg_coords):
+            lower_bound = RHS[neg_coords].max()
+        else:
+            lower_bound = -np.inf
+
+        print(lower_bound, upper_bound)
+        return lower_bound, upper_bound
 
 def lasso_inference(glmnet_obj,
                     lambda_val,
@@ -51,7 +130,7 @@ def lasso_inference(glmnet_obj,
                     full_data,
                     level=.9):
 
-    fixed_lambda, warm_state = fixed_lambda_estimator(glmnet_obj, lambda_val)
+    fixed_lambda, warm_state = glmnet_obj.get_fixed_lambda(lambda_val)
     X_sel, Y_sel, weight_sel = selection_data
     X_sel, Y_sel, _, _, weight_sel = fixed_lambda.get_data_arrays(X_sel,
                                                                   Y_sel)
@@ -64,12 +143,13 @@ def lasso_inference(glmnet_obj,
                      warm_state=warm_state)
 
     FL = fixed_lambda # shorthand
-    
+
     if (not np.all(FL.upper_limits >= FL.control.big) or
         not np.all(FL.lower_limits <= -FL.control.big)):
         raise NotImplementedError('upper/lower limits coming soon')
 
     active_set = np.nonzero(FL.coef_ != 0)[0]
+    inactive_set = np.nonzero(FL.coef_ == 0)[0]
 
     # fit unpenalized model on selection data
 
@@ -89,6 +169,7 @@ def lasso_inference(glmnet_obj,
     scaling_sel = design_sel.scaling_
     P_sel = design_sel.quadratic_form(unreg_sel_GLM._information,
                                       transformed=True)
+
     Q_sel = np.linalg.inv(P_sel)
     if not FL.fit_intercept:
         Q_sel = Q_sel[1:,1:]
@@ -165,11 +246,56 @@ def lasso_inference(glmnet_obj,
     else:
         transform_to_original = np.diag(1/scaling_full)
 
+    linear = sel_P
+    offset = -signs[penalized] * delta[penalized]
+    active_con = AffineConstraint(linear=linear,
+                                  offset=offset,
+                                  observed=noisy_mle)
+    cov_selected = Q_sel
+
+    inactive = True
+    if inactive:
+        pf = FL.regularizer_.penalty_factor
+        scale = 1 / (pf + (pf <= 0))
+
+        logl_score = FL.state_.logl_score(FL._family,
+                                          Y_sel,
+                                          weight_sel / weight_sel.sum())
+
+        # X_{-E}'(Y-X\hat{\beta}_E) -- \hat{\beta}_E is the LASSO soln, not GLM !
+        # this is (2nd order Taylor series sense) equivalent to
+        # X_{-E}'(Y-X\bar{\beta}_E) + X_{-E}'WX_E(X_E'WX_E)^{-1}\lambda_E s_E
+        # with \bar{\beta}_E the GLM soln
+        
+        score_ = (FL.design_.T @ logl_score)[1:]
+
+        score_ *= scale
+
+        # we now know that `score_` is bounded by \pm 1
+
+        I = scipy.sparse.eye(score_.shape[0])
+        L = scipy.sparse.vstack([I, -I])
+        O = np.ones(L.shape[0])
+
+        # X'WX_E
+        # here, X is the effective matrix implied by scale / intercept choices
+        P_inactive = FL.design_.quadratic_form(unreg_sel_GLM._information,
+                                               transformed=True,
+                                               columns=active_set)[1:]
+        if not FL.fit_intercept:
+            P_inactive = P_inactive[:,1:]
+            
+        # (\lambda_{-E})^{-1} X_{-E}'WX_E(X_E'WX_E)^{-1}\lambda_E s_E
+        I_inactive = (scale[:,None] * (P_inactive @ delta))[inactive_set] 
+
+        inactive_con = AffineConstraint(linear=L,
+                                        offset=O,
+                                        observed=score_)
+
     for i in range(transform_to_original.shape[0]):
         ## call selection_interval and return
         L, U, mle, p = selection_interval(
-            support_directions=sel_P,
-            support_offsets=-signs[penalized] * delta[penalized], 
+            active_con=active_con,
             Q_noisy=Q_sel,
             Q_full=Q_full,
             noisy_observation=noisy_mle,
@@ -189,103 +315,7 @@ def lasso_inference(glmnet_obj,
     return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
 
 
-def interval_constraints(support_directions, 
-                         support_offsets,
-                         covariance,
-                         observed_data, 
-                         direction_of_interest,
-                         tol = 1.e-4):
-    r"""
-    Given an affine constraint $\{z:Az \leq b \leq \}$ (elementwise)
-    specified with $A$ as `support_directions` and $b$ as
-    `support_offset`, a new direction of interest $\eta$, and
-    an `observed_data` is Gaussian vector $Z \sim N(\mu,\Sigma)$ 
-    with `covariance` matrix $\Sigma$, this
-    function returns $\eta^TZ$ as well as an interval
-    bounding this value. 
-
-    The interval constructed is such that the endpoints are 
-    independent of $\eta^TZ$, hence the $p$-value
-    of `Kac Rice`_
-    can be used to form an exact pivot.
-
-    Parameters
-    ----------
-
-    support_directions : np.float
-         Matrix specifying constraint, $A$.
-
-    support_offsets : np.float
-         Offset in constraint, $b$.
-
-    covariance : np.float
-         Covariance matrix of `observed_data`.
-
-    observed_data : np.float
-         Observations.
-
-    direction_of_interest : np.float
-         Direction in which we're interested for the
-         contrast.
-
-    tol : float
-         Relative tolerance parameter for deciding 
-         sign of $Az-b$.
-
-    Returns
-    -------
-
-    lower_bound : float
-
-    observed : float
-
-    upper_bound : float
-
-    sigma : float
-
-    """
-
-    # shorthand
-    A, b, S, X, w = (support_directions,
-                     support_offsets,
-                     covariance,
-                     observed_data,
-                     direction_of_interest)
-
-    U = A.dot(X) - b
-
-    if not np.all(U  < tol * np.fabs(U).max()):
-        stop
-        warn('constraints not satisfied: %s' % repr(U))
-
-    Sw = S.dot(w)
-    sigma = np.sqrt((w*Sw).sum())
-    alpha = A.dot(Sw) / sigma**2
-    V = (w*X).sum() # \eta^TZ
-
-    # adding the zero_coords in the denominator ensures that
-    # there are no divide-by-zero errors in RHS
-    # these coords are never used in upper_bound or lower_bound
-
-    zero_coords = alpha == 0
-    RHS = (-U + V * alpha) / (alpha + zero_coords)
-    RHS[zero_coords] = np.nan
-
-    pos_coords = alpha > tol * np.fabs(alpha).max()
-    if np.any(pos_coords):
-        upper_bound = RHS[pos_coords].min()
-    else:
-        upper_bound = np.inf
-    neg_coords = alpha < -tol * np.fabs(alpha).max()
-    if np.any(neg_coords):
-        lower_bound = RHS[neg_coords].max()
-    else:
-        lower_bound = -np.inf
-
-    return lower_bound, V, upper_bound, sigma
-
-def selection_interval(support_directions, 
-                       support_offsets,
+def selection_interval(active_con,
                        Q_noisy,
                        Q_full,
                        noisy_observation,
@@ -337,24 +367,21 @@ def selection_interval(support_directions,
 
     """
 
+    noisy_var = direction_of_interest.T @ Q_noisy @ direction_of_interest
+    noisy_estimate = (direction_of_interest * noisy_observation).sum()
+    estimate = (direction_of_interest * observation).sum()
+
     (lower_bound,
-     V,
-     upper_bound,
-     _) = interval_constraints(support_directions, 
-                               support_offsets,
-                               Q_noisy,
-                               noisy_observation,
-                               direction_of_interest,
-                               tol=tol)
+     upper_bound) = active_con.interval_constraints(noisy_estimate,
+                                                    Q_noisy @ direction_of_interest / unnormalized_var,
+                                                    tol=tol)
 
     ## lars path lockhart tibs^2 taylor paper
     sigma = np.sqrt(direction_of_interest.T @ Q_full @ direction_of_interest * dispersion)
     ## sqrt(alpha) is just sigma_noisy - sigma_full
-    noisy_sigma = np.sqrt(direction_of_interest.T @ Q_noisy @ direction_of_interest * dispersion)
+    noisy_sigma = np.sqrt(noisy_var * dispersion)
     smoothing_sigma = np.sqrt(max(noisy_sigma**2 - sigma**2, 0))
     
-    estimate = (direction_of_interest * observation).sum()
-    noisy_estimate = (direction_of_interest * noisy_observation).sum()
 
     if smoothing_sigma > 1e-6 * sigma:
 
diff --git a/tests/test_inference.py b/tests/test_inference.py
index cb8856e..88d1545 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -5,12 +5,12 @@
 import statsmodels.api as sm
 
 from glmnet import GLMNet
-from glmnet.inference import (fixed_lambda_estimator,
-                              lasso_inference)
-@pytest.mark.parametrize('n', [500])
-@pytest.mark.parametrize('p', [103])
-@pytest.mark.parametrize('fit_intercept', [True, False])
+from glmnet.inference import lasso_inference
+
 @pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('fit_intercept', [True, False])
+@pytest.mark.parametrize('p', [103])
+@pytest.mark.parametrize('n', [500])
 def test_inference(n,
                    p,
                    fit_intercept,

From b7b0c52331f3210db3afdbb260bcde7408e05e95 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 13 Jun 2024 11:44:40 -0700
Subject: [PATCH 30/44] adding auto test, BF: inactive bounded by lambda_val

---
 glmnet/inference.py     |  6 +++---
 tests/test_inference.py | 25 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 69b0a6d..b0b1906 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -271,11 +271,11 @@ def lasso_inference(glmnet_obj,
 
         score_ *= scale
 
-        # we now know that `score_` is bounded by \pm 1
+        # we now know that `score_` is bounded by \pm lambda_val
 
         I = scipy.sparse.eye(score_.shape[0])
         L = scipy.sparse.vstack([I, -I])
-        O = np.ones(L.shape[0])
+        O = np.ones(L.shape[0]) * lambda_val
 
         # X'WX_E
         # here, X is the effective matrix implied by scale / intercept choices
@@ -373,7 +373,7 @@ def selection_interval(active_con,
 
     (lower_bound,
      upper_bound) = active_con.interval_constraints(noisy_estimate,
-                                                    Q_noisy @ direction_of_interest / unnormalized_var,
+                                                    Q_noisy @ direction_of_interest / noisy_var,
                                                     tol=tol)
 
     ## lars path lockhart tibs^2 taylor paper
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 88d1545..422b4de 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -24,6 +24,31 @@ def test_inference(n,
                       fit_intercept,
                       standardize)
 
+def test_Auto():
+
+    df = sm.datasets.get_rdataset('Auto', package='ISLR').data
+    df = df.set_index('name')
+    X = np.asarray(df.drop(columns=['mpg']))
+    y = np.asarray(df['mpg'])
+
+    GN = GLMNet(response_id='response',
+                fit_intercept=True,
+                standardize=True)
+
+    Df = pd.DataFrame({'response':y})
+
+    GN.fit(X, Df)
+    prop = 0.8
+    n =	df.shape[0]
+    m = int(prop*n)
+
+    df = lasso_inference(GN,
+                         GN.lambda_values_[min(10, GN.lambda_values_.shape[0]-1)],
+                         (X[:m], Df.iloc[:m], None),
+                         (X, Df, None))
+    print(df)
+
+
 def run_inference(n,
                   p,
                   fit_intercept,

From ad282b1fced1b2ac8dc6d45fcc25616f42e2ff8d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 18 Jun 2024 14:37:40 -0700
Subject: [PATCH 31/44] WIP: cleaning up affine code

---
 glmnet/inference.py     | 52 ++++++++++++++++++++---------------------
 tests/test_inference.py | 10 ++++----
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index b0b1906..51925a3 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -28,7 +28,6 @@ class AffineConstraint(object):
     linear: np.ndarray
     offset: np.ndarray
     observed: np.ndarray
-    cov_selected: Optional[np.ndarray] = None # covariance with coefs in selected model
 
     def __post_init__(self):
 
@@ -166,10 +165,10 @@ def lasso_inference(glmnet_obj,
                              weight_sel,
                              standardize=glmnet_obj.standardize,
                              intercept=glmnet_obj.fit_intercept)
-    scaling_sel = design_sel.scaling_
     P_sel = design_sel.quadratic_form(unreg_sel_GLM._information,
                                       transformed=True)
 
+    scaling_sel = design_sel.scaling_
     Q_sel = np.linalg.inv(P_sel)
     if not FL.fit_intercept:
         Q_sel = Q_sel[1:,1:]
@@ -196,7 +195,6 @@ def lasso_inference(glmnet_obj,
     Q_full = np.linalg.inv(P_full)
     if not FL.fit_intercept:
         Q_full = Q_full[1:,1:]
-
         penfac = FL.penalty_factor[active_set]
     else:
         penfac = np.ones(active_set.shape[0])
@@ -209,20 +207,23 @@ def lasso_inference(glmnet_obj,
         signs = np.hstack([0, signs])
         stacked = np.hstack([FL.state_.intercept,
                              FL.state_.coef[active_set]])
+        noisy_mle = np.hstack([unreg_sel_GLM.state_.intercept,
+                               unreg_sel_GLM.state_.coef])
     else:
         stacked = FL.state_.coef[active_set]
+        noisy_mle = unreg_sel_GLM.state_.coef
 
-    delta = lambda_val * penfac * signs
+    # delta = lambda_val * penfac * signs
 
-    # remember loss of glmnet is normalized by sum of weights
-    # when taking newton step adjust by weight_sel.sum()
+    # # remember loss of glmnet is normalized by sum of weights
+    # # when taking newton step adjust by weight_sel.sum()
     
-    delta = Q_sel @ delta * weight_sel.sum() 
-    noisy_mle = stacked + delta # unitless scale
+    # delta = Q_sel @ delta * weight_sel.sum() 
+    # noisy_mle = stacked + delta # unitless scale
 
     penalized = penfac > 0
-    sel_P = -np.diag(signs[penalized]) @ np.eye(Q_sel.shape[0])[penalized]
-    assert (np.all(sel_P @ noisy_mle < -signs[penalized] * delta[penalized]))
+    sel_P = -np.diag(signs[penalized]) @ np.eye(Q_full.shape[0])[penalized]
+#    assert (np.all(sel_P @ noisy_mle < -signs[penalized] * delta[penalized]))
 
     ## the GLM's coef and intercept are on the original scale
     ## we transform them here to the (typically) unitless "standardized" scale
@@ -235,10 +236,10 @@ def lasso_inference(glmnet_obj,
         full_mle = unreg_GLM.state_.coef * scaling_full
 
     ## iterate over coordinates
-    Ls = np.zeros_like(noisy_mle)
-    Us = np.zeros_like(noisy_mle)
-    mles = np.zeros_like(noisy_mle)
-    pvals = np.zeros_like(noisy_mle)
+    Ls = np.zeros_like(stacked)
+    Us = np.zeros_like(stacked)
+    mles = np.zeros_like(stacked)
+    pvals = np.zeros_like(stacked)
 
     if FL.fit_intercept:
         transform_to_original = np.diag(np.hstack([1, 1/scaling_full]))
@@ -247,12 +248,10 @@ def lasso_inference(glmnet_obj,
         transform_to_original = np.diag(1/scaling_full)
 
     linear = sel_P
-    offset = -signs[penalized] * delta[penalized]
+    offset = np.zeros(sel_P.shape[0]) # -signs[penalized] * delta[penalized]
     active_con = AffineConstraint(linear=linear,
                                   offset=offset,
-                                  observed=noisy_mle)
-    cov_selected = Q_sel
-
+                                  observed=stacked)
     inactive = True
     if inactive:
         pf = FL.regularizer_.penalty_factor
@@ -268,9 +267,8 @@ def lasso_inference(glmnet_obj,
         # with \bar{\beta}_E the GLM soln
         
         score_ = (FL.design_.T @ logl_score)[1:]
-
         score_ *= scale
-
+        score_ = score_[inactive_set]
         # we now know that `score_` is bounded by \pm lambda_val
 
         I = scipy.sparse.eye(score_.shape[0])
@@ -279,14 +277,14 @@ def lasso_inference(glmnet_obj,
 
         # X'WX_E
         # here, X is the effective matrix implied by scale / intercept choices
-        P_inactive = FL.design_.quadratic_form(unreg_sel_GLM._information,
-                                               transformed=True,
-                                               columns=active_set)[1:]
-        if not FL.fit_intercept:
-            P_inactive = P_inactive[:,1:]
+        # P_inactive = FL.design_.quadratic_form(unreg_sel_GLM._information,
+        #                                        transformed=True,
+        #                                        columns=active_set)[1:]
+        # if not FL.fit_intercept:
+        #     P_inactive = P_inactive[:,1:]
             
-        # (\lambda_{-E})^{-1} X_{-E}'WX_E(X_E'WX_E)^{-1}\lambda_E s_E
-        I_inactive = (scale[:,None] * (P_inactive @ delta))[inactive_set] 
+        # # (\lambda_{-E})^{-1} X_{-E}'WX_E(X_E'WX_E)^{-1}\lambda_E s_E
+        # I_inactive = (scale[:,None] * (P_inactive @ delta))[inactive_set] 
 
         inactive_con = AffineConstraint(linear=L,
                                         offset=O,
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 422b4de..89a2907 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -56,7 +56,7 @@ def run_inference(n,
                   rng=None,
                   alt=False,
                   s=3,
-                  prop=0.8,
+                  prop=0.75,
                   penalty_facs=True,
                   family='gaussian'):
 
@@ -122,8 +122,8 @@ def run_inference(n,
 
 def main_null(fit_intercept=True,
               standardize=True,
-              n=2000,
-              p=50,
+              n=500,
+              p=75,
               ntrial=500,
               rng=None,
               family='gaussian'):
@@ -151,8 +151,8 @@ def main_null(fit_intercept=True,
 
 def main_alt(fit_intercept=True,
               standardize=True,
-              n=200,
-              p=50,
+              n=1000,
+              p=75,
               ntrial=500,
               rng=None,
               family='gaussian'):

From 542582d5fc0ffa807bb8e4cd8e067a65164a506b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jun 2024 11:32:13 -0400
Subject: [PATCH 32/44] added scale/unscale operators to convert between raw
 and scaled coefs within Design

---
 glmnet/base.py       | 87 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test_design.py | 35 +++++++++++++++++-
 2 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/glmnet/base.py b/glmnet/base.py
index 2517071..498a9e2 100644
--- a/glmnet/base.py
+++ b/glmnet/base.py
@@ -55,6 +55,10 @@ def __post_init__(self, standardize, intercept):
             self.X = (self.X - self.centers_[None,:]) / self.scaling_[None,:]
             self.X = np.asfortranarray(self.X)
 
+        self.unscaler_ = UnscaleOperator(centers=self.centers_,
+                                         scaling=self.scaling_)
+        self.scaler_ = ScaleOperator(centers=self.centers_,
+                                     scaling=self.scaling_)
     # LinearOperator API
 
     def _matvec(self, x):
@@ -96,6 +100,7 @@ def _rmatvec(self, r):
     def quadratic_form(self,
                        G=None,
                        columns=None,
+                       intercept=True,
                        transformed=False):
         '''
         if transformed is False: compute
@@ -188,6 +193,88 @@ def quadratic_form(self,
 
         return Q
 
+    def scaled_to_raw(self, state):
+        """
+        Take a "scaled" (intercept, coef) (these are the params
+        used by glmnet) and return a (intercept, coef) on "raw" scale.
+        """
+        unscaled = self.unscaler_ @ state._stack
+        coef = unscaled[1:]
+        intercept = unscaled[0]
+        klass = state.__class__
+        return klass(coef=coef,
+                     intercept=intercept)
+
+@dataclass
+class UnscaleOperator(LinearOperator):
+
+    scaling : np.ndarray
+    centers: np.ndarray
+
+    def __post_init__(self):
+        ncoef = self.scaling.shape[0]
+        self.shape = (ncoef+1,)*2
+        self.dtype = float
+
+    # LinearOperator API
+    def _matvec(self, stacked):
+
+        intercept = float(stacked[0])
+        coef = np.squeeze(stacked[1:])
+
+        result = np.zeros(stacked.shape[0])
+        result[1:] = coef / self.scaling
+        result[0] = intercept - (result[1:] * self.centers).sum()
+
+        return result
+
+    def _rmatvec(self, stacked):
+
+        intercept = float(stacked[0])
+        coef = np.squeeze(stacked[1:])
+
+        result = np.zeros(stacked.shape[0])
+        result[0] = intercept
+        result[1:] = coef / self.scaling
+        result[1:] -= intercept * self.centers / self.scaling
+
+        return result
+
+@dataclass
+class ScaleOperator(LinearOperator):
+
+    scaling : np.ndarray
+    centers: np.ndarray
+
+    def __post_init__(self):
+        ncoef = self.scaling.shape[0]
+        self.shape = (ncoef+1,)*2
+        self.dtype = float
+
+    # LinearOperator API
+    def _matvec(self, stacked):
+
+        intercept = float(stacked[0])
+        coef = np.squeeze(stacked[1:])
+
+        result = np.zeros(stacked.shape[0])
+        result[1:] = coef * self.scaling
+        result[0] = intercept + (coef * self.centers).sum()
+
+        return result
+
+    def _rmatvec(self, stacked):
+
+        intercept = float(stacked[0])
+        coef = np.squeeze(stacked[1:])
+
+        result = np.zeros(stacked.shape[0])
+        result[0] = intercept
+        result[1:] = coef * self.scaling
+        result[1:] += intercept * self.centers
+
+        return result
+    
 @add_dataclass_docstring
 @dataclass
 class DiagonalOperator(LinearOperator):
diff --git a/tests/test_design.py b/tests/test_design.py
index 98357f2..df9b8bd 100644
--- a/tests/test_design.py
+++ b/tests/test_design.py
@@ -141,4 +141,37 @@ def test_quadratic_form(X, weights, standardize, intercept, gls):
 
     assert np.allclose(Q_s, Q) # sparse and dense agree
 
-        
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+def test_unscaler(intercept,
+                  standardize):
+
+    X = np.random.standard_normal((20,5))
+    D = Design(X, intercept=True, standardize=True)
+    M = D.unscaler_ @ np.identity(6)
+    MT = D.unscaler_.T @ np.identity(6) 
+    assert np.allclose(MT, M.T)
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+def test_scaler(intercept,
+                  standardize):
+
+    X = np.random.standard_normal((20,5))
+    D = Design(X, intercept=True, standardize=True)
+    M = D.scaler_ @ np.identity(6)
+    MT = D.scaler_.T @ np.identity(6) 
+    assert np.allclose(MT, M.T)
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+def test_scaler_unscaler_inv(intercept,
+                             standardize):
+
+    X = np.random.standard_normal((20,5))
+    D = Design(X, intercept=True, standardize=True)
+    M = D.unscaler_ @ np.identity(6)
+    MI = D.scaler_ @ np.identity(6) 
+    assert np.allclose(M @ MI, np.identity(6))
+    

From 626c62d82be537516fcfcbc80d023770608993c4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jun 2024 11:37:39 -0400
Subject: [PATCH 33/44] adding raw_to_scaled method

---
 glmnet/base.py       | 12 ++++++++++++
 tests/test_design.py | 25 +++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/glmnet/base.py b/glmnet/base.py
index 498a9e2..0a3c174 100644
--- a/glmnet/base.py
+++ b/glmnet/base.py
@@ -205,6 +205,18 @@ def scaled_to_raw(self, state):
         return klass(coef=coef,
                      intercept=intercept)
 
+    def raw_to_scaled(self, state):
+        """
+        Take a "raw" (intercept, coef) and return a (intercept, coef)
+        on "scaled" scale (i.e. the scale GLMnet uses in its objective).
+        """
+        unscaled = self.scaler_ @ state._stack
+        coef = unscaled[1:]
+        intercept = unscaled[0]
+        klass = state.__class__
+        return klass(coef=coef,
+                     intercept=intercept)
+
 @dataclass
 class UnscaleOperator(LinearOperator):
 
diff --git a/tests/test_design.py b/tests/test_design.py
index df9b8bd..2c7dada 100644
--- a/tests/test_design.py
+++ b/tests/test_design.py
@@ -4,6 +4,7 @@
 import statsmodels.api as sm
 
 from glmnet.base import Design
+from glmnet.glm import GLMState
 rng = np.random.default_rng(0)
 
 n, p, q = 100, 5, 3
@@ -174,4 +175,28 @@ def test_scaler_unscaler_inv(intercept,
     M = D.unscaler_ @ np.identity(6)
     MI = D.scaler_ @ np.identity(6) 
     assert np.allclose(M @ MI, np.identity(6))
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+def test_scaler_to_raw_conversion(intercept,
+                                  standardize):
+
+    X = np.random.standard_normal((20,5))
+    D = Design(X, intercept=True, standardize=True)
+    raw_state = GLMState(coef=np.random.standard_normal(5),
+                         intercept=np.random.standard_normal())
+    scaled_state = D.raw_to_scaled(raw_state)
+    raw_state_roundtrip = D.scaled_to_raw(scaled_state)
+
+    assert np.allclose(raw_state._stack,
+                       raw_state_roundtrip._stack)
+    
+    scaled_state = GLMState(coef=np.random.standard_normal(5),
+                            intercept=np.random.standard_normal())
+    raw_state = D.scaled_to_raw(scaled_state)
+    scaled_state_roundtrip = D.raw_to_scaled(raw_state)
+
+    assert np.allclose(scaled_state._stack,
+                       scaled_state_roundtrip._stack)
+    
     

From 30ff33b6ec7d58b0668c8131b3aa5105ab2f7b52 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jun 2024 12:25:13 -0400
Subject: [PATCH 34/44] inference rework, using the scale/unscale operators

---
 glmnet/base.py      |  1 -
 glmnet/glm.py       |  8 ++---
 glmnet/inference.py | 85 +++++++++++++++++++--------------------------
 3 files changed, 38 insertions(+), 56 deletions(-)

diff --git a/glmnet/base.py b/glmnet/base.py
index 0a3c174..18708c0 100644
--- a/glmnet/base.py
+++ b/glmnet/base.py
@@ -100,7 +100,6 @@ def _rmatvec(self, r):
     def quadratic_form(self,
                        G=None,
                        columns=None,
-                       intercept=True,
                        transformed=False):
         '''
         if transformed is False: compute
diff --git a/glmnet/glm.py b/glmnet/glm.py
index 1d28c94..dd4470f 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -611,11 +611,9 @@ def score(self, X, y, sample_weight=None):
 '''.format(**_docstrings).strip()
 
     def _set_coef_intercept(self, state):
-        self.coef_ = state.coef.copy() # this makes a copy -- important to make this copy because `state.coef` is persistent
-        if hasattr(self, 'standardize') and self.standardize:
-            self.scaling_ = self.design_.scaling_
-            self.coef_ /= self.scaling_ 
-        self.intercept_ = state.intercept - (self.coef_ * self.design_.centers_).sum()
+        raw_state = self.design_.scaled_to_raw(state)
+        self.coef_ = raw_state.coef
+        self.intercept_ = raw_state.intercept
 
 GLMBase.__doc__ = '''
 Base class to fit a Generalized Linear Model (GLM). Base class for `GLMNet`.
diff --git a/glmnet/inference.py b/glmnet/inference.py
index 51925a3..9324f7c 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -120,7 +120,6 @@ def interval_constraints(self,
         else:
             lower_bound = -np.inf
 
-        print(lower_bound, upper_bound)
         return lower_bound, upper_bound
 
 def lasso_inference(glmnet_obj,
@@ -159,7 +158,7 @@ def lasso_inference(glmnet_obj,
                       Y_sel,
                       sample_weight=weight_sel)
 
-    # quadratic approximation up to scaling and a factor of weight_sel.sum()
+    # # quadratic approximation up to scaling and a factor of weight_sel.sum()
 
     design_sel = _get_design(X_sel[:,active_set],
                              weight_sel,
@@ -168,11 +167,8 @@ def lasso_inference(glmnet_obj,
     P_sel = design_sel.quadratic_form(unreg_sel_GLM._information,
                                       transformed=True)
 
-    scaling_sel = design_sel.scaling_
     Q_sel = np.linalg.inv(P_sel)
-    if not FL.fit_intercept:
-        Q_sel = Q_sel[1:,1:]
-        
+
     # fit unpenalized model on full data
 
     X_full, Y_full, weight_full = full_data
@@ -185,33 +181,32 @@ def lasso_inference(glmnet_obj,
 
     # quadratic approximation
 
-    design_full = _get_design(X_full[:,active_set],
-                              weight_full,
-                              standardize=glmnet_obj.standardize,
-                              intercept=glmnet_obj.fit_intercept)
-    scaling_full = design_full.scaling_
-    P_full = design_full.quadratic_form(unreg_GLM._information,
-                                        transformed=True)
+    D = _get_design(X_full[:,active_set],
+                    weight_full,
+                    standardize=glmnet_obj.standardize,
+                    intercept=glmnet_obj.fit_intercept)
+    P_full = D.quadratic_form(unreg_GLM._information,
+                              transformed=True)
     Q_full = np.linalg.inv(P_full)
     if not FL.fit_intercept:
-        Q_full = Q_full[1:,1:]
         penfac = FL.penalty_factor[active_set]
+        Q_full = Q_full[1:,1:]
+        Q_sel = Q_sel[1:,1:]
     else:
         penfac = np.ones(active_set.shape[0])
 
     signs = np.sign(FL.coef_[active_set])
 
+    noisy_mle = D.raw_to_scaled(unreg_sel_GLM.state_)
     if FL.fit_intercept:
-        # correct the scaling
         penfac = np.hstack([0, penfac])
         signs = np.hstack([0, signs])
         stacked = np.hstack([FL.state_.intercept,
                              FL.state_.coef[active_set]])
-        noisy_mle = np.hstack([unreg_sel_GLM.state_.intercept,
-                               unreg_sel_GLM.state_.coef])
+        noisy_mle = noisy_mle._stack
     else:
         stacked = FL.state_.coef[active_set]
-        noisy_mle = unreg_sel_GLM.state_.coef
+        noisy_mle = noisy_mle.coef
 
     # delta = lambda_val * penfac * signs
 
@@ -227,13 +222,11 @@ def lasso_inference(glmnet_obj,
 
     ## the GLM's coef and intercept are on the original scale
     ## we transform them here to the (typically) unitless "standardized" scale
+    full_mle = D.raw_to_scaled(unreg_GLM.state_)
     if FL.fit_intercept:
-        intercept = unreg_GLM.state_.intercept + (unreg_GLM.state_.coef * design_full.centers_).sum()
-        coef = unreg_GLM.state_.coef * scaling_full
-        full_mle = np.hstack([intercept,
-                              coef])
+        full_mle = full_mle._stack
     else:
-        full_mle = unreg_GLM.state_.coef * scaling_full
+        full_mle = full_mle.coef
 
     ## iterate over coordinates
     Ls = np.zeros_like(stacked)
@@ -241,11 +234,10 @@ def lasso_inference(glmnet_obj,
     mles = np.zeros_like(stacked)
     pvals = np.zeros_like(stacked)
 
-    if FL.fit_intercept:
-        transform_to_original = np.diag(np.hstack([1, 1/scaling_full]))
-        transform_to_original[0,1:] = -design_full.centers_/scaling_full
-    else:
-        transform_to_original = np.diag(1/scaling_full)
+    transform_to_raw = (D.unscaler_ @
+                        np.identity(D.shape[1]))
+    if not FL.fit_intercept:
+        transform_to_raw = transform_to_raw[1:,1:]
 
     linear = sel_P
     offset = np.zeros(sel_P.shape[0]) # -signs[penalized] * delta[penalized]
@@ -275,22 +267,11 @@ def lasso_inference(glmnet_obj,
         L = scipy.sparse.vstack([I, -I])
         O = np.ones(L.shape[0]) * lambda_val
 
-        # X'WX_E
-        # here, X is the effective matrix implied by scale / intercept choices
-        # P_inactive = FL.design_.quadratic_form(unreg_sel_GLM._information,
-        #                                        transformed=True,
-        #                                        columns=active_set)[1:]
-        # if not FL.fit_intercept:
-        #     P_inactive = P_inactive[:,1:]
-            
-        # # (\lambda_{-E})^{-1} X_{-E}'WX_E(X_E'WX_E)^{-1}\lambda_E s_E
-        # I_inactive = (scale[:,None] * (P_inactive @ delta))[inactive_set] 
-
         inactive_con = AffineConstraint(linear=L,
                                         offset=O,
                                         observed=score_)
 
-    for i in range(transform_to_original.shape[0]):
+    for i in range(transform_to_raw.shape[0]):
         ## call selection_interval and return
         L, U, mle, p = selection_interval(
             active_con=active_con,
@@ -298,7 +279,7 @@ def lasso_inference(glmnet_obj,
             Q_full=Q_full,
             noisy_observation=noisy_mle,
             observation=full_mle,
-            direction_of_interest=transform_to_original[i],
+            direction_of_interest=transform_to_raw[i],
             level=level,
             dispersion=unreg_GLM.dispersion_
         )
@@ -366,20 +347,17 @@ def selection_interval(active_con,
     """
 
     noisy_var = direction_of_interest.T @ Q_noisy @ direction_of_interest
+    full_var = direction_of_interest.T @ Q_full @ direction_of_interest
     noisy_estimate = (direction_of_interest * noisy_observation).sum()
     estimate = (direction_of_interest * observation).sum()
 
     (lower_bound,
      upper_bound) = active_con.interval_constraints(noisy_estimate,
-                                                    Q_noisy @ direction_of_interest / noisy_var,
+                                                    Q_full @ direction_of_interest / full_var,
                                                     tol=tol)
 
-    ## lars path lockhart tibs^2 taylor paper
-    sigma = np.sqrt(direction_of_interest.T @ Q_full @ direction_of_interest * dispersion)
-    ## sqrt(alpha) is just sigma_noisy - sigma_full
-    noisy_sigma = np.sqrt(noisy_var * dispersion)
-    smoothing_sigma = np.sqrt(max(noisy_sigma**2 - sigma**2, 0))
-    
+    sigma = np.sqrt(full_var * dispersion)
+    smoothing_sigma = np.sqrt(max(noisy_var - full_var, 0) * dispersion)
 
     if smoothing_sigma > 1e-6 * sigma:
 
@@ -472,8 +450,15 @@ def norm_interval(lower, upper):
     """
     #cdf = normal_dbn.cdf
     cdf = mp.ncdf
-    if lower > 0 and upper > 0:
-        return cdf(-lower) - cdf(-upper)
+    if lower > 0:
+        if lower < 4:
+            return cdf(-lower) - cdf(-upper)
+        else:
+            return np.exp(-np.fabs(lower*(upper-lower)))
+    elif upper < 0:
+        if upper < -4:
+            return np.exp(-np.fabs(upper*(upper-lower)))
+    
     else:
         return cdf(upper) - cdf(lower)
 

From 6291a390aca7722ad5ad0c67ba873d4ef9d3ba92 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jun 2024 12:31:23 -0400
Subject: [PATCH 35/44] fixing  handling of dispersion

---
 glmnet/inference.py     | 48 ++++++++++++++++++++---------------------
 tests/test_inference.py |  2 +-
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 9324f7c..53fa6c2 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -160,14 +160,14 @@ def lasso_inference(glmnet_obj,
 
     # # quadratic approximation up to scaling and a factor of weight_sel.sum()
 
-    design_sel = _get_design(X_sel[:,active_set],
-                             weight_sel,
-                             standardize=glmnet_obj.standardize,
-                             intercept=glmnet_obj.fit_intercept)
-    P_sel = design_sel.quadratic_form(unreg_sel_GLM._information,
-                                      transformed=True)
+    D_sel = _get_design(X_sel[:,active_set],
+                        weight_sel,
+                        standardize=glmnet_obj.standardize,
+                        intercept=glmnet_obj.fit_intercept)
+    P_noisy = D_sel.quadratic_form(unreg_sel_GLM._information,
+                                   transformed=True)
 
-    Q_sel = np.linalg.inv(P_sel)
+    Q_noisy = np.linalg.inv(P_noisy)
 
     # fit unpenalized model on full data
 
@@ -187,7 +187,7 @@ def lasso_inference(glmnet_obj,
                     intercept=glmnet_obj.fit_intercept)
     P_full = D.quadratic_form(unreg_GLM._information,
                               transformed=True)
-    Q_full = np.linalg.inv(P_full)
+    Q_full = np.linalg.inv(P_full) 
     if not FL.fit_intercept:
         penfac = FL.penalty_factor[active_set]
         Q_full = Q_full[1:,1:]
@@ -273,15 +273,14 @@ def lasso_inference(glmnet_obj,
 
     for i in range(transform_to_raw.shape[0]):
         ## call selection_interval and return
-        L, U, mle, p = selection_interval(
+        L, U, mle, p = _split_interval(
             active_con=active_con,
-            Q_noisy=Q_sel,
-            Q_full=Q_full,
+            Q_noisy=Q_noisy * unreg_GLM.dispersion_,
+            Q_full=Q_full * unreg_GLM.dispersion_,
             noisy_observation=noisy_mle,
             observation=full_mle,
             direction_of_interest=transform_to_raw[i],
-            level=level,
-            dispersion=unreg_GLM.dispersion_
+            level=level
         )
         Ls[i] = L
         Us[i] = U
@@ -294,16 +293,15 @@ def lasso_inference(glmnet_obj,
     return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
 
 
-def selection_interval(active_con,
-                       Q_noisy,
-                       Q_full,
-                       noisy_observation,
-                       observation,
-                       direction_of_interest,
-                       tol = 1.e-4,
-                       level = 0.90,
-                       dispersion=1,
-                       UMAU=True):
+def _split_interval(active_con,
+                    Q_noisy,
+                    Q_full,
+                    noisy_observation,
+                    observation,
+                    direction_of_interest,
+                    tol = 1.e-4,
+                    level = 0.90,
+                    dispersion=1):
     """
     Given an affine in cone constraint $\{z:Az+b \leq 0\}$ (elementwise)
     specified with $A$ as `support_directions` and $b$ as
@@ -356,8 +354,8 @@ def selection_interval(active_con,
                                                     Q_full @ direction_of_interest / full_var,
                                                     tol=tol)
 
-    sigma = np.sqrt(full_var * dispersion)
-    smoothing_sigma = np.sqrt(max(noisy_var - full_var, 0) * dispersion)
+    sigma = np.sqrt(full_var)
+    smoothing_sigma = np.sqrt(max(noisy_var - full_var, 0))
 
     if smoothing_sigma > 1e-6 * sigma:
 
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 89a2907..62f4e7f 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -65,7 +65,7 @@ def run_inference(n,
     X = rng.standard_normal((n, p))
     D = np.linspace(1, p, p) / p + 0.2
     X *= D[None,:]
-    Y = rng.standard_normal(n) * 2
+    Y = rng.standard_normal(n) * np.fabs(1+np.random.standard_normal())
     beta = np.zeros(p)
     if alt:
         subs = rng.choice(p, s, replace=False)

From d3ec1b965966f4ee3ea3c125a03378c45bf25842 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 27 Jun 2024 17:03:37 -0700
Subject: [PATCH 36/44] WIP: orthogonal working well under null

---
 glmnet/glm.py           |   4 +-
 glmnet/inference.py     | 445 ++++++++++++++++++++++------------------
 tests/test_inference.py | 218 +++++++++++++-------
 3 files changed, 383 insertions(+), 284 deletions(-)

diff --git a/glmnet/glm.py b/glmnet/glm.py
index dd4470f..e2ee28c 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -418,7 +418,7 @@ def fit(self,
             sample_weight=None,           # ignored
             regularizer=None,             # last 4 options non sklearn API
             warm_state=None,
-            dispersion=1,
+            dispersion=None,
             check=True,
             fit_null=True):
 
@@ -536,7 +536,7 @@ def obj_function(response,
 
         self._set_coef_intercept(state)
 
-        if (hasattr(self._family, "base") and 
+        if (dispersion is None and hasattr(self._family, "base") and 
             isinstance(self._family.base, sm_family.Gaussian)): # GLM specific
             # usual estimate of sigma^2
             self.dispersion_ = self.deviance_ / (nobs-nvar-self.fit_intercept) 
diff --git a/glmnet/inference.py b/glmnet/inference.py
index 53fa6c2..8f9fedc 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -126,7 +126,8 @@ def lasso_inference(glmnet_obj,
                     lambda_val,
                     selection_data,
                     full_data,
-                    level=.9):
+                    level=.9,
+                    dispersion=None):
 
     fixed_lambda, warm_state = glmnet_obj.get_fixed_lambda(lambda_val)
     X_sel, Y_sel, weight_sel = selection_data
@@ -147,151 +148,161 @@ def lasso_inference(glmnet_obj,
         raise NotImplementedError('upper/lower limits coming soon')
 
     active_set = np.nonzero(FL.coef_ != 0)[0]
-    inactive_set = np.nonzero(FL.coef_ == 0)[0]
+    if active_set.shape[0] != 0:
+        inactive_set = np.nonzero(FL.coef_ == 0)[0]
 
-    # fit unpenalized model on selection data
+        # fit unpenalized model on selection data
 
-    unreg_sel_GLM = glmnet_obj.get_GLM()
+        unreg_sel_GLM = glmnet_obj.get_GLM()
 
-    unreg_sel_GLM.summarize = True
-    unreg_sel_GLM.fit(X_sel[:,active_set],
-                      Y_sel,
-                      sample_weight=weight_sel)
+        unreg_sel_GLM.summarize = True
+        unreg_sel_GLM.fit(X_sel[:,active_set],
+                          Y_sel,
+                          sample_weight=weight_sel,
+                          dispersion=dispersion)
 
-    # # quadratic approximation up to scaling and a factor of weight_sel.sum()
+        # # quadratic approximation up to scaling and a factor of weight_sel.sum()
 
-    D_sel = _get_design(X_sel[:,active_set],
-                        weight_sel,
+        D_sel = _get_design(X_sel[:,active_set],
+                            weight_sel,
+                            standardize=glmnet_obj.standardize,
+                            intercept=glmnet_obj.fit_intercept)
+        P_noisy = D_sel.quadratic_form(unreg_sel_GLM._information,
+                                       transformed=True)
+
+        # fit unpenalized model on full data
+
+        X_full, Y_full, weight_full = full_data
+        if weight_full is None:
+            weight_full = np.ones(X_full.shape[0])
+
+        unreg_GLM = glmnet_obj.get_GLM()
+        unreg_GLM.summarize = True
+        unreg_GLM.fit(X_full[:,active_set],
+                      Y_full,
+                      sample_weight=weight_full,
+                      dispersion=dispersion)
+
+        # quadratic approximation
+
+        D = _get_design(X_full[:,active_set],
+                        weight_full,
                         standardize=glmnet_obj.standardize,
                         intercept=glmnet_obj.fit_intercept)
-    P_noisy = D_sel.quadratic_form(unreg_sel_GLM._information,
-                                   transformed=True)
+        P_full = D.quadratic_form(unreg_GLM._information,
+                                  transformed=True)
+        if not FL.fit_intercept:
+            if FL.penalty_factor is not None:
+                penfac = FL.penalty_factor[active_set]
+            else:
+                penfac = np.ones_like(active_set)
+            P_full = P_full[1:,1:]
+            P_noisy = P_noisy[1:,1:]
+        else:
+            penfac = np.ones(active_set.shape[0])
 
-    Q_noisy = np.linalg.inv(P_noisy)
+        Q_full = np.linalg.inv(P_full) 
+        Q_noisy = np.linalg.inv(P_noisy)
 
-    # fit unpenalized model on full data
+        signs = np.sign(FL.coef_[active_set])
 
-    X_full, Y_full, weight_full = full_data
-    if weight_full is None:
-        weight_full = np.ones(X_full.shape[0])
-        
-    unreg_GLM = glmnet_obj.get_GLM()
-    unreg_GLM.summarize = True
-    unreg_GLM.fit(X_full[:,active_set], Y_full, sample_weight=weight_full)
-
-    # quadratic approximation
-
-    D = _get_design(X_full[:,active_set],
-                    weight_full,
-                    standardize=glmnet_obj.standardize,
-                    intercept=glmnet_obj.fit_intercept)
-    P_full = D.quadratic_form(unreg_GLM._information,
-                              transformed=True)
-    Q_full = np.linalg.inv(P_full) 
-    if not FL.fit_intercept:
-        penfac = FL.penalty_factor[active_set]
-        Q_full = Q_full[1:,1:]
-        Q_sel = Q_sel[1:,1:]
-    else:
-        penfac = np.ones(active_set.shape[0])
+        noisy_mle = D.raw_to_scaled(unreg_sel_GLM.state_)
 
-    signs = np.sign(FL.coef_[active_set])
+        if FL.fit_intercept:
+            penfac = np.hstack([0, penfac])
+            signs = np.hstack([0, signs])
+            stacked = np.hstack([FL.state_.intercept,
+                                 FL.state_.coef[active_set]])
+            noisy_mle = noisy_mle._stack
+        else:
+            stacked = FL.state_.coef[active_set]
+            noisy_mle = noisy_mle.coef
 
-    noisy_mle = D.raw_to_scaled(unreg_sel_GLM.state_)
-    if FL.fit_intercept:
-        penfac = np.hstack([0, penfac])
-        signs = np.hstack([0, signs])
-        stacked = np.hstack([FL.state_.intercept,
-                             FL.state_.coef[active_set]])
-        noisy_mle = noisy_mle._stack
-    else:
-        stacked = FL.state_.coef[active_set]
-        noisy_mle = noisy_mle.coef
+        # delta = lambda_val * penfac * signs
 
-    # delta = lambda_val * penfac * signs
+        # # remember loss of glmnet is normalized by sum of weights
+        # # when taking newton step adjust by weight_sel.sum()
 
-    # # remember loss of glmnet is normalized by sum of weights
-    # # when taking newton step adjust by weight_sel.sum()
-    
-    # delta = Q_sel @ delta * weight_sel.sum() 
-    # noisy_mle = stacked + delta # unitless scale
-
-    penalized = penfac > 0
-    sel_P = -np.diag(signs[penalized]) @ np.eye(Q_full.shape[0])[penalized]
-#    assert (np.all(sel_P @ noisy_mle < -signs[penalized] * delta[penalized]))
-
-    ## the GLM's coef and intercept are on the original scale
-    ## we transform them here to the (typically) unitless "standardized" scale
-    full_mle = D.raw_to_scaled(unreg_GLM.state_)
-    if FL.fit_intercept:
-        full_mle = full_mle._stack
-    else:
-        full_mle = full_mle.coef
-
-    ## iterate over coordinates
-    Ls = np.zeros_like(stacked)
-    Us = np.zeros_like(stacked)
-    mles = np.zeros_like(stacked)
-    pvals = np.zeros_like(stacked)
-
-    transform_to_raw = (D.unscaler_ @
-                        np.identity(D.shape[1]))
-    if not FL.fit_intercept:
-        transform_to_raw = transform_to_raw[1:,1:]
-
-    linear = sel_P
-    offset = np.zeros(sel_P.shape[0]) # -signs[penalized] * delta[penalized]
-    active_con = AffineConstraint(linear=linear,
-                                  offset=offset,
-                                  observed=stacked)
-    inactive = True
-    if inactive:
-        pf = FL.regularizer_.penalty_factor
-        scale = 1 / (pf + (pf <= 0))
-
-        logl_score = FL.state_.logl_score(FL._family,
-                                          Y_sel,
-                                          weight_sel / weight_sel.sum())
-
-        # X_{-E}'(Y-X\hat{\beta}_E) -- \hat{\beta}_E is the LASSO soln, not GLM !
-        # this is (2nd order Taylor series sense) equivalent to
-        # X_{-E}'(Y-X\bar{\beta}_E) + X_{-E}'WX_E(X_E'WX_E)^{-1}\lambda_E s_E
-        # with \bar{\beta}_E the GLM soln
-        
-        score_ = (FL.design_.T @ logl_score)[1:]
-        score_ *= scale
-        score_ = score_[inactive_set]
-        # we now know that `score_` is bounded by \pm lambda_val
-
-        I = scipy.sparse.eye(score_.shape[0])
-        L = scipy.sparse.vstack([I, -I])
-        O = np.ones(L.shape[0]) * lambda_val
-
-        inactive_con = AffineConstraint(linear=L,
-                                        offset=O,
-                                        observed=score_)
-
-    for i in range(transform_to_raw.shape[0]):
-        ## call selection_interval and return
-        L, U, mle, p = _split_interval(
-            active_con=active_con,
-            Q_noisy=Q_noisy * unreg_GLM.dispersion_,
-            Q_full=Q_full * unreg_GLM.dispersion_,
-            noisy_observation=noisy_mle,
-            observation=full_mle,
-            direction_of_interest=transform_to_raw[i],
-            level=level
-        )
-        Ls[i] = L
-        Us[i] = U
-        mles[i] = mle
-        pvals[i] = p
-    
-    idx = (active_set).tolist()
-    if FL.fit_intercept:
-        idx = ['intercept'] + idx
-    return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
+        # delta = Q_sel @ delta * weight_sel.sum() 
+        # noisy_mle = stacked + delta # unitless scale
+
+        penalized = penfac > 0
+        sel_P = -np.diag(signs[penalized]) @ np.eye(Q_full.shape[0])[penalized]
+
+        ## the GLM's coef and intercept are on the original scale
+        ## we transform them here to the (typically) unitless "standardized" scale
+        full_mle = D.raw_to_scaled(unreg_GLM.state_)
+        if FL.fit_intercept:
+            full_mle = full_mle._stack
+        else:
+            full_mle = full_mle.coef
+
+        ## iterate over coordinates
+        Ls = np.zeros_like(stacked)
+        Us = np.zeros_like(stacked)
+        mles = np.zeros_like(stacked)
+        pvals = np.zeros_like(stacked)
+
+        transform_to_raw = (D.unscaler_ @
+                            np.identity(D.shape[1]))
+        if not FL.fit_intercept:
+            transform_to_raw = transform_to_raw[1:,1:]
+
+        linear = sel_P
+        offset = np.zeros(sel_P.shape[0]) # -signs[penalized] * delta[penalized]
+        active_con = AffineConstraint(linear=linear,
+                                      offset=offset,
+                                      observed=stacked)
+        inactive = True
+        if inactive:
+            pf = FL.regularizer_.penalty_factor
+            scale = 1 / (pf + (pf <= 0))
+
+            logl_score = FL.state_.logl_score(FL._family,
+                                              Y_sel,
+                                              weight_sel / weight_sel.sum())
+
+            # X_{-E}'(Y-X\hat{\beta}_E) -- \hat{\beta}_E is the LASSO soln, not GLM !
+            # this is (2nd order Taylor series sense) equivalent to
+            # X_{-E}'(Y-X\bar{\beta}_E) + X_{-E}'WX_E(X_E'WX_E)^{-1}\lambda_E s_E
+            # with \bar{\beta}_E the GLM soln
+
+            score_ = (FL.design_.T @ logl_score)[1:]
+            score_ *= scale
+            score_ = score_[inactive_set]
+            # we now know that `score_` is bounded by \pm lambda_val
+
+            I = scipy.sparse.eye(score_.shape[0])
+            L = scipy.sparse.vstack([I, -I])
+            O = np.ones(L.shape[0]) * lambda_val
+
+            inactive_con = AffineConstraint(linear=L,
+                                            offset=O,
+                                            observed=score_)
+
+        for i in range(transform_to_raw.shape[0]):
+            ## call selection_interval and return
+            L, U, mle, pval, _ = _split_interval(
+                active_con=active_con,
+                Q_noisy=Q_noisy * unreg_GLM.dispersion_,
+                Q_full=Q_full * unreg_GLM.dispersion_,
+                noisy_observation=noisy_mle, # these must be same scale / shift as glmnet
+                observation=full_mle, # these must be same scale / shift as glmnet
+                direction_of_interest=transform_to_raw[i], # this matrix describes the map from glmnet("transform") coords to raw("original") scale
+                level=level
+            )
+            Ls[i] = L
+            Us[i] = U
+            mles[i] = mle
+            pvals[i] = pval
 
+        idx = active_set.tolist()
+        if FL.fit_intercept:
+            idx = ['intercept'] + idx
+
+        return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
+    else:
+        return None
 
 def _split_interval(active_con,
                     Q_noisy,
@@ -349,84 +360,23 @@ def _split_interval(active_con,
     noisy_estimate = (direction_of_interest * noisy_observation).sum()
     estimate = (direction_of_interest * observation).sum()
 
+    slice_dir = Q_full @ direction_of_interest / full_var
     (lower_bound,
      upper_bound) = active_con.interval_constraints(noisy_estimate,
-                                                    Q_full @ direction_of_interest / full_var,
+                                                    slice_dir,
                                                     tol=tol)
 
     sigma = np.sqrt(full_var)
     smoothing_sigma = np.sqrt(max(noisy_var - full_var, 0))
 
-    if smoothing_sigma > 1e-6 * sigma:
-
-        grid = np.linspace(estimate - 10 * sigma, estimate + 10 * sigma, 2001)
-        weight = (
-            normal_dbn.cdf(
-                (upper_bound - grid) / (smoothing_sigma)
-            ) - normal_dbn.cdf(
-                (lower_bound - grid) / (smoothing_sigma)
-            )
-        )
-        weight *= normal_dbn.pdf((grid - estimate) / sigma)
+    print(lower_bound, upper_bound, sigma, smoothing_sigma**2)
 
-        sel_distr = discrete_family(grid, weight)
-        L, U = sel_distr.equal_tailed_interval(estimate,
-                                               alpha=1-level)
-        mle, _, _ = sel_distr.MLE(estimate)
-        mle *= sigma**2; mle += estimate
-        pval = sel_distr.cdf(-estimate / sigma**2, estimate)
-        pval = 2 * min(pval, 1-pval)
-        L *= sigma**2; L += estimate
-        U *= sigma**2; U += estimate
-    else:
-        warnings.warn('assuming data for selection is same as for inference -- using hard selection')
-        
-        lb = estimate - 20 * sigma
-        ub = estimate + 20 * sigma
-
-        if estimate < lower_bound or estimate > upper_bound:
-            warn('Constraints not satisfied: returning [-np.inf, np.inf]')
-            return -np.inf, np.inf, np.nan, np.nan
-        
-        def F(theta):
-            
-            Z = (estimate - theta) / sigma
-            Z_L = (lower_bound - theta) / sigma
-            Z_U = (upper_bound - theta) / sigma
-            num = norm_interval(Z_L, Z)
-            den = norm_interval(Z_L, Z_U)
-            if Z_L > 0 and den < 1e-10:
-                C = np.fabs(Z_L)
-                D = Z-Z_L
-                cdf = 1-np.exp(-C*D)
-            elif Z_U < 0 and den < 1e-10:
-                C = np.fabs(Z_U)
-                D = Z_U-Z
-                if C*D < 0:
-                    raise ValueError
-                cdf = np.exp(-C*D)
-            else:
-                cdf = num / den
-            return cdf
-            
-        
-        pval = F(0)
-        pval = 2 * min(pval, 1-pval)
-
-        lb = lower_bound - 20 * sigma
-        ub = upper_bound + 20 * sigma
-
-        alpha = 0.5 * (1 - level)
-        L = find_root(F, 1.0 - 0.5 * alpha, lb, ub)
-        if np.isnan(L):
-            L = -np.inf
-        U = find_root(F, 0.5 * alpha, lb, ub)
-        if np.isnan(U):
-            U = np.inf
-
-        mle = np.nan
-
-    return L, U, mle, pval
+    return _truncated_inference(estimate,
+                                sigma,
+                                smoothing_sigma,
+                                lower_bound,
+                                upper_bound,
+                                level=level)
 
 def norm_interval(lower, upper):
     r"""
@@ -456,7 +406,8 @@ def norm_interval(lower, upper):
     elif upper < 0:
         if upper < -4:
             return np.exp(-np.fabs(upper*(upper-lower)))
-    
+        else:
+            return cdf(-lower) - cdf(-upper)
     else:
         return cdf(upper) - cdf(lower)
 
@@ -552,7 +503,7 @@ def theta(self):
     def theta(self, _theta):
         if _theta != self._theta:
             _thetaX = _theta * self.sufficient_stat + self._lw
-            _largest = _thetaX.max() - 5 # try to avoid over/under flow, 5 seems arbitrary
+            _largest = _thetaX.max() - 15 # try to avoid over/under flow, 5 seems arbitrary
             _exp_thetaX = np.exp(_thetaX - _largest)
             _prod = _exp_thetaX
             self._partition = np.sum(_prod)
@@ -909,3 +860,91 @@ def first_two_moments(x):
 
         return cur_est, 1. / hessian, grad
 
+def _truncated_inference(estimate,
+                         sigma,
+                         smoothing_sigma,
+                         lower_bound,
+                         upper_bound,
+                         basept=None,
+                         level=0.9):
+
+    if basept is None:
+        basept = estimate
+
+    if smoothing_sigma > 1e-6 * sigma:
+
+        grid = np.linspace(basept - 10 * sigma, basept + 10 * sigma, 2001)
+
+        weight = (normal_dbn.cdf((upper_bound - grid) / smoothing_sigma)
+                  - normal_dbn.cdf((lower_bound - grid) / smoothing_sigma))
+
+        weight *= normal_dbn.pdf((grid - basept) / sigma)
+
+        sel_distr = discrete_family(grid, weight)
+
+        use_sample = True
+        if use_sample:
+            rng = np.random.default_rng(0)
+            sample = rng.standard_normal(2000) * sigma + basept
+            weight_s = (normal_dbn.cdf((upper_bound - sample) / smoothing_sigma)
+                      - normal_dbn.cdf((lower_bound - sample) / smoothing_sigma))
+            sel_distr =  discrete_family(sample, weight_s)
+
+        L, U = sel_distr.equal_tailed_interval(estimate,
+                                               alpha=1-level)
+        mle, _, _ = sel_distr.MLE(estimate)
+        mle *= sigma**2; mle += estimate
+        pval = sel_distr.cdf(-basept / sigma**2, estimate)
+        pval = 2 * min(pval, 1-pval)
+        L *= sigma**2; L += basept
+        U *= sigma**2; U += basept
+
+    else:
+        warnings.warn('assuming data for selection is same as for inference -- using hard selection')
+        
+        lb = estimate - 20 * sigma
+        ub = estimate + 20 * sigma
+
+        if estimate < lower_bound or estimate > upper_bound:
+            warn('Constraints not satisfied: returning [-np.inf, np.inf]')
+            return -np.inf, np.inf, np.nan, np.nan
+        
+        def F(theta):
+            
+            Z = (estimate - theta) / sigma
+            Z_L = (lower_bound - theta) / sigma
+            Z_U = (upper_bound - theta) / sigma
+            num = norm_interval(Z_L, Z)
+            den = norm_interval(Z_L, Z_U)
+            if Z_L > 0 and den < 1e-10:
+                C = np.fabs(Z_L)
+                D = Z-Z_L
+                cdf = 1-np.exp(-C*D)
+            elif Z_U < 0 and den < 1e-10:
+                C = np.fabs(Z_U)
+                D = Z_U-Z
+                if C*D < 0:
+                    raise ValueError
+                cdf = np.exp(-C*D)
+            else:
+                cdf = num / den
+            return cdf
+            
+        
+        pval = F(0)
+        pval = 2 * min(pval, 1-pval)
+
+        lb = lower_bound - 20 * sigma
+        ub = upper_bound + 20 * sigma
+
+        alpha = 0.5 * (1 - level)
+        L = find_root(F, 1.0 - 0.5 * alpha, lb, ub)
+        if np.isnan(L):
+            L = -np.inf
+        U = find_root(F, 0.5 * alpha, lb, ub)
+        if np.isnan(U):
+            U = np.inf
+
+        mle = np.nan
+        
+    return L, U, mle, pval, sel_distr
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 62f4e7f..01790fc 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -5,7 +5,8 @@
 import statsmodels.api as sm
 
 from glmnet import GLMNet
-from glmnet.inference import lasso_inference
+from glmnet.inference import (lasso_inference,
+                              _truncated_inference)
 
 @pytest.mark.parametrize('standardize', [True, False])
 @pytest.mark.parametrize('fit_intercept', [True, False])
@@ -43,11 +44,9 @@ def test_Auto():
     m = int(prop*n)
 
     df = lasso_inference(GN,
-                         GN.lambda_values_[min(10, GN.lambda_values_.shape[0]-1)],
+                         2 / n, # GN.lambda_values_[min(10, GN.lambda_values_.shape[0]-1)],
                          (X[:m], Df.iloc[:m], None),
                          (X, Df, None))
-    print(df)
-
 
 def run_inference(n,
                   p,
@@ -58,20 +57,25 @@ def run_inference(n,
                   s=3,
                   prop=0.75,
                   penalty_facs=True,
-                  family='gaussian'):
+                  family='gaussian',
+                  orthogonal=False,
+                  cv=False):
 
     if rng is None:
         rng = np.random.default_rng(0)
-    X = rng.standard_normal((n, p))
-    D = np.linspace(1, p, p) / p + 0.2
-    X *= D[None,:]
-    Y = rng.standard_normal(n) * np.fabs(1+np.random.standard_normal())
     beta = np.zeros(p)
-    if alt:
-        subs = rng.choice(p, s, replace=False)
-        beta[subs] = rng.standard_normal(s) 
-    mu = X @ beta
-    Y += mu
+    subs = rng.choice(p, s, replace=False)
+
+    if not orthogonal:
+        X = rng.standard_normal((n, p))
+        D = np.linspace(1, p, p) / p + 0.2
+        X *= D[None,:]
+        Y = rng.standard_normal(n) * np.fabs(1+np.random.standard_normal())
+        if alt:
+            beta[subs] = rng.uniform(3, 5) * rng.choice([-1,1], size=s, replace=True) / np.sqrt(n)
+
+        mu = X @ beta
+        Y += mu
 
     if family == 'gaussian':
         fam = sm.families.Gaussian()
@@ -82,51 +86,97 @@ def run_inference(n,
         raise ValueError('only testing "gaussian" and "probit"')
     
     if penalty_facs:
-        penalty_factor = np.ones(X.shape[1])
+        penalty_factor = np.ones(p)
         penalty_factor[:10] = 0.5
     else:
         penalty_factor = None
 
-    GN = GLMNet(response_id='response',
-                family=fam,
-                fit_intercept=fit_intercept,
-                standardize=standardize,
-                penalty_factor=penalty_factor)
-
-    Df = pd.DataFrame({'response':Y})
+    if not orthogonal:
+        
+        GN = GLMNet(response_id='response',
+                    fit_intercept=fit_intercept,
+                    standardize=standardize,
+                    family=fam,
+                    penalty_factor=penalty_factor)
 
-    GN.fit(X, Df)
-    m = int(prop*n)
+        m = int(prop*n)
+        
+        Df = pd.DataFrame({'response':Y})
+        GN.fit(X[:m], Df.iloc[:m]) 
+        if cv:
+            GN.cross_validation_path(X[:m], Df.iloc[:m])
+            lamval = GN.index_best_['Mean Squared Error']
+        else:
+            eps = rng.standard_normal((X.shape[0], 1000))
+            lamval = 1.2 * np.fabs(X.T @ eps).max() / X.shape[0]
+        df = lasso_inference(GN, 
+                             lamval,
+                             (X[:m], Df.iloc[:m], None),
+                             (X, Df, None))
+
+        if df is not None:
+            if fit_intercept:
+                active_set = np.array(df.index[1:]).astype(int)
+            else:
+                active_set = np.array(df.index).astype(int)
+            X_sel = X[:,active_set]
+
+            if fit_intercept:
+                X_sel = np.column_stack([np.ones(X_sel.shape[0]), X_sel])
+            targets = np.linalg.pinv(X_sel) @ mu
+
+            df['target'] = targets
 
-    df = lasso_inference(GN, 
-                         GN.lambda_values_[min(10, GN.lambda_values_.shape[0]-1)],
-                         (X[:m], Df.iloc[:m], None),
-                         (X, Df, None))
-    if fit_intercept:
-        active_set = np.array(df.index[1:]).astype(int)
     else:
-        active_set = np.array(df.index).astype(int)
-    X_sel = X[:,active_set]
-
-    if fit_intercept:
-        X_sel = np.column_stack([np.ones(X_sel.shape[0]), X_sel])
-    targets = np.linalg.pinv(X_sel) @ mu
 
-    df['target'] = targets
-    if alt:
+        X = np.identity(p)
+        Y = rng.standard_normal(p) 
+        Df = pd.DataFrame({'response':Y})
+        if alt:
+            beta[subs] = rng.standard_normal(s) * 2
+        mu = X @ beta
+        Y += mu
+        
+        GN = GLMNet(response_id='response',
+                    family=fam,
+                    fit_intercept=False,
+                    standardize=False,
+                    penalty_factor=penalty_factor)
+
+
+        lamval = 2
+        Y_sel = Y + np.sqrt((1 - prop) / prop) * rng.standard_normal(Y.shape)
+
+        active_naive = np.nonzero(np.fabs(Y_sel) > lamval)[0]
+        Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
+        X_sel = np.sqrt(prop) * X
+        GN.fit(X, Df)
+        if active_naive.shape[0] > 0:
+            df = lasso_inference(GN, 
+                                 prop * lamval / p,
+                                 (X_sel, Df_sel, None),
+                                 (X, Df, None),
+                                 dispersion=1)
+            df['target'] = mu[df.index]
+        else:
+            return None
+    if alt and df is not None:
         if family == 'probit' and not set(subs).issubset(active_set):
             df['target'] *= np.nan
             
     return df
 
 
-def main_null(fit_intercept=True,
-              standardize=True,
-              n=500,
-              p=75,
-              ntrial=500,
-              rng=None,
-              family='gaussian'):
+def main(fit_intercept=True,
+         standardize=True,
+         n=500,
+         p=75,
+         ntrial=500,
+         rng=None,
+         family='gaussian',
+         alt=False,
+         cv=False,
+         orthogonal=False):
 
     ncover = 0
     nsel = 0
@@ -140,43 +190,53 @@ def main_null(fit_intercept=True,
                            fit_intercept,
                            standardize,
                            rng=rng,
-                           family=family)
-        dfs.append(df)
-        all_df = pd.concat(dfs)
-        ncover += ((all_df['lower'] < 0) & (all_df['upper'] > 0)).sum()
-        nsel += all_df.shape[0]
-
-        print('cover:', ncover / nsel, 'typeI:', (all_df['pval'] < 0.05).mean())
+                           family=family,
+                           alt=alt,
+                           cv=cv,
+                           orthogonal=orthogonal)
+        if df is not None:
+            dfs.append(df)
+        if len(dfs) > 0:
+            all_df = pd.concat(dfs)
+            ncover += ((all_df['lower'] < 0) & (all_df['upper'] > 0)).sum()
+            nsel += all_df.shape[0]
 
+            print('cover:', ncover / nsel, 'power:', (all_df['pval'] < 0.05).mean())
 
-def main_alt(fit_intercept=True,
-              standardize=True,
-              n=1000,
-              p=75,
-              ntrial=500,
-              rng=None,
-              family='gaussian'):
 
-    ncover = 0
-    nsel = 0
+        
+def test_truncated_inference(B=1000,
+                             noisy_sigma=0.5,
+                             sigma=1,
+                             upper_bound=5,
+                             lower_bound=2,
+                             alt=False):
 
-    dfs = []
+    pvals = []
+    cover = []
 
     rng = np.random.default_rng(0)
-    for i in range(ntrial):
-        df = run_inference(n,
-                           p,
-                           fit_intercept,
-                           standardize,
-                           rng=rng,
-                           family=family,
-                           alt=True)
-        dfs.append(df)
-        all_df = pd.concat(dfs).dropna()
-        ncover += ((all_df['lower'] < all_df['target']) & (all_df['upper'] > all_df['target'])).sum()
-        nsel += all_df.shape[0]
-
-        print('cover:', ncover / nsel)
-    return all_df
-
-        
+    
+    for _ in range(B):
+        mu = rng.standard_normal() + 3
+        lower_bound = rng.uniform(0, 2)
+        upper_bound = lower_bound + rng.uniform(3, 5)
+        while True:
+            Z = rng.standard_normal() * sigma
+            if alt:
+                Z += mu
+            Z_noisy = Z + rng.standard_normal() * noisy_sigma
+            if (Z_noisy > lower_bound) and (Z_noisy < upper_bound):
+                L, U, _, pval, D = _truncated_inference(Z,
+                                                        sigma,
+                                                        noisy_sigma,
+                                                        lower_bound,
+                                                        upper_bound,
+                                                        basept=Z,
+                                                        level=0.90)
+                cover.append((L<mu) * (U>mu))
+                pvals.append(pval)
+                print(np.mean(cover), np.mean(np.array(pvals) < 0.05))
+                break
+            
+    return D

From da21a471f815ff7b50755264ca644eefa6a5eb9d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 3 Jul 2024 17:18:27 -0700
Subject: [PATCH 37/44] WIP: orthogonal now working without penfacs

---
 glmnet/inference.py     | 140 +++++++++++++++++++++++++++++++++-------
 tests/test_inference.py |  93 ++++++++++++++++++--------
 2 files changed, 181 insertions(+), 52 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index 8f9fedc..f610afa 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -22,6 +22,94 @@
 from dataclasses import dataclass
 from typing import Optional
 
+@dataclass
+class TruncatedGaussian(object):
+    
+    estimate: float
+    sigma: float
+    smoothing_sigma: float
+    lower_bound: float
+    upper_bound: float
+    level: Optional[float] = 0.9
+    num_sd: Optional[float] = 10
+    num_grid: Optional[float] = 4000
+    use_sample: Optional[bool] = False
+    seed: Optional[int] = 0
+
+    def _get_family(self,
+                    basept=None):
+   
+        if basept is None:
+            basept = self.estimate
+            
+        self._rng = np.random.default_rng(self.seed)
+        
+        if not self.use_sample:
+            grid = np.linspace(basept - self.num_sd * self.sigma,
+                               basept + self.num_sd * self.sigma, self.num_grid)
+
+            weight = (normal_dbn.cdf((self.upper_bound - grid) / self.smoothing_sigma)
+                      - normal_dbn.cdf((self.lower_bound - grid) / self.smoothing_sigma))
+
+            weight *= normal_dbn.pdf((grid - basept) / self.sigma)
+
+            return discrete_family(grid, weight)
+
+        else:
+            sample = self._rng.standard_normal(self.num_grid) * self.sigma + basept
+            weight = (normal_dbn.cdf((self.upper_bound - sample) / self.smoothing_sigma)
+                      - normal_dbn.cdf((self.lower_bound - sample) / self.smoothing_sigma))
+            return discrete_family(sample, weight)
+
+    def pvalue(self,
+               null_value=0,
+               alternative='twosided',
+               basept=None):
+
+        if basept is None:
+            basept = null_value
+
+        if alternative not in ['twosided', 'greater', 'less']:
+            raise ValueError("alternative should be one of ['twosided', 'greater', 'less']")
+        _family = self._get_family(basept=basept)
+        tilt = (null_value - basept) / self.sigma**2
+        if alternative in ['less', 'twosided']:
+            _cdf = _family.cdf(tilt, x=self.estimate)
+            if alternative == 'less':
+                pvalue = _cdf
+            else:
+                pvalue = 2 * min(_cdf, 1 - _cdf)
+        else:
+            pvalue = _family.sf(tilt, x=self.estimate)
+        return pvalue
+
+    def interval(self,
+                 basept=None):
+                 
+        if basept is None:
+            basept = self.estimate
+            
+        _family = self._get_family(basept=basept)
+
+        L, U = _family.equal_tailed_interval(self.estimate,
+                                             alpha=1-self.level)
+        L *= self.sigma**2; L += basept
+        U *= self.sigma**2; U += basept
+
+        return L, U
+
+    def MLE(self,
+            basept=None):
+
+        if basept is None:
+            basept = self.estimate
+            
+        _family = self._get_family(basept=basept)
+
+        mle, _, _ = _family.MLE(self.estimate)
+        mle *= self.sigma**2; mle += basept
+        return mle
+    
 @dataclass
 class AffineConstraint(object):
 
@@ -242,7 +330,8 @@ def lasso_inference(glmnet_obj,
         Us = np.zeros_like(stacked)
         mles = np.zeros_like(stacked)
         pvals = np.zeros_like(stacked)
-
+        TGs = []
+        
         transform_to_raw = (D.unscaler_ @
                             np.identity(D.shape[1]))
         if not FL.fit_intercept:
@@ -282,25 +371,27 @@ def lasso_inference(glmnet_obj,
 
         for i in range(transform_to_raw.shape[0]):
             ## call selection_interval and return
-            L, U, mle, pval, _ = _split_interval(
-                active_con=active_con,
-                Q_noisy=Q_noisy * unreg_GLM.dispersion_,
-                Q_full=Q_full * unreg_GLM.dispersion_,
-                noisy_observation=noisy_mle, # these must be same scale / shift as glmnet
-                observation=full_mle, # these must be same scale / shift as glmnet
-                direction_of_interest=transform_to_raw[i], # this matrix describes the map from glmnet("transform") coords to raw("original") scale
-                level=level
-            )
+            TG = _split_interval(active_con=active_con,
+                                 Q_noisy=Q_noisy * unreg_GLM.dispersion_,
+                                 Q_full=Q_full * unreg_GLM.dispersion_,
+                                 noisy_observation=noisy_mle, # these must be same scale / shift as glmnet
+                                 observation=full_mle, # these must be same scale / shift as glmnet
+                                 direction_of_interest=transform_to_raw[i], # this matrix describes the map from glmnet("transform") coords to raw("original") scale
+                                 level=level)
+
+            (L, U), mle, pval = (TG.interval(), TG.MLE(), TG.pvalue())
+
             Ls[i] = L
             Us[i] = U
             mles[i] = mle
             pvals[i] = pval
-
+            TGs.append(TG)
+            
         idx = active_set.tolist()
         if FL.fit_intercept:
             idx = ['intercept'] + idx
 
-        return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx)
+        return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us, 'TG':TGs}, index=idx)
     else:
         return None
 
@@ -369,16 +460,14 @@ def _split_interval(active_con,
     sigma = np.sqrt(full_var)
     smoothing_sigma = np.sqrt(max(noisy_var - full_var, 0))
 
-    print(lower_bound, upper_bound, sigma, smoothing_sigma**2)
-
-    return _truncated_inference(estimate,
-                                sigma,
-                                smoothing_sigma,
-                                lower_bound,
-                                upper_bound,
-                                level=level)
-
-def norm_interval(lower, upper):
+    return TruncatedGaussian(estimate=estimate,
+                             sigma=sigma,
+                             smoothing_sigma=smoothing_sigma,
+                             lower_bound=lower_bound,
+                             upper_bound=upper_bound,
+                             level=level)
+    
+def _norm_interval(lower, upper):
     r"""
     A multiprecision evaluation of
 
@@ -870,6 +959,7 @@ def _truncated_inference(estimate,
 
     if basept is None:
         basept = estimate
+    noisy_sigma = np.sqrt(smoothing_sigma**2 + sigma**2)
 
     if smoothing_sigma > 1e-6 * sigma:
 
@@ -882,7 +972,7 @@ def _truncated_inference(estimate,
 
         sel_distr = discrete_family(grid, weight)
 
-        use_sample = True
+        use_sample = False
         if use_sample:
             rng = np.random.default_rng(0)
             sample = rng.standard_normal(2000) * sigma + basept
@@ -914,8 +1004,8 @@ def F(theta):
             Z = (estimate - theta) / sigma
             Z_L = (lower_bound - theta) / sigma
             Z_U = (upper_bound - theta) / sigma
-            num = norm_interval(Z_L, Z)
-            den = norm_interval(Z_L, Z_U)
+            num = _norm_interval(Z_L, Z)
+            den = _norm_interval(Z_L, Z_U)
             if Z_L > 0 and den < 1e-10:
                 C = np.fabs(Z_L)
                 D = Z-Z_L
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 01790fc..405825a 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -6,7 +6,7 @@
 
 from glmnet import GLMNet
 from glmnet.inference import (lasso_inference,
-                              _truncated_inference)
+                              TruncatedGaussian)
 
 @pytest.mark.parametrize('standardize', [True, False])
 @pytest.mark.parametrize('fit_intercept', [True, False])
@@ -131,11 +131,11 @@ def run_inference(n,
 
         X = np.identity(p)
         Y = rng.standard_normal(p) 
-        Df = pd.DataFrame({'response':Y})
         if alt:
-            beta[subs] = rng.standard_normal(s) * 2
-        mu = X @ beta
+            beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+        mu = beta
         Y += mu
+        Df = pd.DataFrame({'response':Y})
         
         GN = GLMNet(response_id='response',
                     family=fam,
@@ -143,11 +143,10 @@ def run_inference(n,
                     standardize=False,
                     penalty_factor=penalty_factor)
 
-
         lamval = 2
         Y_sel = Y + np.sqrt((1 - prop) / prop) * rng.standard_normal(Y.shape)
-
         active_naive = np.nonzero(np.fabs(Y_sel) > lamval)[0]
+
         Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
         X_sel = np.sqrt(prop) * X
         GN.fit(X, Df)
@@ -158,6 +157,37 @@ def run_inference(n,
                                  (X, Df, None),
                                  dispersion=1)
             df['target'] = mu[df.index]
+
+            pivots = []
+            signs = np.sign(Y_sel[df.index])
+            assert np.all(Y_sel[df.index] * signs >= lamval * penalty_factor[df.index])
+
+            for j, s in zip(df.index, signs):
+                if s == 1:
+                    upper_bound = np.inf
+                    if penalty_factor is not None:
+                        lower_bound = lamval * penalty_factor[j]
+                    else:
+                        lower_bound = lamval
+                else:
+                    if penalty_factor is not None:
+                        upper_bound = -lamval * penalty_factor[j]
+                    else:
+                        upper_bound = -lamval
+                    lower_bound = -np.inf
+                    
+                tg = df.loc[j,'TG']
+
+                TG = TruncatedGaussian(estimate=Y[j],
+                                       sigma=1,
+                                       smoothing_sigma=np.sqrt((1 - prop) / prop),
+                                       lower_bound=lower_bound,
+                                       upper_bound=upper_bound,
+                                       level=0.90)
+                pval = TG.pvalue()
+                pivots.append(TG.pvalue(null_value=df.loc[j,'target']))
+                assert np.allclose(pval, df.loc[j]['pval'])
+            df['pivot'] = pivots
         else:
             return None
     if alt and df is not None:
@@ -176,7 +206,8 @@ def main(fit_intercept=True,
          family='gaussian',
          alt=False,
          cv=False,
-         orthogonal=False):
+         orthogonal=False,
+         penalty_facs=False):
 
     ncover = 0
     nsel = 0
@@ -193,24 +224,29 @@ def main(fit_intercept=True,
                            family=family,
                            alt=alt,
                            cv=cv,
-                           orthogonal=orthogonal)
+                           orthogonal=orthogonal,
+                           penalty_facs=penalty_facs)
         if df is not None:
             dfs.append(df)
         if len(dfs) > 0:
             all_df = pd.concat(dfs)
-            ncover += ((all_df['lower'] < 0) & (all_df['upper'] > 0)).sum()
+            ncover += ((all_df['lower'] < all_df['target']) & (all_df['upper'] > all_df['target'])).sum()
             nsel += all_df.shape[0]
 
-            print('cover:', ncover / nsel, 'power:', (all_df['pval'] < 0.05).mean())
+            print('cover:',
+                  ncover / nsel, 'power:',
+                  (all_df['pval'] < 0.05).mean(),
+                  (all_df['pivot'] < 0.05).mean(), all_df['pivot'].std())
 
 
         
 def test_truncated_inference(B=1000,
-                             noisy_sigma=0.5,
+                             smoothing_sigma=np.sqrt(1/3),
                              sigma=1,
-                             upper_bound=5,
-                             lower_bound=2,
-                             alt=False):
+                             upper_bound=None,
+                             lower_bound=None,
+                             alt=False,
+                             level=0.9):
 
     pvals = []
     cover = []
@@ -219,24 +255,27 @@ def test_truncated_inference(B=1000,
     
     for _ in range(B):
         mu = rng.standard_normal() + 3
-        lower_bound = rng.uniform(0, 2)
-        upper_bound = lower_bound + rng.uniform(3, 5)
+        if not alt:
+            mu *= 0
+        if lower_bound is None or upper_bound is None:
+            lower_bound = rng.uniform(0, 2)
+            upper_bound = lower_bound + rng.uniform(3, 5)
         while True:
             Z = rng.standard_normal() * sigma
-            if alt:
-                Z += mu
-            Z_noisy = Z + rng.standard_normal() * noisy_sigma
+            Z += mu
+            Z_noisy = Z + rng.standard_normal() * smoothing_sigma
             if (Z_noisy > lower_bound) and (Z_noisy < upper_bound):
-                L, U, _, pval, D = _truncated_inference(Z,
-                                                        sigma,
-                                                        noisy_sigma,
-                                                        lower_bound,
-                                                        upper_bound,
-                                                        basept=Z,
-                                                        level=0.90)
+                TG = TruncatedGaussian(estimate=Z,
+                                       sigma=sigma,
+                                       smoothing_sigma=smoothing_sigma,
+                                       lower_bound=lower_bound,
+                                       upper_bound=upper_bound,
+                                       level=level)
+                
+                (L, U), mle, pval = (TG.interval(), TG.MLE(), TG.pvalue())
+
                 cover.append((L<mu) * (U>mu))
                 pvals.append(pval)
                 print(np.mean(cover), np.mean(np.array(pvals) < 0.05))
                 break
             
-    return D

From 600d5ccce3fa5dff76c3bc461a726c29cf6eedeb Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 3 Jul 2024 17:22:43 -0700
Subject: [PATCH 38/44] pivots for non-orthogonal

---
 tests/test_inference.py | 56 +++++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/tests/test_inference.py b/tests/test_inference.py
index 405825a..f38a841 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -161,39 +161,41 @@ def run_inference(n,
             pivots = []
             signs = np.sign(Y_sel[df.index])
             assert np.all(Y_sel[df.index] * signs >= lamval * penalty_factor[df.index])
-
-            for j, s in zip(df.index, signs):
-                if s == 1:
-                    upper_bound = np.inf
-                    if penalty_factor is not None:
-                        lower_bound = lamval * penalty_factor[j]
-                    else:
-                        lower_bound = lamval
-                else:
-                    if penalty_factor is not None:
-                        upper_bound = -lamval * penalty_factor[j]
-                    else:
-                        upper_bound = -lamval
-                    lower_bound = -np.inf
-                    
-                tg = df.loc[j,'TG']
-
-                TG = TruncatedGaussian(estimate=Y[j],
-                                       sigma=1,
-                                       smoothing_sigma=np.sqrt((1 - prop) / prop),
-                                       lower_bound=lower_bound,
-                                       upper_bound=upper_bound,
-                                       level=0.90)
-                pval = TG.pvalue()
-                pivots.append(TG.pvalue(null_value=df.loc[j,'target']))
-                assert np.allclose(pval, df.loc[j]['pval'])
-            df['pivot'] = pivots
         else:
             return None
     if alt and df is not None:
         if family == 'probit' and not set(subs).issubset(active_set):
             df['target'] *= np.nan
             
+    if df is not None:
+        df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
+        # for j, s in zip(df.index, signs):
+        #     if s == 1:
+        #         upper_bound = np.inf
+        #         if penalty_factor is not None:
+        #             lower_bound = lamval * penalty_factor[j]
+        #         else:
+        #             lower_bound = lamval
+        #     else:
+        #         if penalty_factor is not None:
+        #             upper_bound = -lamval * penalty_factor[j]
+        #         else:
+        #             upper_bound = -lamval
+        #         lower_bound = -np.inf
+
+        #     tg = df.loc[j,'TG']
+
+        #     TG = TruncatedGaussian(estimate=Y[j],
+        #                            sigma=1,
+        #                            smoothing_sigma=np.sqrt((1 - prop) / prop),
+        #                            lower_bound=lower_bound,
+        #                            upper_bound=upper_bound,
+        #                            level=0.90)
+        #     pval = TG.pvalue()
+        #     pivots.append(TG.pvalue(null_value=df.loc[j,'target']))
+        #     assert np.allclose(pval, df.loc[j]['pval'])
+        # df['pivot'] = pivots
+        
     return df
 
 

From 8ff293a0d7ed7863acd9b7c27f2ed34a401b0336 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 8 Jul 2024 13:02:08 -0700
Subject: [PATCH 39/44] added upper / lower limits

---
 glmnet/inference.py     |  39 ++++-
 tests/test_inference.py | 306 +++++++++++++++++++++-------------------
 2 files changed, 194 insertions(+), 151 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index f610afa..ed6cd91 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -277,6 +277,20 @@ def lasso_inference(glmnet_obj,
                         weight_full,
                         standardize=glmnet_obj.standardize,
                         intercept=glmnet_obj.fit_intercept)
+
+        if np.asarray(FL.upper_limits).shape == ():
+            upper_limits = np.ones(D_sel.shape[1] - 1) * FL.upper_limits
+        else:
+            upper_limits = FL.upper_limits[active_set]
+            
+        if np.asarray(FL.lower_limits).shape == ():
+            lower_limits = np.ones(D_sel.shape[1] - 1) * FL.lower_limits
+        else:
+            lower_limits = FL.lower_limits[active_set]
+            
+        upper_limits = (D_sel.scaler_ @ np.hstack([0, upper_limits]))[1:]
+        lower_limits = (D_sel.scaler_ @ np.hstack([0, lower_limits]))[1:]
+
         P_full = D.quadratic_form(unreg_GLM._information,
                                   transformed=True)
         if not FL.fit_intercept:
@@ -315,7 +329,17 @@ def lasso_inference(glmnet_obj,
         # noisy_mle = stacked + delta # unitless scale
 
         penalized = penfac > 0
-        sel_P = -np.diag(signs[penalized]) @ np.eye(Q_full.shape[0])[penalized]
+        n_penalized = penalized.sum()
+        n_coef = penalized.shape[0]
+        row_idx = np.arange(n_penalized)
+        col_idx = np.nonzero(penalized)[0]
+        data = -signs[penalized]
+        sel_P = scipy.sparse.coo_matrix((data, (row_idx, col_idx)), shape=(n_penalized, n_coef))
+
+        if FL.fit_intercept:
+            sel_U = sel_L = scipy.sparse.dia_array((np.ones((1, n_coef)), [1]), shape=(n_coef-1, n_coef))
+        else:
+            sel_U = sel_L = scipy.sparse.eye(n_coef)
 
         ## the GLM's coef and intercept are on the original scale
         ## we transform them here to the (typically) unitless "standardized" scale
@@ -337,15 +361,14 @@ def lasso_inference(glmnet_obj,
         if not FL.fit_intercept:
             transform_to_raw = transform_to_raw[1:,1:]
 
-        linear = sel_P
-        offset = np.zeros(sel_P.shape[0]) # -signs[penalized] * delta[penalized]
+        linear = scipy.sparse.vstack([sel_P, sel_U, -sel_L])
+        offset = np.hstack([np.zeros(sel_P.shape[0]), upper_limits, -lower_limits]) 
         active_con = AffineConstraint(linear=linear,
                                       offset=offset,
                                       observed=stacked)
         inactive = True
         if inactive:
             pf = FL.regularizer_.penalty_factor
-            scale = 1 / (pf + (pf <= 0))
 
             logl_score = FL.state_.logl_score(FL._family,
                                               Y_sel,
@@ -357,16 +380,18 @@ def lasso_inference(glmnet_obj,
             # with \bar{\beta}_E the GLM soln
 
             score_ = (FL.design_.T @ logl_score)[1:]
-            score_ *= scale
             score_ = score_[inactive_set]
             # we now know that `score_` is bounded by \pm lambda_val
 
             I = scipy.sparse.eye(score_.shape[0])
             L = scipy.sparse.vstack([I, -I])
-            O = np.ones(L.shape[0]) * lambda_val
+            O = (np.ones(L.shape[0]) * lambda_val *
+                 np.hstack([pf[inactive_set],
+                            pf[inactive_set]]))
 
+            fudge_factor = 0.02 # allow 2% relative error on inactive gradient
             inactive_con = AffineConstraint(linear=L,
-                                            offset=O,
+                                            offset=O * (1 + fudge_factor),
                                             observed=score_)
 
         for i in range(transform_to_raw.shape[0]):
diff --git a/tests/test_inference.py b/tests/test_inference.py
index f38a841..4f9e89a 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -8,23 +8,6 @@
 from glmnet.inference import (lasso_inference,
                               TruncatedGaussian)
 
-@pytest.mark.parametrize('standardize', [True, False])
-@pytest.mark.parametrize('fit_intercept', [True, False])
-@pytest.mark.parametrize('p', [103])
-@pytest.mark.parametrize('n', [500])
-def test_inference(n,
-                   p,
-                   fit_intercept,
-                   standardize,
-                   ntrial=10):
-    # run a few times to be sure KKT conditions not violated
-
-    for _ in range(ntrial):
-        run_inference(n,
-                      p,
-                      fit_intercept,
-                      standardize)
-
 def test_Auto():
 
     df = sm.datasets.get_rdataset('Auto', package='ISLR').data
@@ -48,161 +31,195 @@ def test_Auto():
                          (X[:m], Df.iloc[:m], None),
                          (X, Df, None))
 
-def run_inference(n,
-                  p,
-                  fit_intercept,
-                  standardize,
-                  rng=None,
-                  alt=False,
-                  s=3,
-                  prop=0.75,
-                  penalty_facs=True,
-                  family='gaussian',
-                  orthogonal=False,
-                  cv=False):
-
+def sample_orthogonal(rng=None,
+                      p=50,
+                      s=5,
+                      penalty_facs=False,
+                      alt=True,
+                      prop=0.8):
     if rng is None:
         rng = np.random.default_rng(0)
     beta = np.zeros(p)
     subs = rng.choice(p, s, replace=False)
 
-    if not orthogonal:
-        X = rng.standard_normal((n, p))
-        D = np.linspace(1, p, p) / p + 0.2
-        X *= D[None,:]
-        Y = rng.standard_normal(n) * np.fabs(1+np.random.standard_normal())
-        if alt:
-            beta[subs] = rng.uniform(3, 5) * rng.choice([-1,1], size=s, replace=True) / np.sqrt(n)
-
-        mu = X @ beta
-        Y += mu
-
-    if family == 'gaussian':
-        fam = sm.families.Gaussian()
-    elif family == 'probit':
-        fam = sm.families.Binomial(link=sm.families.links.Probit())
-        Y = (mu + rng.standard_normal(n)) > 0
-    else:
-        raise ValueError('only testing "gaussian" and "probit"')
-    
-    if penalty_facs:
-        penalty_factor = np.ones(p)
-        penalty_factor[:10] = 0.5
-    else:
-        penalty_factor = None
+    X = np.identity(p)
+    Y = rng.standard_normal(p) 
+    if alt:
+        beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+    mu = beta
+    Y += mu
+    Df = pd.DataFrame({'response':Y})
 
-    if not orthogonal:
+    penalty_factor = np.ones(p)
+    if penalty_facs:
+        penalty_factor[:p//10] = 0.5
+        penalty_factor[p//10:2*p//10] = 0
         
-        GN = GLMNet(response_id='response',
-                    fit_intercept=fit_intercept,
-                    standardize=standardize,
-                    family=fam,
-                    penalty_factor=penalty_factor)
+    penalty_factor *= p / penalty_factor.sum()
 
-        m = int(prop*n)
-        
-        Df = pd.DataFrame({'response':Y})
-        GN.fit(X[:m], Df.iloc[:m]) 
-        if cv:
-            GN.cross_validation_path(X[:m], Df.iloc[:m])
-            lamval = GN.index_best_['Mean Squared Error']
-        else:
-            eps = rng.standard_normal((X.shape[0], 1000))
-            lamval = 1.2 * np.fabs(X.T @ eps).max() / X.shape[0]
+    GN = GLMNet(response_id='response',
+                fit_intercept=False,
+                standardize=False,
+                penalty_factor=penalty_factor)
+
+    lamval = 2
+    Y_sel = Y + np.sqrt((1 - prop) / prop) * rng.standard_normal(Y.shape)
+    active_naive = np.nonzero(np.fabs(Y_sel) > lamval)[0]
+
+    Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
+    X_sel = np.sqrt(prop) * X
+    GN.fit(X, Df)
+    active_naive = np.nonzero(np.fabs(Y_sel) > lamval)[0]
+
+    Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
+    X_sel = np.sqrt(prop) * X
+    pivots = []
+    test_pvals = []
+
+    if active_naive.shape[0] > 0:
         df = lasso_inference(GN, 
-                             lamval,
-                             (X[:m], Df.iloc[:m], None),
-                             (X, Df, None))
+                             prop * lamval / p,
+                             (X_sel, Df_sel, None),
+                             (X, Df, None),
+                             dispersion=1)
+        df['target'] = mu[df.index]
+
+        signs = np.sign(Y_sel[df.index])
+        assert np.all(Y_sel[df.index] * signs >= lamval * GN.penalty_factor[df.index])
+    else:
+        return None
 
-        if df is not None:
-            if fit_intercept:
-                active_set = np.array(df.index[1:]).astype(int)
+    if df is not None:
+        for j, s in zip(df.index, signs):
+            if s == 1:
+                upper_bound = np.inf
+                if penalty_factor is not None:
+                    lower_bound = lamval * penalty_factor[j]
+                else:
+                    lower_bound = lamval
             else:
-                active_set = np.array(df.index).astype(int)
-            X_sel = X[:,active_set]
+                if penalty_factor is not None:
+                    upper_bound = -lamval * penalty_factor[j]
+                else:
+                    upper_bound = -lamval
+                lower_bound = -np.inf
+
+            tg = df.loc[j,'TG']
+
+            TG = TruncatedGaussian(estimate=Y[j],
+                                   sigma=1,
+                                   smoothing_sigma=np.sqrt((1 - prop) / prop),
+                                   lower_bound=lower_bound,
+                                   upper_bound=upper_bound,
+                                   level=0.90)
+            pval = TG.pvalue()
+            pivots.append(TG.pvalue(null_value=df.loc[j,'target']))
+            test_pvals.append(TG.pvalue(null_value=0))
+            assert np.allclose(pval, df.loc[j]['pval'])
+        df['pivot'] = pivots
+        df['pval_byhand'] = test_pvals
+        assert np.allclose(df['pval'], df['pval_byhand'])
 
-            if fit_intercept:
-                X_sel = np.column_stack([np.ones(X_sel.shape[0]), X_sel])
-            targets = np.linalg.pinv(X_sel) @ mu
+    return df
 
-            df['target'] = targets
+def sample_gaussian(n,
+                    p,
+                    fit_intercept,
+                    standardize,
+                    rng=None,
+                    alt=False,
+                    s=10,
+                    prop=0.8,
+                    penalty_facs=False,
+                    cv=True):
 
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+
+    X = rng.standard_normal((n, p))
+    D = np.linspace(1, p, p) / p + 0.2
+    X *= D[None,:]
+    Y = rng.standard_normal(n) * np.fabs(1+np.random.standard_normal())
+    if alt:
+        beta[subs] = rng.uniform(3, 5) * rng.choice([-1,1], size=s, replace=True) / np.sqrt(n)
+
+    mu = X @ beta
+    Y += mu
+
+    if penalty_facs:
+        penalty_factor = np.ones(p)
+        penalty_factor[:10] = 0.5
     else:
+        penalty_factor = None
 
-        X = np.identity(p)
-        Y = rng.standard_normal(p) 
-        if alt:
-            beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
-        mu = beta
-        Y += mu
-        Df = pd.DataFrame({'response':Y})
-        
-        GN = GLMNet(response_id='response',
-                    family=fam,
-                    fit_intercept=False,
-                    standardize=False,
-                    penalty_factor=penalty_factor)
-
-        lamval = 2
-        Y_sel = Y + np.sqrt((1 - prop) / prop) * rng.standard_normal(Y.shape)
-        active_naive = np.nonzero(np.fabs(Y_sel) > lamval)[0]
-
-        Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
-        X_sel = np.sqrt(prop) * X
-        GN.fit(X, Df)
-        if active_naive.shape[0] > 0:
-            df = lasso_inference(GN, 
-                                 prop * lamval / p,
-                                 (X_sel, Df_sel, None),
-                                 (X, Df, None),
-                                 dispersion=1)
-            df['target'] = mu[df.index]
-
-            pivots = []
-            signs = np.sign(Y_sel[df.index])
-            assert np.all(Y_sel[df.index] * signs >= lamval * penalty_factor[df.index])
+    GN = GLMNet(response_id='response',
+                fit_intercept=fit_intercept,
+                standardize=standardize,
+                penalty_factor=penalty_factor)
+
+    m = int(prop*n)
+
+    Df = pd.DataFrame({'response':Y})
+    GN.fit(X[:m], Df.iloc[:m]) 
+    cv = True
+    if cv:
+        GN.cross_validation_path(X[:m], Df.iloc[:m])
+        lamval = GN.index_best_['Mean Squared Error']
+    else:
+        eps = rng.standard_normal((X.shape[0], 1000))
+        lamval = 1.2 * np.fabs(X.T @ eps).max() / X.shape[0]
+    df = lasso_inference(GN, 
+                         lamval,
+                         (X[:m], Df.iloc[:m], None),
+                         (X, Df, None))
+
+    if df is not None:
+        if fit_intercept:
+            active_set = np.array(df.index[1:]).astype(int)
         else:
-            return None
-    if alt and df is not None:
-        if family == 'probit' and not set(subs).issubset(active_set):
-            df['target'] *= np.nan
+            active_set = np.array(df.index).astype(int)
+        X_sel = X[:,active_set]
+
+        if fit_intercept:
+            X_sel = np.column_stack([np.ones(X_sel.shape[0]), X_sel])
+        targets = np.linalg.pinv(X_sel) @ mu
+
+        df['target'] = targets
             
     if df is not None:
         df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
-        # for j, s in zip(df.index, signs):
-        #     if s == 1:
-        #         upper_bound = np.inf
-        #         if penalty_factor is not None:
-        #             lower_bound = lamval * penalty_factor[j]
-        #         else:
-        #             lower_bound = lamval
-        #     else:
-        #         if penalty_factor is not None:
-        #             upper_bound = -lamval * penalty_factor[j]
-        #         else:
-        #             upper_bound = -lamval
-        #         lower_bound = -np.inf
-
-        #     tg = df.loc[j,'TG']
-
-        #     TG = TruncatedGaussian(estimate=Y[j],
-        #                            sigma=1,
-        #                            smoothing_sigma=np.sqrt((1 - prop) / prop),
-        #                            lower_bound=lower_bound,
-        #                            upper_bound=upper_bound,
-        #                            level=0.90)
-        #     pval = TG.pvalue()
-        #     pivots.append(TG.pvalue(null_value=df.loc[j,'target']))
-        #     assert np.allclose(pval, df.loc[j]['pval'])
-        # df['pivot'] = pivots
         
     return df
 
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('penalty_facs', [True, False])
+@pytest.mark.parametrize('fit_intercept', [True, False])
+@pytest.mark.parametrize('p', [103])
+@pytest.mark.parametrize('n', [500])
+def test_gaussian(n,
+                  p,
+                  standardize,
+                  fit_intercept,
+                  penalty_facs):
+
+    for _ in range(5):
+        df = None
+        while df is None: # make sure it is run
+            df = sample_gaussian(n=n,
+                                 p=p,
+                                 standardize=standardize,
+                                 fit_intercept=fit_intercept,
+                                 penalty_facs=penalty_facs,
+                                 cv=False)
+
 
 def main(fit_intercept=True,
          standardize=True,
          n=500,
          p=75,
+         s=5,
          ntrial=500,
          rng=None,
          family='gaussian',
@@ -222,6 +239,7 @@ def main(fit_intercept=True,
                            p,
                            fit_intercept,
                            standardize,
+                           s=s,
                            rng=rng,
                            family=family,
                            alt=alt,

From ba066ed5f125056e0876bec3ff202325924846bf Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 8 Jul 2024 14:49:05 -0700
Subject: [PATCH 40/44] running from a covariance matrix form in tests, need a
 function for IRL

---
 tests/test_inference.py | 193 +++++++++++++++++++++++++++++-----------
 1 file changed, 140 insertions(+), 53 deletions(-)

diff --git a/tests/test_inference.py b/tests/test_inference.py
index 4f9e89a..7b4902e 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -4,6 +4,8 @@
 import pandas as pd
 import statsmodels.api as sm
 
+import scipy.sparse
+
 from glmnet import GLMNet
 from glmnet.inference import (lasso_inference,
                               TruncatedGaussian)
@@ -71,8 +73,6 @@ def sample_orthogonal(rng=None,
     GN.fit(X, Df)
     active_naive = np.nonzero(np.fabs(Y_sel) > lamval)[0]
 
-    Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
-    X_sel = np.sqrt(prop) * X
     pivots = []
     test_pvals = []
 
@@ -86,8 +86,6 @@ def sample_orthogonal(rng=None,
 
         signs = np.sign(Y_sel[df.index])
         assert np.all(Y_sel[df.index] * signs >= lamval * GN.penalty_factor[df.index])
-    else:
-        return None
 
     if df is not None:
         for j, s in zip(df.index, signs):
@@ -116,22 +114,99 @@ def sample_orthogonal(rng=None,
             pivots.append(TG.pvalue(null_value=df.loc[j,'target']))
             test_pvals.append(TG.pvalue(null_value=0))
             assert np.allclose(pval, df.loc[j]['pval'])
-        df['pivot'] = pivots
+        df['pivot_byhand'] = pivots
         df['pval_byhand'] = test_pvals
         assert np.allclose(df['pval'], df['pval_byhand'])
 
+    if df is not None:
+        df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
+        assert np.allclose(df['pivot'], df['pivot_byhand'])        
+
+    return df
+
+def sample_AR1(rho=0.6,               
+               rng=None,
+               p=100,
+               s=5,
+               alt=True,
+               prop=0.8,
+               dispersion=2):
+
+    D = np.fabs(np.subtract.outer(np.arange(p), np.arange(p)))
+    dispersion = 2
+    S = (rho**D) * dispersion
+
+    return sample_cov(S,
+                      rng=rng,
+                      p=p,
+                      s=s,
+                      alt=alt,
+                      prop=prop)
+
+def sample_cov(S,
+               rng=None,
+               p=100,
+               s=5,
+               alt=True,
+               prop=0.8):
+
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+
+    S_sqrt = X = np.linalg.cholesky(S).T  # X.T @ X = S
+
+    noise = X.T @ rng.standard_normal(p) 
+    if alt:
+        beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+    mu = S @ beta
+
+    Z = mu + noise
+    Y = scipy.linalg.solve_triangular(S_sqrt.T, Z, lower=True)
+    Df = pd.DataFrame({'response':Y})
+    GN = GLMNet(response_id='response',
+                fit_intercept=False,
+                standardize=False)
+
+    lamval = 3
+    Z_sel = Z + np.sqrt((1 - prop) / prop) * X.T @ rng.standard_normal(p)
+    Y_sel = scipy.linalg.solve_triangular(S_sqrt.T, Z_sel, lower=True)
+    test_sel = np.linalg.inv(X.T) @ Z_sel
+    assert np.allclose(Y_sel, test_sel)
+    
+    Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
+    X_sel = np.sqrt(prop) * X
+    GN.fit(X, Df)
+    pivots = []
+
+    assert np.allclose(X.T @ Y_sel, Z_sel)
+
+    df = lasso_inference(GN, 
+                         prop * lamval / p,
+                         (X_sel, Df_sel, None),
+                         (X, Df, None),
+                         dispersion=1)
+    if df is not None:
+        active = list(df.index)
+        df['target'] = np.linalg.inv(S[active][:,active]) @ mu[active]
+
+    if df is not None:
+        df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
+        
     return df
 
-def sample_gaussian(n,
-                    p,
-                    fit_intercept,
-                    standardize,
-                    rng=None,
-                    alt=False,
-                    s=10,
-                    prop=0.8,
-                    penalty_facs=False,
-                    cv=True):
+def sample_randomX(n,
+                   p,
+                   fit_intercept,
+                   standardize,
+                   rng=None,
+                   alt=False,
+                   s=10,
+                   prop=0.8,
+                   penalty_facs=False,
+                   cv=True,
+                   upper_limits=np.inf):
 
     if rng is None:
         rng = np.random.default_rng(0)
@@ -157,7 +232,8 @@ def sample_gaussian(n,
     GN = GLMNet(response_id='response',
                 fit_intercept=fit_intercept,
                 standardize=standardize,
-                penalty_factor=penalty_factor)
+                penalty_factor=penalty_factor,
+                upper_limits=upper_limits)
 
     m = int(prop*n)
 
@@ -196,37 +272,55 @@ def sample_gaussian(n,
 @pytest.mark.parametrize('standardize', [True, False])
 @pytest.mark.parametrize('penalty_facs', [True, False])
 @pytest.mark.parametrize('fit_intercept', [True, False])
+#@pytest.mark.parametrize('upper_limits', [np.inf, 0.1])
 @pytest.mark.parametrize('p', [103])
 @pytest.mark.parametrize('n', [500])
-def test_gaussian(n,
-                  p,
-                  standardize,
-                  fit_intercept,
-                  penalty_facs):
+def test_randomX(n,
+                 p,
+                 standardize,
+                 fit_intercept,
+                 penalty_facs,
+                 upper_limits=np.inf):
 
     for _ in range(5):
         df = None
         while df is None: # make sure it is run
-            df = sample_gaussian(n=n,
-                                 p=p,
-                                 standardize=standardize,
-                                 fit_intercept=fit_intercept,
-                                 penalty_facs=penalty_facs,
-                                 cv=False)
-
-
-def main(fit_intercept=True,
-         standardize=True,
-         n=500,
-         p=75,
-         s=5,
-         ntrial=500,
-         rng=None,
-         family='gaussian',
-         alt=False,
-         cv=False,
-         orthogonal=False,
-         penalty_facs=False):
+            kwargs = dict(n=n,
+                          p=p,
+                          standardize=standardize,
+                          fit_intercept=fit_intercept,
+                          penalty_facs=penalty_facs,
+                          cv=False,
+                          upper_limits=upper_limits)
+
+            df = main(sample_randomX,
+                      kwargs,
+                      ntrial=1)
+
+def test_orthogonal(p=100):
+
+    for _ in range(5):
+        df = None
+        while df is None: # make sure it is run
+            df = main(sample_orthogonal,
+                      {'p':p},
+                      ntrial=1)
+
+
+def test_AR1(p=100,
+             rho=0.6):
+
+    for _ in range(5):
+        df = None
+        while df is None: # make sure it is run
+            df = sample_AR1(p=p,
+                            rho=rho)
+
+            
+
+def main(sampler,
+         kwargs,
+         ntrial=500):
 
     ncover = 0
     nsel = 0
@@ -234,18 +328,10 @@ def main(fit_intercept=True,
     dfs = []
 
     rng = np.random.default_rng(0)
+    kwargs.update(rng=rng)
+    
     for i in range(ntrial):
-        df = run_inference(n,
-                           p,
-                           fit_intercept,
-                           standardize,
-                           s=s,
-                           rng=rng,
-                           family=family,
-                           alt=alt,
-                           cv=cv,
-                           orthogonal=orthogonal,
-                           penalty_facs=penalty_facs)
+        df = sampler(**kwargs)
         if df is not None:
             dfs.append(df)
         if len(dfs) > 0:
@@ -258,7 +344,8 @@ def main(fit_intercept=True,
                   (all_df['pval'] < 0.05).mean(),
                   (all_df['pivot'] < 0.05).mean(), all_df['pivot'].std())
 
-
+    if len(dfs) > 0:
+        return all_df
         
 def test_truncated_inference(B=1000,
                              smoothing_sigma=np.sqrt(1/3),

From 26526fc244fd654f443075f7a56acd6bde306ed5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 8 Jul 2024 22:03:57 -0700
Subject: [PATCH 41/44] added functions for lasso from score form and resampler
 form

---
 glmnet/inference.py     | 126 ++++++++++++++++++++++++++++++++++++++--
 tests/test_inference.py |  95 ++++++++++++++++++++----------
 2 files changed, 187 insertions(+), 34 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index ed6cd91..b8175c8 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -18,8 +18,9 @@
 mp.dps = 80
 
 from .base import _get_design
+from .glmnet import GLMNet
 
-from dataclasses import dataclass
+from dataclasses import dataclass, asdict
 from typing import Optional
 
 @dataclass
@@ -236,6 +237,7 @@ def lasso_inference(glmnet_obj,
         raise NotImplementedError('upper/lower limits coming soon')
 
     active_set = np.nonzero(FL.coef_ != 0)[0]
+
     if active_set.shape[0] != 0:
         inactive_set = np.nonzero(FL.coef_ == 0)[0]
 
@@ -288,8 +290,8 @@ def lasso_inference(glmnet_obj,
         else:
             lower_limits = FL.lower_limits[active_set]
             
-        upper_limits = (D_sel.scaler_ @ np.hstack([0, upper_limits]))[1:]
-        lower_limits = (D_sel.scaler_ @ np.hstack([0, lower_limits]))[1:]
+        upper_limits = D_sel.scaling_ * upper_limits
+        lower_limits = D_sel.scaling_ * lower_limits
 
         P_full = D.quadratic_form(unreg_GLM._information,
                                   transformed=True)
@@ -309,7 +311,7 @@ def lasso_inference(glmnet_obj,
         signs = np.sign(FL.coef_[active_set])
 
         noisy_mle = D.raw_to_scaled(unreg_sel_GLM.state_)
-
+        
         if FL.fit_intercept:
             penfac = np.hstack([0, penfac])
             signs = np.hstack([0, signs])
@@ -343,7 +345,9 @@ def lasso_inference(glmnet_obj,
 
         ## the GLM's coef and intercept are on the original scale
         ## we transform them here to the (typically) unitless "standardized" scale
+
         full_mle = D.raw_to_scaled(unreg_GLM.state_)
+
         if FL.fit_intercept:
             full_mle = full_mle._stack
         else:
@@ -1063,3 +1067,117 @@ def F(theta):
         mle = np.nan
         
     return L, U, mle, pval, sel_distr
+
+def score_inference(score,
+                    cov_score,
+                    lamval,
+                    prop=0.8,
+                    chol_cov=None,
+                    perturbation=None,
+                    rng=None):
+
+    # perturbation should be N(0, cov_score) roughly
+
+    # this is the X for the LASSO problem
+    # X.T @ X = S = cov_score
+
+    if chol_cov is None:
+        chol_cov = X = np.linalg.cholesky(cov_score).T 
+
+    if rng is None:
+        rng = np.random.default_rng()
+
+    # shorthand
+    Z = score # X.T @ Y in OLS case
+    X = chol_cov 
+    p = X.shape[1]
+    
+    if perturbation is None:
+        perturbation = X.T @ rng.standard_normal(p) # X is square here...
+
+    # this is the Y of the LASSO problem
+
+    Y = scipy.linalg.solve_triangular(chol_cov.T, Z, lower=True)
+    Df = pd.DataFrame({'response':Y})
+
+    GN = GLMNet(response_id='response',
+                fit_intercept=False,
+                standardize=False)
+    GN.fit(X, Df)
+    
+    Z_sel = Z + np.sqrt((1 - prop) / prop) * perturbation
+    Y_sel = scipy.linalg.solve_triangular(chol_cov.T, Z_sel, lower=True)
+    Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
+    X_sel = np.sqrt(prop) * X
+
+    return lasso_inference(GN, 
+                           prop * lamval / p,
+                           (X_sel, Df_sel, None),
+                           (X, Df, None),
+                           dispersion=1)
+
+def resampler_inference(sample,
+                        lamval=None,
+                        lam_frac=1,
+                        prop=0.8,
+                        random_idx=None,
+                        rng=None,
+                        estimate=None,
+                        standardize=True):
+
+    if estimate is None:
+        estimate = sample.mean(0)
+
+    centered = sample - estimate[None,:]
+    B, p = centered.shape
+
+    if random_idx is None:
+        if rng is None:
+            rng = np.random.default_rng()
+        random_idx = rng.choice(B, 1)
+        
+    prec_score = centered.T @ centered / B
+    cov_score = np.linalg.inv(prec_score)
+
+    centered_scores = centered @ cov_score
+    scaling = centered_scores.std(0)
+    score = cov_score @ estimate
+
+    if standardize:
+        centered_scores /= scaling[None,:]
+        cov_score /= np.multiply.outer(scaling, scaling)
+        score /= scaling
+        
+    # pick a lam
+    if lamval is None:
+        max_scores = np.fabs(centered_scores).max(1)
+        lamval = lam_frac * np.median(max_scores)
+
+    # pick a pseudo-Gaussian perturbation
+    perturbation = centered_scores[random_idx].reshape((p,))
+    
+    df = score_inference(score=score,
+                         cov_score=cov_score,
+                         lamval=lamval,
+                         prop=prop,
+                         perturbation=perturbation)
+
+    if df is not None:
+        if standardize:
+            for col in ['mle', 'upper', 'lower']:
+                df[col].values[:] = df[col] / scaling[df.index]
+
+            for j in df.index:
+                df.loc[j,'TG'] = None
+                # XX try to correct this!!
+                # tg = df.loc[j, 'TG']
+                # tg_params = asdict(tg)
+                # tg_params.update(estimate=tg.estimate * scaling[j],
+                #                  sigma=tg.sigma * scaling[j],
+                #                  smoothing_sigma=tg.smoothing_sigma * scaling[j],
+                #                  lower_bound=tg.lower_bound * scaling[j],
+                #                  upper_bound=tg.upper_bound * scaling[j])
+                # df.loc[j,'TG'] = TruncatedGaussian(**tg_params)
+
+    return df
+
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 7b4902e..4b845bc 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -8,6 +8,8 @@
 
 from glmnet import GLMNet
 from glmnet.inference import (lasso_inference,
+                              score_inference,
+                              resampler_inference,
                               TruncatedGaussian)
 
 def test_Auto():
@@ -124,6 +126,41 @@ def sample_orthogonal(rng=None,
 
     return df
 
+def resample_orthogonal(rng=None,
+                        p=50,
+                        s=5,
+                        alt=True,
+                        prop=0.8,
+                        standardize=True,
+                        B=2000):
+
+    # when standardize is True, coverage will be fine but pivot won't be
+    # this is because the TruncatedGaussian instance is not corrected for scale
+
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+    if alt:
+        beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+
+    scale = np.linspace(1, 3, p)
+    beta_hat = beta + rng.standard_normal(p) * scale
+    bootstrap_noise = rng.standard_normal((B, p)) * scale[None,:]
+    sample = bootstrap_noise + beta_hat[None,:]
+
+    df = resampler_inference(sample,
+                             prop=prop,
+                             standardize=standardize)
+    if df is not None:
+        df['target'] = beta[df.index]
+        if not standardize:
+            df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j,'target']) for j in df.index]
+        else:
+            # scaling transformation not taken into account so cannot reuse TruncatedGaussian object
+            df['pivot'] = np.nan * df['pval']
+        return df
+
 def sample_AR1(rho=0.6,               
                rng=None,
                p=100,
@@ -148,7 +185,8 @@ def sample_cov(S,
                p=100,
                s=5,
                alt=True,
-               prop=0.8):
+               prop=0.8,
+               lamval=3):
 
     if rng is None:
         rng = np.random.default_rng(0)
@@ -163,30 +201,12 @@ def sample_cov(S,
     mu = S @ beta
 
     Z = mu + noise
-    Y = scipy.linalg.solve_triangular(S_sqrt.T, Z, lower=True)
-    Df = pd.DataFrame({'response':Y})
-    GN = GLMNet(response_id='response',
-                fit_intercept=False,
-                standardize=False)
 
-    lamval = 3
-    Z_sel = Z + np.sqrt((1 - prop) / prop) * X.T @ rng.standard_normal(p)
-    Y_sel = scipy.linalg.solve_triangular(S_sqrt.T, Z_sel, lower=True)
-    test_sel = np.linalg.inv(X.T) @ Z_sel
-    assert np.allclose(Y_sel, test_sel)
-    
-    Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
-    X_sel = np.sqrt(prop) * X
-    GN.fit(X, Df)
-    pivots = []
-
-    assert np.allclose(X.T @ Y_sel, Z_sel)
-
-    df = lasso_inference(GN, 
-                         prop * lamval / p,
-                         (X_sel, Df_sel, None),
-                         (X, Df, None),
-                         dispersion=1)
+    df = score_inference(score=Z,
+                         cov_score=S,
+                         lamval=lamval,
+                         prop=prop,
+                         chol_cov=S_sqrt)
     if df is not None:
         active = list(df.index)
         df['target'] = np.linalg.inv(S[active][:,active]) @ mu[active]
@@ -203,6 +223,7 @@ def sample_randomX(n,
                    rng=None,
                    alt=False,
                    s=10,
+                   snr=1,
                    prop=0.8,
                    penalty_facs=False,
                    cv=True,
@@ -214,11 +235,12 @@ def sample_randomX(n,
     subs = rng.choice(p, s, replace=False)
 
     X = rng.standard_normal((n, p))
-    D = np.linspace(1, p, p) / p + 0.2
+    D = np.ones(p) # np.linspace(1, p, p) / p + 1
     X *= D[None,:]
-    Y = rng.standard_normal(n) * np.fabs(1+np.random.standard_normal())
+    sd = np.fabs(1+np.random.standard_normal())
+    Y = rng.standard_normal(n) * sd
     if alt:
-        beta[subs] = rng.uniform(3, 5) * rng.choice([-1,1], size=s, replace=True) / np.sqrt(n)
+        beta[subs] = snr * (rng.uniform(3, 5) * rng.choice([-1,1], size=s, replace=True) / np.sqrt(n))
 
     mu = X @ beta
     Y += mu
@@ -239,13 +261,15 @@ def sample_randomX(n,
 
     Df = pd.DataFrame({'response':Y})
     GN.fit(X[:m], Df.iloc[:m]) 
-    cv = True
+
     if cv:
         GN.cross_validation_path(X[:m], Df.iloc[:m])
         lamval = GN.index_best_['Mean Squared Error']
     else:
-        eps = rng.standard_normal((X.shape[0], 1000))
-        lamval = 1.2 * np.fabs(X.T @ eps).max() / X.shape[0]
+        eps = rng.standard_normal((X.shape[0], 1000)) * sd
+        noise_score = X.T @ eps
+        lamval = 1.2 * np.median(np.fabs(noise_score).max(0)) / X.shape[0]
+        
     df = lasso_inference(GN, 
                          lamval,
                          (X[:m], Df.iloc[:m], None),
@@ -297,6 +321,17 @@ def test_randomX(n,
                       kwargs,
                       ntrial=1)
 
+def test_resampler(p=50,
+                   standardize=True):
+
+    for _ in range(5):
+        df = None
+        while df is None: # make sure it is run
+            df = main(resample_orthogonal,
+                      {'p':p},
+                      ntrial=1)
+
+
 def test_orthogonal(p=100):
 
     for _ in range(5):

From 5e2de209ea4fa42801ee1c812cecc5ddf68a6228 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 8 Jul 2024 23:12:13 -0700
Subject: [PATCH 42/44] test for non-diagonal covariance with resample

---
 tests/test_inference.py | 53 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/tests/test_inference.py b/tests/test_inference.py
index 4b845bc..39bf74d 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -180,6 +180,51 @@ def sample_AR1(rho=0.6,
                       alt=alt,
                       prop=prop)
 
+def resample_AR1(rng=None,
+                 p=50,
+                 rho=0.6,
+                 s=5,
+                 alt=True,
+                 prop=0.8,
+                 standardize=True,
+                 B=2000):
+
+    # when standardize is True, coverage will be fine but pivot won't be
+    # this is because the TruncatedGaussian instance is not corrected for scale
+
+    D = np.fabs(np.subtract.outer(np.arange(p), np.arange(p)))
+    dispersion = 2
+    S_init = (rho**D) * dispersion
+    S = np.linalg.inv(S_init)
+    
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+    if alt:
+        beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+
+    S_sqrt = np.linalg.cholesky(S)
+    beta_hat = beta + S_sqrt @ rng.standard_normal(p) 
+    bootstrap_noise = rng.standard_normal((B, p)) @ S_sqrt.T
+    sample = bootstrap_noise + beta_hat[None,:]
+
+    S_i = np.linalg.inv(S)
+    mu = S_i @ beta
+
+    df = resampler_inference(sample,
+                             prop=prop,
+                             standardize=standardize)
+    if df is not None:
+        prec_E = S_i[df.index][:,df.index]
+        df['target'] = np.linalg.inv(prec_E) @ mu[df.index]
+        if not standardize:
+            df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j,'target']) for j in df.index]
+        else:
+            # scaling transformation not taken into account so cannot reuse TruncatedGaussian object
+            df['pivot'] = np.nan * df['pval']
+        return df
+
 def sample_cov(S,
                rng=None,
                p=100,
@@ -374,10 +419,10 @@ def main(sampler,
             ncover += ((all_df['lower'] < all_df['target']) & (all_df['upper'] > all_df['target'])).sum()
             nsel += all_df.shape[0]
 
-            print('cover:',
-                  ncover / nsel, 'power:',
-                  (all_df['pval'] < 0.05).mean(),
-                  (all_df['pivot'] < 0.05).mean(), all_df['pivot'].std())
+            print('cover:', ncover / nsel,
+                  'power:', (all_df['pval'] < 0.05).mean(),
+                  'pivot<0.05:', (all_df['pivot'] < 0.05).mean(),
+                  'std(pivot):', all_df['pivot'].std())
 
     if len(dfs) > 0:
         return all_df

From f1493e333d12c261ab4ac65a966766e582092e6f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 26 Jul 2024 13:09:42 -0700
Subject: [PATCH 43/44] making it possible to store results of inference a bit
 easier

---
 tests/test_inference.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tests/test_inference.py b/tests/test_inference.py
index 39bf74d..d9430e9 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -124,7 +124,7 @@ def sample_orthogonal(rng=None,
         df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
         assert np.allclose(df['pivot'], df['pivot_byhand'])        
 
-    return df
+    return df, None
 
 def resample_orthogonal(rng=None,
                         p=50,
@@ -223,7 +223,7 @@ def resample_AR1(rng=None,
         else:
             # scaling transformation not taken into account so cannot reuse TruncatedGaussian object
             df['pivot'] = np.nan * df['pval']
-        return df
+        return df, None
 
 def sample_cov(S,
                rng=None,
@@ -246,12 +246,14 @@ def sample_cov(S,
     mu = S @ beta
 
     Z = mu + noise
-
+    perturbation = X.T @ rng.standard_normal(p)
+    
     df = score_inference(score=Z,
                          cov_score=S,
                          lamval=lamval,
                          prop=prop,
-                         chol_cov=S_sqrt)
+                         chol_cov=S_sqrt,
+                         perturbation=perturbation)
     if df is not None:
         active = list(df.index)
         df['target'] = np.linalg.inv(S[active][:,active]) @ mu[active]
@@ -259,7 +261,7 @@ def sample_cov(S,
     if df is not None:
         df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
         
-    return df
+    return df, {'score':Z,'cov_score':S, 'lamval':lamval, 'prop':prop, 'chol_cov':S_sqrt, 'perturbation':perturbation}
 
 def sample_randomX(n,
                    p,
@@ -336,7 +338,7 @@ def sample_randomX(n,
     if df is not None:
         df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
         
-    return df
+    return df, None
 
 @pytest.mark.parametrize('standardize', [True, False])
 @pytest.mark.parametrize('penalty_facs', [True, False])
@@ -411,7 +413,9 @@ def main(sampler,
     kwargs.update(rng=rng)
     
     for i in range(ntrial):
-        df = sampler(**kwargs)
+        df, _ = sampler(**kwargs)
+        print(kwargs)
+        print(rng.standard_normal())
         if df is not None:
             dfs.append(df)
         if len(dfs) > 0:

From 21311a6c2c1882ea0bc8257a004db62feaebbd03 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 26 Jul 2024 13:10:00 -0700
Subject: [PATCH 44/44] making it possible to store results of inference a bit
 easier

---
 glmnet/inference.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/glmnet/inference.py b/glmnet/inference.py
index b8175c8..8388bb6 100644
--- a/glmnet/inference.py
+++ b/glmnet/inference.py
@@ -420,7 +420,11 @@ def lasso_inference(glmnet_obj,
         if FL.fit_intercept:
             idx = ['intercept'] + idx
 
-        return pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us, 'TG':TGs}, index=idx)
+        TG_df = pd.concat([pd.Series(asdict(tg)) for tg in TGs], axis=1).T
+        TG_df.index = idx
+        df = pd.concat([pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx), TG_df], axis=1)
+        df['TG'] = TGs
+        return df
     else:
         return None
 
@@ -1076,7 +1080,7 @@ def score_inference(score,
                     perturbation=None,
                     rng=None):
 
-    # perturbation should be N(0, cov_score) roughly
+    # perturbation should be N(0, cov_score) roughly -- shouldn't have a multiplier for the proportion!
 
     # this is the X for the LASSO problem
     # X.T @ X = S = cov_score