diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..8edfacf
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+glmnet/_version.py export-subst
diff --git a/README.md b/README.md
index 09bc0e7..26544cc 100644
--- a/README.md
+++ b/README.md
@@ -1,36 +1,7 @@
-# pyglmnet
-Python bindings to glmnet base source
+# glmnet
 
 
-## Organization
-
-- `src` contains `glmnet` C++ code for the two main routines
-  `spwls_exp` and `wls_exp`.
-- `tests` contains test data from R so that we can verify we get the
-  same results in python. Also contains some code for regenerating
-  data using an instrumented version of glmnet.
-  
-
-## Steps to build
-
-- Assuming Xcode with command line tools is installed already.
-
-- Also probably a good idee to test in a new virtual env
-
-### Create a new virtual env, say `pyg`.
-
-```
-conda create -n pyg python=3.10 -y
-conda activate pyg
-```
-
-### Install requirements
-
-```
-pip install -r requirements.txt
-```
-
-#### Specifying `Eigen`
+## Specifying `Eigen`
 
 By default, build assumes the top level directory contains a directory `eigen` with the `Eigen` headers. This
 can be achieved with
@@ -48,26 +19,6 @@ the appropriate path.
 pip install .
 ```
 
-Load the module:
-
-```
-import glmnet.glmnetpp as gpp
-```
-
-This provides two functions:
-
-- `gpp.wls_exp` for dense $x$
-- `gpp.spwls_exp` for sparse $x$
-
-### Testing
-
-Example invocations can be seen in `tests/test_pyglmnet.py`. So one can run all the tests via:
-   
-```
-pytest tests
-```
-
-No error messages mean all is well.
 
 
 
diff --git a/glmnet/_version.py b/glmnet/_version.py
index 9b01ea2..3f737f4 100644
--- a/glmnet/_version.py
+++ b/glmnet/_version.py
@@ -5,8 +5,9 @@
 # directories (produced by setup.py build) will contain a much shorter file
 # that just contains the computed version number.
 
-# This file is released into the public domain. Generated by
-# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer)
+# This file is released into the public domain.
+# Generated by versioneer-0.29
+# https://github.com/python-versioneer/python-versioneer
 
 """Git implementation of _version.py."""
 
@@ -15,10 +16,11 @@
 import re
 import subprocess
 import sys
-from typing import Callable, Dict
+from typing import Any, Callable, Dict, List, Optional, Tuple
+import functools
 
 
-def get_keywords():
+def get_keywords() -> Dict[str, str]:
     """Get the keywords needed to look up the version information."""
     # these strings will be replaced by git during git-archive.
     # setup.py/versioneer.py will grep for the variable names, so they must
@@ -34,17 +36,24 @@ def get_keywords():
 class VersioneerConfig:
     """Container for Versioneer configuration parameters."""
 
+    VCS: str
+    style: str
+    tag_prefix: str
+    parentdir_prefix: str
+    versionfile_source: str
+    verbose: bool
 
-def get_config():
+
+def get_config() -> VersioneerConfig:
     """Create, populate and return the VersioneerConfig() object."""
     # these strings are filled in when 'setup.py versioneer' creates
     # _version.py
     cfg = VersioneerConfig()
     cfg.VCS = "git"
     cfg.style = "pep440"
-    cfg.tag_prefix = ""
-    cfg.parentdir_prefix = "ISLP-"
-    cfg.versionfile_source = "ISLP/_version.py"
+    cfg.tag_prefix = "v"
+    cfg.parentdir_prefix = "glmnet-"
+    cfg.versionfile_source = "glmnet/_version.py"
     cfg.verbose = False
     return cfg
 
@@ -57,9 +66,9 @@ class NotThisMethod(Exception):
 HANDLERS: Dict[str, Dict[str, Callable]] = {}
 
 
-def register_vcs_handler(vcs, method):  # decorator
+def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
     """Create decorator to mark a method as the handler of a VCS."""
-    def decorate(f):
+    def decorate(f: Callable) -> Callable:
         """Store f in HANDLERS[vcs][method]."""
         if vcs not in HANDLERS:
             HANDLERS[vcs] = {}
@@ -68,11 +77,25 @@ def decorate(f):
     return decorate
 
 
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
-                env=None):
+def run_command(
+    commands: List[str],
+    args: List[str],
+    cwd: Optional[str] = None,
+    verbose: bool = False,
+    hide_stderr: bool = False,
+    env: Optional[Dict[str, str]] = None,
+) -> Tuple[Optional[str], Optional[int]]:
     """Call the given command(s)."""
     assert isinstance(commands, list)
     process = None
+
+    popen_kwargs: Dict[str, Any] = {}
+    if sys.platform == "win32":
+        # This hides the console window if pythonw.exe is used
+        startupinfo = subprocess.STARTUPINFO()
+        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+        popen_kwargs["startupinfo"] = startupinfo
+
     for command in commands:
         try:
             dispcmd = str([command] + args)
@@ -80,10 +103,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
             process = subprocess.Popen([command] + args, cwd=cwd, env=env,
                                        stdout=subprocess.PIPE,
                                        stderr=(subprocess.PIPE if hide_stderr
-                                               else None))
+                                               else None), **popen_kwargs)
             break
-        except OSError:
-            e = sys.exc_info()[1]
+        except OSError as e:
             if e.errno == errno.ENOENT:
                 continue
             if verbose:
@@ -103,7 +125,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
     return stdout, process.returncode
 
 
-def versions_from_parentdir(parentdir_prefix, root, verbose):
+def versions_from_parentdir(
+    parentdir_prefix: str,
+    root: str,
+    verbose: bool,
+) -> Dict[str, Any]:
     """Try to determine the version from the parent directory name.
 
     Source tarballs conventionally unpack into a directory that includes both
@@ -128,13 +154,13 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
 
 
 @register_vcs_handler("git", "get_keywords")
-def git_get_keywords(versionfile_abs):
+def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
     """Extract version information from the given file."""
     # the code embedded in _version.py can just fetch the value of these
     # keywords. When used from setup.py, we don't want to import _version.py,
     # so we do it with a regexp instead. This function is not used from
     # _version.py.
-    keywords = {}
+    keywords: Dict[str, str] = {}
     try:
         with open(versionfile_abs, "r") as fobj:
             for line in fobj:
@@ -156,7 +182,11 @@ def git_get_keywords(versionfile_abs):
 
 
 @register_vcs_handler("git", "keywords")
-def git_versions_from_keywords(keywords, tag_prefix, verbose):
+def git_versions_from_keywords(
+    keywords: Dict[str, str],
+    tag_prefix: str,
+    verbose: bool,
+) -> Dict[str, Any]:
     """Get version information from git keywords."""
     if "refnames" not in keywords:
         raise NotThisMethod("Short version file found")
@@ -220,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
 
 
 @register_vcs_handler("git", "pieces_from_vcs")
-def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
+def git_pieces_from_vcs(
+    tag_prefix: str,
+    root: str,
+    verbose: bool,
+    runner: Callable = run_command
+) -> Dict[str, Any]:
     """Get version from 'git describe' in the root of the source tree.
 
     This only gets called if the git-archive 'subst' keywords were *not*
@@ -228,13 +263,18 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
     version string, meaning we're inside a checked out source tree.
     """
     GITS = ["git"]
-    TAG_PREFIX_REGEX = "*"
     if sys.platform == "win32":
         GITS = ["git.cmd", "git.exe"]
-        TAG_PREFIX_REGEX = r"\*"
+
+    # GIT_DIR can interfere with correct operation of Versioneer.
+    # It may be intended to be passed to the Versioneer-versioned project,
+    # but that should not change where we get our version from.
+    env = os.environ.copy()
+    env.pop("GIT_DIR", None)
+    runner = functools.partial(runner, env=env)
 
     _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
-                   hide_stderr=True)
+                   hide_stderr=not verbose)
     if rc != 0:
         if verbose:
             print("Directory %s not under git control" % root)
@@ -242,11 +282,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
 
     # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
     # if there isn't one, this yields HEX[-dirty] (no NUM)
-    describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty",
-                                     "--always", "--long",
-                                     "--match",
-                                     "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)],
-                              cwd=root)
+    describe_out, rc = runner(GITS, [
+        "describe", "--tags", "--dirty", "--always", "--long",
+        "--match", f"{tag_prefix}[[:digit:]]*"
+    ], cwd=root)
     # --long was added in git-1.5.5
     if describe_out is None:
         raise NotThisMethod("'git describe' failed")
@@ -256,7 +295,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
         raise NotThisMethod("'git rev-parse' failed")
     full_out = full_out.strip()
 
-    pieces = {}
+    pieces: Dict[str, Any] = {}
     pieces["long"] = full_out
     pieces["short"] = full_out[:7]  # maybe improved later
     pieces["error"] = None
@@ -335,8 +374,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
     else:
         # HEX: no tags
         pieces["closest-tag"] = None
-        count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
-        pieces["distance"] = int(count_out)  # total number of commits
+        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+        pieces["distance"] = len(out.split())  # total number of commits
 
     # commit date: see ISO-8601 comment in git_versions_from_keywords()
     date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
@@ -348,14 +387,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
     return pieces
 
 
-def plus_or_dot(pieces):
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
     """Return a + if we don't already have one, else return a ."""
     if "+" in pieces.get("closest-tag", ""):
         return "."
     return "+"
 
 
-def render_pep440(pieces):
+def render_pep440(pieces: Dict[str, Any]) -> str:
     """Build up version string, with post-release "local version identifier".
 
     Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
@@ -380,7 +419,7 @@ def render_pep440(pieces):
     return rendered
 
 
-def render_pep440_branch(pieces):
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
     """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
 
     The ".dev0" means not master branch. Note that .dev0 sorts backwards
@@ -410,7 +449,7 @@ def render_pep440_branch(pieces):
     return rendered
 
 
-def pep440_split_post(ver):
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
     """Split pep440 version string at the post-release segment.
 
     Returns the release segments before the post-release and the
@@ -420,7 +459,7 @@ def pep440_split_post(ver):
     return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
 
 
-def render_pep440_pre(pieces):
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
     """TAG[.postN.devDISTANCE] -- No -dirty.
 
     Exceptions:
@@ -432,7 +471,7 @@ def render_pep440_pre(pieces):
             tag_version, post_version = pep440_split_post(pieces["closest-tag"])
             rendered = tag_version
             if post_version is not None:
-                rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"])
+                rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
             else:
                 rendered += ".post0.dev%d" % (pieces["distance"])
         else:
@@ -444,7 +483,7 @@ def render_pep440_pre(pieces):
     return rendered
 
 
-def render_pep440_post(pieces):
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
     """TAG[.postDISTANCE[.dev0]+gHEX] .
 
     The ".dev0" means dirty. Note that .dev0 sorts backwards
@@ -471,7 +510,7 @@ def render_pep440_post(pieces):
     return rendered
 
 
-def render_pep440_post_branch(pieces):
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
     """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
 
     The ".dev0" means not master branch.
@@ -500,7 +539,7 @@ def render_pep440_post_branch(pieces):
     return rendered
 
 
-def render_pep440_old(pieces):
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
     """TAG[.postDISTANCE[.dev0]] .
 
     The ".dev0" means dirty.
@@ -522,7 +561,7 @@ def render_pep440_old(pieces):
     return rendered
 
 
-def render_git_describe(pieces):
+def render_git_describe(pieces: Dict[str, Any]) -> str:
     """TAG[-DISTANCE-gHEX][-dirty].
 
     Like 'git describe --tags --dirty --always'.
@@ -542,7 +581,7 @@ def render_git_describe(pieces):
     return rendered
 
 
-def render_git_describe_long(pieces):
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
     """TAG-DISTANCE-gHEX[-dirty].
 
     Like 'git describe --tags --dirty --always -long'.
@@ -562,7 +601,7 @@ def render_git_describe_long(pieces):
     return rendered
 
 
-def render(pieces, style):
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
     """Render the given version pieces into the requested style."""
     if pieces["error"]:
         return {"version": "unknown",
@@ -598,7 +637,7 @@ def render(pieces, style):
             "date": pieces.get("date")}
 
 
-def get_versions():
+def get_versions() -> Dict[str, Any]:
     """Get version information or return default if unable to do so."""
     # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
     # __file__, we can work backwards from there to the root. Some
diff --git a/glmnet/base.py b/glmnet/base.py
index 257fbe0..18708c0 100644
--- a/glmnet/base.py
+++ b/glmnet/base.py
@@ -55,6 +55,10 @@ def __post_init__(self, standardize, intercept):
             self.X = (self.X - self.centers_[None,:]) / self.scaling_[None,:]
             self.X = np.asfortranarray(self.X)
 
+        self.unscaler_ = UnscaleOperator(centers=self.centers_,
+                                         scaling=self.scaling_)
+        self.scaler_ = ScaleOperator(centers=self.centers_,
+                                     scaling=self.scaling_)
     # LinearOperator API
 
     def _matvec(self, x):
@@ -95,8 +99,15 @@ def _rmatvec(self, r):
 
     def quadratic_form(self,
                        G=None,
-                       columns=None):
+                       columns=None,
+                       transformed=False):
         '''
+        if transformed is False: compute
+
+        [1 X]'G[1 X[:,E]]
+
+        if transformed is True: compute
+
         A'GA[:,E]
 
         where A is the effective matrix
@@ -104,6 +115,7 @@ def quadratic_form(self,
         [1, XS^{-1} - 1 (xm/xs)']
 
         and E is a subset of columns
+
         '''
 
         # A is effective matrix
@@ -112,6 +124,9 @@ def quadratic_form(self,
         # A'G1 = S^{-1}(X' - xm 1')G1 = S^{-1}X'G1 - S^{-1}xm 1'G1
         # A'GA = S^{-1}X'GXS^{-1} - S^{-1}X'G1 xm/xs' - xm/xs 1'GXS^{-1} + xm/xs 1'G1 (xm/xs)'
         
+        # or, the reverse
+        # X'GX = SA'GAS + X'G1xm' + xm1'GX - xm1'G1xm'
+
         n, p = self.shape[0], self.shape[1] - 1
         
         if columns is None:
@@ -122,35 +137,49 @@ def quadratic_form(self,
 
         if G is None:
             XX_block = self.X.T @ X_R # have to assume this is not too expensive
+            if scipy.sparse.issparse(self.X): 
+                XX_block = XX_block.toarray()
             X1_block = self.X.sum(0) # X'1
             G_sum = n
-            
+
         else:
             GX = G @ X_R
             G1 = G @ np.ones(G.shape[0])
             XX_block = self.X.T @ GX
             X1_block = self.X.T @ G1
             G_sum = G1.sum()
-            
-        if scipy.sparse.issparse(XX_block):
-            XX_block = XX_block.toarray()
-            
-        # correct XX_block for standardize
-        
+
         xm, xs = self.centers_, self.scaling_
+        if scipy.sparse.issparse(self.X): # in this case X has not been transformed 
 
-        if columns is not None:
-            XX_block -= (np.multiply.outer(X1_block, xm[columns]) + np.multiply.outer(xm, X1_block[columns]))
-            XX_block += np.multiply.outer(xm, xm[columns]) * G_sum
-            XX_block /= np.multiply.outer(xs, xs[columns])
-        else:
-            XX_block -= (np.multiply.outer(X1_block, xm) + np.multiply.outer(xm, X1_block))
-            XX_block += np.multiply.outer(xm, xm) * G_sum
-            XX_block /= np.multiply.outer(xs, xs)
+            # correct XX_block for standardize
+
+            if transformed:
+                if columns is not None:
+                    XX_block -= (np.multiply.outer(X1_block, xm[columns]) + np.multiply.outer(xm, X1_block[columns]))
+                    XX_block += np.multiply.outer(xm, xm[columns]) * G_sum
+                    XX_block /= np.multiply.outer(xs, xs[columns])
+                else:
+                    XX_block -= (np.multiply.outer(X1_block, xm) + np.multiply.outer(xm, X1_block))
+                    XX_block += np.multiply.outer(xm, xm) * G_sum
+                    XX_block /= np.multiply.outer(xs, xs)
+
+                X1_block -= G_sum * xm
+                X1_block /= xs
+
+        else: # X will already have been transformed, so
+              # we only have to undo it if transformed=False
+
+            # or, the reverse
+            # X'GX = SA'GAS + X'G1xm' + xm1'GX - xm1'G1xm'
+
+            if not transformed:
+                XX_block *= np.multiply.outer(xs, xs[columns])
+                X1_block *= xs
+                XX_block += (np.multiply.outer(X1_block, xm[columns]) + np.multiply.outer(xm, X1_block[columns]))
+                XX_block += np.multiply.outer(xm, xm[columns]) * G_sum
+                X1_block += G_sum * xm
 
-        X1_block -= G_sum * xm
-        X1_block /= xs
-        
         Q = np.zeros((XX_block.shape[0] + 1,
                       XX_block.shape[1] + 1))
         Q[1:,1:] = XX_block
@@ -160,9 +189,103 @@ def quadratic_form(self,
         else:
             Q[0,1:] = X1_block
         Q[0,0] = G_sum
-        
+
         return Q
 
+    def scaled_to_raw(self, state):
+        """
+        Take a "scaled" (intercept, coef) (these are the params
+        used by glmnet) and return a (intercept, coef) on "raw" scale.
+        """
+        unscaled = self.unscaler_ @ state._stack
+        coef = unscaled[1:]
+        intercept = unscaled[0]
+        klass = state.__class__
+        return klass(coef=coef,
+                     intercept=intercept)
+
+    def raw_to_scaled(self, state):
+        """
+        Take a "raw" (intercept, coef) and return a (intercept, coef)
+        on "scaled" scale (i.e. the scale GLMnet uses in its objective).
+        """
+        unscaled = self.scaler_ @ state._stack
+        coef = unscaled[1:]
+        intercept = unscaled[0]
+        klass = state.__class__
+        return klass(coef=coef,
+                     intercept=intercept)
+
+@dataclass
+class UnscaleOperator(LinearOperator):
+
+    scaling : np.ndarray
+    centers: np.ndarray
+
+    def __post_init__(self):
+        ncoef = self.scaling.shape[0]
+        self.shape = (ncoef+1,)*2
+        self.dtype = float
+
+    # LinearOperator API
+    def _matvec(self, stacked):
+
+        intercept = float(stacked[0])
+        coef = np.squeeze(stacked[1:])
+
+        result = np.zeros(stacked.shape[0])
+        result[1:] = coef / self.scaling
+        result[0] = intercept - (result[1:] * self.centers).sum()
+
+        return result
+
+    def _rmatvec(self, stacked):
+
+        intercept = float(stacked[0])
+        coef = np.squeeze(stacked[1:])
+
+        result = np.zeros(stacked.shape[0])
+        result[0] = intercept
+        result[1:] = coef / self.scaling
+        result[1:] -= intercept * self.centers / self.scaling
+
+        return result
+
+@dataclass
+class ScaleOperator(LinearOperator):
+
+    scaling : np.ndarray
+    centers: np.ndarray
+
+    def __post_init__(self):
+        ncoef = self.scaling.shape[0]
+        self.shape = (ncoef+1,)*2
+        self.dtype = float
+
+    # LinearOperator API
+    def _matvec(self, stacked):
+
+        intercept = float(stacked[0])
+        coef = np.squeeze(stacked[1:])
+
+        result = np.zeros(stacked.shape[0])
+        result[1:] = coef * self.scaling
+        result[0] = intercept + (coef * self.centers).sum()
+
+        return result
+
+    def _rmatvec(self, stacked):
+
+        intercept = float(stacked[0])
+        coef = np.squeeze(stacked[1:])
+
+        result = np.zeros(stacked.shape[0])
+        result[0] = intercept
+        result[1:] = coef * self.scaling
+        result[1:] += intercept * self.centers
+
+        return result
+    
 @add_dataclass_docstring
 @dataclass
 class DiagonalOperator(LinearOperator):
diff --git a/glmnet/cox.py b/glmnet/cox.py
index c2e1f1d..4256850 100644
--- a/glmnet/cox.py
+++ b/glmnet/cox.py
@@ -57,11 +57,12 @@ def logl_score(self,
                    family,
                    y,
                    sample_weight):
+
         link_parameter = self.link_parameter
         family._result = family._coxdev(link_parameter,
                                         sample_weight)
         # the gradient is the gradient of the deviance
-        # we want deviance of the log-likelihood
+        # we want gradient of the log-likelihood
         return - family._result.gradient / 2
 
 @dataclass
@@ -190,10 +191,10 @@ def _get_family_spec(self,
                              status_id=self.family.status_id,
                              start_id=self.family.start_id)
 
-    def _check(self,
-               X,
-               y,
-               check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
         return _get_data(self,
                          X,
                          y,
@@ -212,7 +213,8 @@ def _summarize(self,
         # IRLS used normalized weights,
         # this unnormalizes them...
 
-        unscaled_precision_ = self.design_.quadratic_form(self._information)
+        unscaled_precision_ = self.design_.quadratic_form(self._information,
+                                                          transformed=False)
         
         keep = np.ones(unscaled_precision_.shape[0]-1, bool)
         if exclude is not []:
@@ -243,7 +245,10 @@ class RegCoxLM(RegGLM):
     
     fit_intercept: Literal[False] = False
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
         return _get_data(self,
                          X,
                          y,
@@ -268,10 +273,10 @@ class CoxNet(GLMNet):
     fit_intercept: Literal[False] = False
     regularized_estimator: BaseEstimator = RegCoxLM
     
-    def _check(self,
-               X,
-               y,
-               check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
         return _get_data(self,
                          X,
                          y,
@@ -290,6 +295,12 @@ def _get_family_spec(self,
                              status_id=self.family.status_id,
                              start_id=self.family.start_id)
     
+    def get_LM(self):
+        return CoxLM(family=self.family,
+                     offset_id=self.offset_id,
+                     weight_id=self.weight_id,
+                     response_id=self.response_id)
+
     def _get_initial_state(self,
                            X,
                            y,
@@ -305,10 +316,7 @@ def _get_initial_state(self,
         if keep.sum() > 0:
             X_keep = X[:,keep]
 
-            coxlm = CoxLM(family=self.family,
-                          offset_id=self.offset_id,
-                          weight_id=self.weight_id,
-                          response_id=self.response_id)
+            coxlm = self.get_LM()
             coxlm.fit(X_keep, y)
             coef_[keep] = coxlm.coef_
 
diff --git a/glmnet/docstrings.py b/glmnet/docstrings.py
index 95346ef..31ffed0 100644
--- a/glmnet/docstrings.py
+++ b/glmnet/docstrings.py
@@ -12,6 +12,11 @@
 #     Data class containing GLMNet control parameters.
 #     ''',
 
+    'intercept':'''
+intercept:
+    For a Design, is there an intercept?
+    ''',
+    
     'nlambda':'''
 nlambda: int
     Number of values on data-dependent grid of lambda values.
diff --git a/glmnet/glm.py b/glmnet/glm.py
index 6476cbf..e2ee28c 100644
--- a/glmnet/glm.py
+++ b/glmnet/glm.py
@@ -28,7 +28,6 @@
                      _get_data)
 
 from .base import (Design,
-                   DiagonalOperator,
                    _get_design)
 
 from .scoring import (Scorer,
@@ -54,44 +53,23 @@ class GLMFamilySpec(object):
     base: sm_family.Family = field(default_factory=sm_family.Gaussian)
 
     def link(self,
-             mean_parameter):
-        mu = mean_parameter # shorthand
+             mu):
         return self.base.link(mu)
-    link.__doc__ = """
-Parameters
-----------
-{mean_parameter}
-
-Returns
--------
-{linear_predictor}
-""".format(**_docstrings).strip()
     
     def deviance(self,
                  y,
-                 mean_parameter,
-                 sample_weight=None):
+                 mu,
+                 sample_weight):
         if sample_weight is not None:
-            return self.base.deviance(y, mean_parameter, freq_weights=sample_weight)
+            return self.base.deviance(y, mu, freq_weights=sample_weight)
         else:
-            return self.base.deviance(y, mean_parameter)
-    deviance.__doc__ = '''
-Parameters
-----------
-
-{mean_parameter}
-{sample_weight}
-
-Returns
--------
-{deviance}
-    '''.format(**_docstrings).strip()
+            return self.base.deviance(y, mu)
 
     def null_fit(self,
                  y,
-                 fit_intercept=True,
-                 sample_weight=None,
-                 offset=None):
+                 sample_weight,
+                 offset,
+                 fit_intercept):
 
         sample_weight = np.asarray(sample_weight)
         y = np.asarray(y)
@@ -131,63 +109,41 @@ def null_fit(self,
                              self,
                              offset,
                              None)
+
         else:
             state = GLMState(np.zeros(1), 0)
             state.link_parameter = offset
             state.mean_parameter = self.base.link.inverse(state.link_parameter)
         return state
-    null_fit.__doc__ = '''
-Parameters
-----------
-
-{mean_parameter}
-{fit_intercept}
-{sample_weight}
-{offset}
-    
-Returns
--------
-null_state: GLMState
-    Fitted null state of GLM.
-    '''.format(**_docstrings).strip()
 
     def get_null_deviance(self,
                           y,
-                          fit_intercept=True,
-                          sample_weight=None,
-                          offset=None):
+                          sample_weight,
+                          offset,
+                          fit_intercept):
         state0 = self.null_fit(y,
-                               fit_intercept=fit_intercept,
-                               sample_weight=sample_weight,
-                               offset=offset)
-        D = self.deviance(y,
-                          state0.mean_parameter,
-                          sample_weight=sample_weight)
+                               sample_weight,
+                               offset,
+                               fit_intercept)
+        D = self.deviance(y, state0.mean_parameter, sample_weight)
         return state0, D
 
-    get_null_deviance.__doc__ = '''
-Parameters
-----------
-
-{mean_parameter}
-{fit_intercept}
-{sample_weight}
-{offset}
+    def get_null_state(self,
+                       null_fit,
+                       nvars):
+        coefold = np.zeros(nvars)   # initial coefs = 0
+        state = GLMState(coef=coefold,
+                         intercept=null_fit.intercept)
+        state.mean_parameter = null_fit.mean_parameter
+        state.link_parameter = null_fit.link_parameter
+        return state
     
-Returns
--------
-null_state: GLMState
-    Fitted null state of GLM.
-{deviance}
-    '''.format(**_docstrings).strip()
-
     def get_response_and_weights(self,
                                  state,
-                                 response,
+                                 y,
                                  offset,
                                  sample_weight):
 
-        y = response # shorthand
         family = self.base
 
         # some checks for NAs/zeros
@@ -205,24 +161,28 @@ def get_response_and_weights(self,
 
         return pseudo_response, newton_weights
         
-    get_response_and_weights.__doc__ = '''
-Parameters
-----------
+    def _default_scorers(self):
 
-state: GLMState
-    State of GLM.
-{response}
-{offset}
-{sample_weight}
-    
-Returns
--------
-pseudo_response: np.ndarray
-    Pseudo-response for (quasi) Newton step.
-newton_weights: np.ndarray
-    Weights to be used for diagonal in Newton step.
-    
-    '''.format(**_docstrings).strip()
+        fam_name = self.base.__class__.__name__
+
+        def _dev(y, yhat, sample_weight):
+            return self.deviance(y, yhat, sample_weight) / y.shape[0]
+        dev_scorer = Scorer(name=f'{fam_name} Deviance',
+                            score=_dev,
+                            maximize=False)
+        
+        scorers_ = [dev_scorer,
+                    mse_scorer,
+                    mae_scorer,
+                    ungrouped_mse_scorer,
+                    ungrouped_mae_scorer]
+
+        if isinstance(self.base, sm_family.Binomial):
+            scorers_.extend([accuracy_scorer,
+                             auc_scorer,
+                             aucpr_scorer])
+
+        return scorers_
 
     def information(self,
                     state,
@@ -246,59 +206,24 @@ def information(self,
         n = W.shape[0]
         W = W.reshape(-1)
         return DiagonalOperator(W)
-    information.__doc__ = '''
-Parameters
-----------
-
-state: GLMState
-    State of GLM.
-{sample_weight}
-    
-Returns
--------
-information: DiagonalOperator
-    Diagonal information matrix of the response vector for
-    `state.mean_pararmeter`.  
-    '''.format(**_docstrings).strip()
-
-    # Private methods
-
-    def _default_scorers(self):
-        """
-        Construct default scorers for GLM.
-        """
 
-        fam_name = self.base.__class__.__name__
+@dataclass
+class DiagonalOperator(LinearOperator):
 
-        def _dev(y, yhat, sample_weight):
-            return self.deviance(y, yhat, sample_weight) / y.shape[0]
-        dev_scorer = Scorer(name=f'{fam_name} Deviance',
-                            score=_dev,
-                            maximize=False)
-        
-        scorers_ = [dev_scorer,
-                    mse_scorer,
-                    mae_scorer,
-                    ungrouped_mse_scorer,
-                    ungrouped_mae_scorer]
+    weights: np.ndarray
 
-        if isinstance(self.base, sm_family.Binomial):
-            scorers_.extend([accuracy_scorer,
-                             auc_scorer,
-                             aucpr_scorer])
+    def __post_init__(self):
+        self.weights = np.asarray(self.weights).reshape(-1)
+        n = self.weights.shape[0]
+        self.shape = (n, n)
 
-        return scorers_
+    def _matvec(self, arg):
+        return self.weights * arg.reshape(-1)
 
-    def _get_null_state(self,
-                        null_fit,
-                        nvars):
-        coefold = np.zeros(nvars)   # initial coefs = 0
-        state = GLMState(coef=coefold,
-                         intercept=null_fit.intercept)
-        state.mean_parameter = null_fit.mean_parameter
-        state.link_parameter = null_fit.link_parameter
-        return state
+    def _adjoint(self, arg):
+        return self._matvec(arg)
     
+
 @add_dataclass_docstring
 @dataclass
 class GLMControl(object):
@@ -314,9 +239,9 @@ class GLMBaseSpec(object):
     family: sm_family.Family = field(default_factory=sm_family.Gaussian)
     fit_intercept: bool = True
     control: GLMControl = field(default_factory=GLMControl)
-    response_id: Union[str,int] = None
-    weight_id: Union[str,int] = None
     offset_id: Union[str,int] = None
+    weight_id: Union[str,int] = None
+    response_id: Union[str,int] = None
     exclude: list = field(default_factory=list)
 
 add_dataclass_docstring(GLMBaseSpec, subs={'control':'control_glm'})
@@ -380,6 +305,7 @@ def logl_score(self,
         dmu_deta = family.link.inverse_deriv(self.link_parameter)
         
         # compute working residual
+        y = np.asarray(y).reshape(-1)
         r = (y - self.mu) 
         return sample_weight * r * dmu_deta / varmu 
 
@@ -459,7 +385,7 @@ class GLMBase(BaseEstimator,
               GLMBaseSpec):
 
     def _get_regularizer(self,
-                         X):
+                         nvars=None):
         return GLMRegularizer(fit_intercept=self.fit_intercept)
 
     # no standardization for GLM
@@ -478,7 +404,7 @@ def _get_family_spec(self,
         elif isinstance(self.family, GLMFamilySpec):
             return self.family
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self, X, y, check=True):
         return _get_data(self,
                          X,
                          y,
@@ -491,7 +417,8 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 4 options non sklearn API
-            dispersion=1,
+            warm_state=None,
+            dispersion=None,
             check=True,
             fit_null=True):
 
@@ -505,7 +432,7 @@ def fit(self,
         if not hasattr(self, "_family"):
             self._family = self._get_family_spec(y)
 
-        X, y, response, offset, weight = self._check(X, y, check=check)
+        X, y, response, offset, weight = self.get_data_arrays(X, y, check=check)
 
         sample_weight = weight
         self.sample_weight_ = normed_sample_weight = sample_weight / sample_weight.sum()
@@ -530,9 +457,12 @@ def fit(self,
         # the regularizer stores the warm start
 
         if regularizer is None:
-            regularizer = self._get_regularizer(X)
+            regularizer = self._get_regularizer(nvars=design.X.shape[1])
         self.regularizer_ = regularizer
 
+        if warm_state is not None:
+            self.regularizer_.warm_state = warm_state
+            
         if fit_null or not hasattr(self.regularizer_, 'warm_state'):
             (null_state,
              self.null_deviance_) = self._family.get_null_deviance(
@@ -606,13 +536,14 @@ def obj_function(response,
 
         self._set_coef_intercept(state)
 
-        if (hasattr(self._family, "base") and 
+        if (dispersion is None and hasattr(self._family, "base") and 
             isinstance(self._family.base, sm_family.Gaussian)): # GLM specific
             # usual estimate of sigma^2
             self.dispersion_ = self.deviance_ / (nobs-nvar-self.fit_intercept) 
         else:
             self.dispersion_ = dispersion
 
+        self.state_ = state
         return self
     fit.__doc__ = '''
 Fit a GLM.
@@ -622,18 +553,14 @@ def obj_function(response,
 
 {X}
 {y}
-{sample_weight}
-{regularizer}
-{dispersion}
-{check}
-fit_null: bool
-    Fit a null model if no warm state present in the regularizer.
-
+{weights}
+{summarize}
+    
 Returns
 -------
 
-self: GLMBase
-        Fitted GLM.
+self: object
+        GLM class instance.
         '''.format(**_docstrings)
     
     def predict(self, X, prediction_type='response'):
@@ -684,39 +611,32 @@ def score(self, X, y, sample_weight=None):
 '''.format(**_docstrings).strip()
 
     def _set_coef_intercept(self, state):
-        self.coef_ = state.coef.copy() # this makes a copy -- important to make this copy because `state.coef` is persistent
-        if hasattr(self, 'standardize') and self.standardize:
-            self.scaling_ = self.design_.scaling_
-            self.coef_ /= self.scaling_ 
-        self.intercept_ = state.intercept - (self.coef_ * self.design_.centers_).sum()
+        raw_state = self.design_.scaled_to_raw(state)
+        self.coef_ = raw_state.coef
+        self.intercept_ = raw_state.intercept
 
 GLMBase.__doc__ = '''
 Base class to fit a Generalized Linear Model (GLM). Base class for `GLMNet`.
 
 Parameters
 ----------
-{family}
 {fit_intercept}
+{summarize}
+{family}
 {control_glm}
-{response_id}
-{weight_id}
-{offset_id}
-{exclude}
 
 Attributes
-----------
-
+__________
 {coef_}
 {intercept_}
-{regularizer_}
+{summary_}
+{covariance_}
 {null_deviance_}
 {deviance_}
 {dispersion_}
-
+{regularizer_}
 '''.format(**_docstrings)
 
-
-
 @dataclass
 class GLM(GLMBase):
 
@@ -727,6 +647,7 @@ def fit(self,
             y,
             sample_weight=None,
             regularizer=None,             # last 4 options non sklearn API
+            warm_state=None,
             dispersion=1,
             check=True):
 
@@ -734,10 +655,11 @@ def fit(self,
                     y,
                     sample_weight=sample_weight,
                     regularizer=regularizer,
+                    warm_state=warm_state,
                     dispersion=dispersion,
                     check=check)
 
-        weight = self._check(X, y, check=False)[-1]
+        weight = self.get_data_arrays(X, y, check=False)[-1]
             
         if self.summarize:
             self.covariance_, self.summary_ = self._summarize(self.exclude,
@@ -745,7 +667,6 @@ def fit(self,
                                                               weight,
                                                               X.shape)
 
-
         else:
             self.summary_ = self.covariance_ = None
             
@@ -760,7 +681,8 @@ def _summarize(self,
         # IRLS used normalized weights,
         # this unnormalizes them...
 
-        unscaled_precision_ = self.design_.quadratic_form(self._information)
+        unscaled_precision_ = self.design_.quadratic_form(self._information,
+                                                          transformed=False)
         
         keep = np.ones(unscaled_precision_.shape[0]-1, bool)
         if exclude is not []:
@@ -796,36 +718,6 @@ def _summarize(self,
                                     index=index)
         return covariance_, summary_
         
-GLM.__doc__ = '''
-Base class to fit a Generalized Linear Model (GLM). Base class for `GLMNet`.
-
-Parameters
-----------
-{family}
-{fit_intercept}
-{control_glm}
-{response_id}
-{weight_id}
-{offset_id}
-{exclude}
-
-Notes
------
-
-This is a test
-
-Attributes
-----------
-
-{coef_}
-{intercept_}
-{regularizer_}
-{null_deviance_}
-{deviance_}
-{dispersion_}
-{summary_}
-{covariance_}
-'''.format(**_docstrings)
     
 @dataclass
 class GaussianGLM(RegressorMixin, GLM):
@@ -836,9 +728,9 @@ class BinomialGLM(ClassifierMixin, GLM):
 
     family: sm_family.Family = field(default_factory=sm_family.Binomial)
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self, X, y, check=True):
 
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+        X, y, response, offset, weight = super().get_data_arrays(X, y, check=check)
         encoder = LabelEncoder()
         labels = np.asfortranarray(encoder.fit_transform(response))
         self.classes_ = encoder.classes_
@@ -851,6 +743,7 @@ def fit(self,
             y,
             sample_weight=None,
             regularizer=None,             # last 4 options non sklearn API
+            warm_state=None,
             dispersion=1,
             check=True):
 
@@ -865,6 +758,7 @@ def fit(self,
                            y,
                            sample_weight=sample_weight,
                            regularizer=regularizer,             # last 4 options non sklearn API
+                           warm_state=warm_state,
                            dispersion=dispersion,
                            check=check)
 
diff --git a/glmnet/glmnet.py b/glmnet/glmnet.py
index e8c9953..fdd3ff1 100644
--- a/glmnet/glmnet.py
+++ b/glmnet/glmnet.py
@@ -66,10 +66,10 @@ class GLMNetSpec(object):
 class GLMNet(BaseEstimator,
              GLMNetSpec):
 
-    def _check(self,
-               X,
-               y,
-               check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
         return _get_data(self,
                          X,
                          y,
@@ -89,12 +89,13 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 3 options non sklearn API
+            warm_state=None,
             interpolation_grid=None):
 
         if not hasattr(self, "_family"):
             self._family = self._get_family_spec(y)
 
-        X, y, response, offset, weight = self._check(X, y)
+        X, y, response, offset, weight = self.get_data_arrays(X, y)
 
         if isinstance(X, pd.DataFrame):
             self.feature_names_in_ = list(X.columns)
@@ -124,7 +125,11 @@ def fit(self,
                                exclude=self.exclude
                                )
 
-        self.reg_glm_est_.fit(X, y, None, fit_null=False) 
+        self.reg_glm_est_.fit(X,
+                              y,
+                              None,
+                              fit_null=False,
+                              warm_state=warm_state) 
         regularizer_ = self.reg_glm_est_.regularizer_
 
         state, keep_ = self._get_initial_state(X,
@@ -184,6 +189,8 @@ def fit(self,
                                   check=False,
                                   fit_null=False)
 
+            self.state_ = self.reg_glm_est_.state_
+
             coefs_.append(self.reg_glm_est_.coef_.copy())
             intercepts_.append(self.reg_glm_est_.intercept_)
             dev_ratios_.append(1 - self.reg_glm_est_.deviance_ / self.null_deviance_)
@@ -308,7 +315,7 @@ def cross_validation_path(self,
         # truncate to the size we got
         predictions = predictions[:,:self.lambda_values_.shape[0]]
 
-        response, offset, weight = self._check(X, y, check=False)[2:]
+        response, offset, weight = self.get_data_arrays(X, y, check=False)[2:]
 
         # adjust for offset
         # because predictions are just X\beta
@@ -457,7 +464,7 @@ def _get_initial_state(self,
             intercept_ = glm.intercept_
         else:
             if self.fit_intercept:
-                response, offset, weight = self._check(X, y, check=False)[2:]
+                response, offset, weight = self.get_data_arrays(X, y, check=False)[2:]
                 state0 = self._family.null_fit(response,
                                                weight,
                                                offset,
@@ -468,5 +475,35 @@ def _get_initial_state(self,
                 intercept_ = 0
         return GLMState(coef_, intercept_), keep.astype(float)
 
+    def get_GLM(self):
+        return GLM(family=self.family,
+                   fit_intercept=self.fit_intercept,
+                   offset_id=self.offset_id,
+                   weight_id=self.weight_id,
+                   response_id=self.response_id)
+
+    def get_fixed_lambda(self,
+                         lambda_val):
+
+        check_is_fitted(self, ["coefs_", "feature_names_in_"])
 
+        estimator = self.regularized_estimator(
+                               lambda_val=lambda_val,
+                               family=self.family,
+                               alpha=self.alpha,
+                               penalty_factor=self.penalty_factor,
+                               lower_limits=self.lower_limits,
+                               upper_limits=self.upper_limits,
+                               fit_intercept=self.fit_intercept,
+                               standardize=self.standardize,
+                               control=self.control,
+                               offset_id=self.offset_id,
+                               weight_id=self.weight_id,            
+                               response_id=self.response_id,
+                               exclude=self.exclude
+                               )
 
+        coefs, intercepts = self.interpolate_coefs([lambda_val])
+        cls = self.state_.__class__
+        state = cls(coefs[0], intercepts[0])
+        return estimator, state
diff --git a/glmnet/inference.py b/glmnet/inference.py
new file mode 100644
index 0000000..8388bb6
--- /dev/null
+++ b/glmnet/inference.py
@@ -0,0 +1,1187 @@
+"""
+This module contains the core code needed for post selection
+after LASSO.
+
+"""
+
+import warnings
+from warnings import warn
+from copy import copy
+
+import numpy as np
+import scipy.sparse
+import pandas as pd
+from scipy.stats import norm as normal_dbn
+import statsmodels.api as sm
+
+import mpmath as mp
+mp.dps = 80
+
+from .base import _get_design
+from .glmnet import GLMNet
+
+from dataclasses import dataclass, asdict
+from typing import Optional
+
+@dataclass
+class TruncatedGaussian(object):
+    
+    estimate: float
+    sigma: float
+    smoothing_sigma: float
+    lower_bound: float
+    upper_bound: float
+    level: Optional[float] = 0.9
+    num_sd: Optional[float] = 10
+    num_grid: Optional[float] = 4000
+    use_sample: Optional[bool] = False
+    seed: Optional[int] = 0
+
+    def _get_family(self,
+                    basept=None):
+   
+        if basept is None:
+            basept = self.estimate
+            
+        self._rng = np.random.default_rng(self.seed)
+        
+        if not self.use_sample:
+            grid = np.linspace(basept - self.num_sd * self.sigma,
+                               basept + self.num_sd * self.sigma, self.num_grid)
+
+            weight = (normal_dbn.cdf((self.upper_bound - grid) / self.smoothing_sigma)
+                      - normal_dbn.cdf((self.lower_bound - grid) / self.smoothing_sigma))
+
+            weight *= normal_dbn.pdf((grid - basept) / self.sigma)
+
+            return discrete_family(grid, weight)
+
+        else:
+            sample = self._rng.standard_normal(self.num_grid) * self.sigma + basept
+            weight = (normal_dbn.cdf((self.upper_bound - sample) / self.smoothing_sigma)
+                      - normal_dbn.cdf((self.lower_bound - sample) / self.smoothing_sigma))
+            return discrete_family(sample, weight)
+
+    def pvalue(self,
+               null_value=0,
+               alternative='twosided',
+               basept=None):
+
+        if basept is None:
+            basept = null_value
+
+        if alternative not in ['twosided', 'greater', 'less']:
+            raise ValueError("alternative should be one of ['twosided', 'greater', 'less']")
+        _family = self._get_family(basept=basept)
+        tilt = (null_value - basept) / self.sigma**2
+        if alternative in ['less', 'twosided']:
+            _cdf = _family.cdf(tilt, x=self.estimate)
+            if alternative == 'less':
+                pvalue = _cdf
+            else:
+                pvalue = 2 * min(_cdf, 1 - _cdf)
+        else:
+            pvalue = _family.sf(tilt, x=self.estimate)
+        return pvalue
+
+    def interval(self,
+                 basept=None):
+                 
+        if basept is None:
+            basept = self.estimate
+            
+        _family = self._get_family(basept=basept)
+
+        L, U = _family.equal_tailed_interval(self.estimate,
+                                             alpha=1-self.level)
+        L *= self.sigma**2; L += basept
+        U *= self.sigma**2; U += basept
+
+        return L, U
+
+    def MLE(self,
+            basept=None):
+
+        if basept is None:
+            basept = self.estimate
+            
+        _family = self._get_family(basept=basept)
+
+        mle, _, _ = _family.MLE(self.estimate)
+        mle *= self.sigma**2; mle += basept
+        return mle
+    
+@dataclass
+class AffineConstraint(object):
+
+    linear: np.ndarray
+    offset: np.ndarray
+    observed: np.ndarray
+
+    def __post_init__(self):
+
+        if not all (self.linear @ self.observed - self.offset <= 0):
+            raise ValueError('constraint not satisfied for observed data')
+    
+    def interval_constraints(self,
+                             target,
+                             gamma,
+                             tol = 1.e-4):
+        r"""
+        Given an affine constraint $\{z:Az \leq b \leq \}$ (elementwise)
+        specified with $A$ as `support_directions` and $b$ as
+        `support_offset`, a new direction of interest $\eta$, and
+        an `observed_data` is Gaussian vector $Z \sim N(\mu,\Sigma)$ 
+        with `covariance` matrix $\Sigma$, this
+        function returns $\eta^TZ$ as well as an interval
+        bounding this value. 
+
+        The interval constructed is such that the endpoints are 
+        independent of $\eta^TZ$, hence the $p$-value
+        of `Kac Rice`_
+        can be used to form an exact pivot.
+
+        Parameters
+        ----------
+
+        support_directions : np.float
+             Matrix specifying constraint, $A$.
+
+        support_offsets : np.float
+             Offset in constraint, $b$.
+
+        covariance : np.float
+             Covariance matrix of `observed_data`.
+
+        observed_data : np.float
+             Observations.
+
+        direction_of_interest : np.float
+             Direction in which we're interested for the
+             contrast.
+
+        tol : float
+             Relative tolerance parameter for deciding 
+             sign of $Az-b$.
+
+        Returns
+        -------
+
+        lower_bound : float
+
+        observed : float
+
+        upper_bound : float
+
+        sigma : float
+
+        """
+
+        # shorthand
+        A, b, D = (self.linear,
+                   self.offset,
+                   self.observed)
+
+        N = D - gamma * target
+
+        U = A.dot(N) - b
+        V = A @ gamma
+        
+        # Inequalities are now U + V @ target <= 0
+        
+        # adding the zero_coords in the denominator ensures that
+        # there are no divide-by-zero errors in RHS
+        # these coords are never used in upper_bound or lower_bound
+
+        zero_coords = V == 0
+        RHS = -U / (V + zero_coords)
+        RHS[zero_coords] = np.nan
+        pos_coords = V > tol * np.fabs(V).max()
+        neg_coords = V < -tol * np.fabs(V).max()
+
+        if np.any(pos_coords):
+            upper_bound = RHS[pos_coords].min()
+        else:
+            upper_bound = np.inf
+
+        if np.any(neg_coords):
+            lower_bound = RHS[neg_coords].max()
+        else:
+            lower_bound = -np.inf
+
+        return lower_bound, upper_bound
+
+def lasso_inference(glmnet_obj,
+                    lambda_val,
+                    selection_data,
+                    full_data,
+                    level=.9,
+                    dispersion=None):
+
+    fixed_lambda, warm_state = glmnet_obj.get_fixed_lambda(lambda_val)
+    X_sel, Y_sel, weight_sel = selection_data
+    X_sel, Y_sel, _, _, weight_sel = fixed_lambda.get_data_arrays(X_sel,
+                                                                  Y_sel)
+    if weight_sel is None:
+        weight_sel = np.ones(X_sel.shape[0])
+
+    fixed_lambda.fit(X_sel,
+                     Y_sel,
+                     sample_weight=weight_sel,
+                     warm_state=warm_state)
+
+    FL = fixed_lambda # shorthand
+
+    if (not np.all(FL.upper_limits >= FL.control.big) or
+        not np.all(FL.lower_limits <= -FL.control.big)):
+        raise NotImplementedError('upper/lower limits coming soon')
+
+    active_set = np.nonzero(FL.coef_ != 0)[0]
+
+    if active_set.shape[0] != 0:
+        inactive_set = np.nonzero(FL.coef_ == 0)[0]
+
+        # fit unpenalized model on selection data
+
+        unreg_sel_GLM = glmnet_obj.get_GLM()
+
+        unreg_sel_GLM.summarize = True
+        unreg_sel_GLM.fit(X_sel[:,active_set],
+                          Y_sel,
+                          sample_weight=weight_sel,
+                          dispersion=dispersion)
+
+        # # quadratic approximation up to scaling and a factor of weight_sel.sum()
+
+        D_sel = _get_design(X_sel[:,active_set],
+                            weight_sel,
+                            standardize=glmnet_obj.standardize,
+                            intercept=glmnet_obj.fit_intercept)
+        P_noisy = D_sel.quadratic_form(unreg_sel_GLM._information,
+                                       transformed=True)
+
+        # fit unpenalized model on full data
+
+        X_full, Y_full, weight_full = full_data
+        if weight_full is None:
+            weight_full = np.ones(X_full.shape[0])
+
+        unreg_GLM = glmnet_obj.get_GLM()
+        unreg_GLM.summarize = True
+        unreg_GLM.fit(X_full[:,active_set],
+                      Y_full,
+                      sample_weight=weight_full,
+                      dispersion=dispersion)
+
+        # quadratic approximation
+
+        D = _get_design(X_full[:,active_set],
+                        weight_full,
+                        standardize=glmnet_obj.standardize,
+                        intercept=glmnet_obj.fit_intercept)
+
+        if np.asarray(FL.upper_limits).shape == ():
+            upper_limits = np.ones(D_sel.shape[1] - 1) * FL.upper_limits
+        else:
+            upper_limits = FL.upper_limits[active_set]
+            
+        if np.asarray(FL.lower_limits).shape == ():
+            lower_limits = np.ones(D_sel.shape[1] - 1) * FL.lower_limits
+        else:
+            lower_limits = FL.lower_limits[active_set]
+            
+        upper_limits = D_sel.scaling_ * upper_limits
+        lower_limits = D_sel.scaling_ * lower_limits
+
+        P_full = D.quadratic_form(unreg_GLM._information,
+                                  transformed=True)
+        if not FL.fit_intercept:
+            if FL.penalty_factor is not None:
+                penfac = FL.penalty_factor[active_set]
+            else:
+                penfac = np.ones_like(active_set)
+            P_full = P_full[1:,1:]
+            P_noisy = P_noisy[1:,1:]
+        else:
+            penfac = np.ones(active_set.shape[0])
+
+        Q_full = np.linalg.inv(P_full) 
+        Q_noisy = np.linalg.inv(P_noisy)
+
+        signs = np.sign(FL.coef_[active_set])
+
+        noisy_mle = D.raw_to_scaled(unreg_sel_GLM.state_)
+        
+        if FL.fit_intercept:
+            penfac = np.hstack([0, penfac])
+            signs = np.hstack([0, signs])
+            stacked = np.hstack([FL.state_.intercept,
+                                 FL.state_.coef[active_set]])
+            noisy_mle = noisy_mle._stack
+        else:
+            stacked = FL.state_.coef[active_set]
+            noisy_mle = noisy_mle.coef
+
+        # delta = lambda_val * penfac * signs
+
+        # # remember loss of glmnet is normalized by sum of weights
+        # # when taking newton step adjust by weight_sel.sum()
+
+        # delta = Q_sel @ delta * weight_sel.sum() 
+        # noisy_mle = stacked + delta # unitless scale
+
+        penalized = penfac > 0
+        n_penalized = penalized.sum()
+        n_coef = penalized.shape[0]
+        row_idx = np.arange(n_penalized)
+        col_idx = np.nonzero(penalized)[0]
+        data = -signs[penalized]
+        sel_P = scipy.sparse.coo_matrix((data, (row_idx, col_idx)), shape=(n_penalized, n_coef))
+
+        if FL.fit_intercept:
+            sel_U = sel_L = scipy.sparse.dia_array((np.ones((1, n_coef)), [1]), shape=(n_coef-1, n_coef))
+        else:
+            sel_U = sel_L = scipy.sparse.eye(n_coef)
+
+        ## the GLM's coef and intercept are on the original scale
+        ## we transform them here to the (typically) unitless "standardized" scale
+
+        full_mle = D.raw_to_scaled(unreg_GLM.state_)
+
+        if FL.fit_intercept:
+            full_mle = full_mle._stack
+        else:
+            full_mle = full_mle.coef
+
+        ## iterate over coordinates
+        Ls = np.zeros_like(stacked)
+        Us = np.zeros_like(stacked)
+        mles = np.zeros_like(stacked)
+        pvals = np.zeros_like(stacked)
+        TGs = []
+        
+        transform_to_raw = (D.unscaler_ @
+                            np.identity(D.shape[1]))
+        if not FL.fit_intercept:
+            transform_to_raw = transform_to_raw[1:,1:]
+
+        linear = scipy.sparse.vstack([sel_P, sel_U, -sel_L])
+        offset = np.hstack([np.zeros(sel_P.shape[0]), upper_limits, -lower_limits]) 
+        active_con = AffineConstraint(linear=linear,
+                                      offset=offset,
+                                      observed=stacked)
+        inactive = True
+        if inactive:
+            pf = FL.regularizer_.penalty_factor
+
+            logl_score = FL.state_.logl_score(FL._family,
+                                              Y_sel,
+                                              weight_sel / weight_sel.sum())
+
+            # X_{-E}'(Y-X\hat{\beta}_E) -- \hat{\beta}_E is the LASSO soln, not GLM !
+            # this is (2nd order Taylor series sense) equivalent to
+            # X_{-E}'(Y-X\bar{\beta}_E) + X_{-E}'WX_E(X_E'WX_E)^{-1}\lambda_E s_E
+            # with \bar{\beta}_E the GLM soln
+
+            score_ = (FL.design_.T @ logl_score)[1:]
+            score_ = score_[inactive_set]
+            # we now know that `score_` is bounded by \pm lambda_val
+
+            I = scipy.sparse.eye(score_.shape[0])
+            L = scipy.sparse.vstack([I, -I])
+            O = (np.ones(L.shape[0]) * lambda_val *
+                 np.hstack([pf[inactive_set],
+                            pf[inactive_set]]))
+
+            fudge_factor = 0.02 # allow 2% relative error on inactive gradient
+            inactive_con = AffineConstraint(linear=L,
+                                            offset=O * (1 + fudge_factor),
+                                            observed=score_)
+
+        for i in range(transform_to_raw.shape[0]):
+            ## call selection_interval and return
+            TG = _split_interval(active_con=active_con,
+                                 Q_noisy=Q_noisy * unreg_GLM.dispersion_,
+                                 Q_full=Q_full * unreg_GLM.dispersion_,
+                                 noisy_observation=noisy_mle, # these must be same scale / shift as glmnet
+                                 observation=full_mle, # these must be same scale / shift as glmnet
+                                 direction_of_interest=transform_to_raw[i], # this matrix describes the map from glmnet("transform") coords to raw("original") scale
+                                 level=level)
+
+            (L, U), mle, pval = (TG.interval(), TG.MLE(), TG.pvalue())
+
+            Ls[i] = L
+            Us[i] = U
+            mles[i] = mle
+            pvals[i] = pval
+            TGs.append(TG)
+            
+        idx = active_set.tolist()
+        if FL.fit_intercept:
+            idx = ['intercept'] + idx
+
+        TG_df = pd.concat([pd.Series(asdict(tg)) for tg in TGs], axis=1).T
+        TG_df.index = idx
+        df = pd.concat([pd.DataFrame({'mle': mles, 'pval': pvals, 'lower': Ls, 'upper': Us}, index=idx), TG_df], axis=1)
+        df['TG'] = TGs
+        return df
+    else:
+        return None
+
+def _split_interval(active_con,
+                    Q_noisy,
+                    Q_full,
+                    noisy_observation,
+                    observation,
+                    direction_of_interest,
+                    tol = 1.e-4,
+                    level = 0.90,
+                    dispersion=1):
+    """
+    Given an affine in cone constraint $\{z:Az+b \leq 0\}$ (elementwise)
+    specified with $A$ as `support_directions` and $b$ as
+    `support_offset`, a new direction of interest $\eta$, and
+    `noisy_observation` is Gaussian vector $Z \sim N(\mu,\Sigma)$ 
+    with `covariance` matrix $\Sigma$, this
+    function returns a confidence interval
+    for $\eta^T\mu$.
+
+    Parameters
+    ----------
+
+    support_directions : np.float
+         Matrix specifying constraint, $A$.
+
+    support_offset : np.float
+         Offset in constraint, $b$.
+
+    covariance : np.float
+         Covariance matrix of `observed_data`.
+
+    noisy_observation : np.float
+         Observations.
+
+    observation : np.float
+         Observations.
+
+    direction_of_interest : np.float
+         Direction in which we're interested for the
+         contrast.
+
+    tol : float
+         Relative tolerance parameter for deciding 
+         sign of $Az-b$.
+
+    Returns
+    -------
+
+    confidence_interval : (float, float)
+
+    """
+
+    noisy_var = direction_of_interest.T @ Q_noisy @ direction_of_interest
+    full_var = direction_of_interest.T @ Q_full @ direction_of_interest
+    noisy_estimate = (direction_of_interest * noisy_observation).sum()
+    estimate = (direction_of_interest * observation).sum()
+
+    slice_dir = Q_full @ direction_of_interest / full_var
+    (lower_bound,
+     upper_bound) = active_con.interval_constraints(noisy_estimate,
+                                                    slice_dir,
+                                                    tol=tol)
+
+    sigma = np.sqrt(full_var)
+    smoothing_sigma = np.sqrt(max(noisy_var - full_var, 0))
+
+    return TruncatedGaussian(estimate=estimate,
+                             sigma=sigma,
+                             smoothing_sigma=smoothing_sigma,
+                             lower_bound=lower_bound,
+                             upper_bound=upper_bound,
+                             level=level)
+    
+def _norm_interval(lower, upper):
+    r"""
+    A multiprecision evaluation of
+
+    .. math::
+
+        \Phi(U) - \Phi(L)
+
+    Parameters
+    ----------
+
+    lower : float
+        The lower limit $L$
+
+    upper : float
+        The upper limit $U$
+
+    """
+    #cdf = normal_dbn.cdf
+    cdf = mp.ncdf
+    if lower > 0:
+        if lower < 4:
+            return cdf(-lower) - cdf(-upper)
+        else:
+            return np.exp(-np.fabs(lower*(upper-lower)))
+    elif upper < 0:
+        if upper < -4:
+            return np.exp(-np.fabs(upper*(upper-lower)))
+        else:
+            return cdf(-lower) - cdf(-upper)
+    else:
+        return cdf(upper) - cdf(lower)
+
+def find_root(f, y, lb, ub, tol=1e-6):
+    """
+    searches for solution to f(x) = y in (lb, ub), where 
+    f is a monotone decreasing function
+    """       
+    
+    # make sure solution is in range
+    a, b   = lb, ub
+    fa, fb = f(a), f(b)
+    
+    # assume a < b
+    if fa > y and fb > y:
+        while fb > y : 
+            b, fb = b + (b-a), f(b + (b-a))
+    elif fa < y and fb < y:
+        while fa < y : 
+            a, fa = a - (b-a), f(a - (b-a))
+    
+    
+    # determine the necessary number of iterations
+    try:
+        max_iter = int( np.ceil( ( np.log(tol) - np.log(b-a) ) / np.log(0.5) ) )
+    except OverflowError:
+        warnings.warn('root finding failed, returning np.nan')
+        return np.nan
+        
+
+    # bisect (slow but sure) until solution is obtained
+    for _ in range(max_iter):
+        try:
+            c, fc  = (a+b)/2, f((a+b)/2)
+            if fc > y: a = c
+            elif fc < y: b = c
+        except OverflowError:
+            warnings.warn('root finding failed, returning np.nan')
+            return np.nan
+
+    return c
+        
+class discrete_family(object):
+
+    def __init__(self, sufficient_stat, weights, theta=0.):
+        r"""
+        A  discrete 1-dimensional
+        exponential family with reference measure $\sum_j w_j \delta_{X_j}$
+        and sufficient statistic `sufficient_stat`. For any $\theta$, the distribution
+        is
+
+        .. math::
+        
+            P_{\theta} = \sum_{j} e^{\theta X_j - \Lambda(\theta)} w_j \delta_{X_j}
+
+        where
+
+        .. math::
+
+            \Lambda(\theta) = \log \left(\sum_j w_j e^{\theta X_j} \right).
+
+        Parameters
+        ----------
+
+        sufficient_stat : `np.float((n))`
+
+        weights : `np.float(n)`
+
+        Notes
+        -----
+
+        The weights are normalized to sum to 1.
+        """
+        xw = np.array(sorted(zip(sufficient_stat, weights)), float)
+        xw[:,1] = np.maximum(xw[:,1], 1e-40)
+        self._x = xw[:,0]
+        self._w = xw[:,1]
+        self._lw = np.log(xw[:,1])
+        
+        self._w /= self._w.sum() # make sure they are a pmf
+        self.n = len(xw)
+        self._theta = np.nan
+        self.theta = theta
+
+    @property
+    def theta(self):
+        """
+        The natural parameter of the family.
+        """
+        return self._theta
+
+    @theta.setter
+    def theta(self, _theta):
+        if _theta != self._theta:
+            _thetaX = _theta * self.sufficient_stat + self._lw
+            _largest = _thetaX.max() - 15 # try to avoid over/under flow, 5 seems arbitrary
+            _exp_thetaX = np.exp(_thetaX - _largest)
+            _prod = _exp_thetaX
+            self._partition = np.sum(_prod)
+            self._pdf = _prod / self._partition
+            self._partition *= np.exp(_largest)
+        self._theta = _theta
+
+    @property
+    def partition(self):
+        r"""
+        Partition function at `self.theta`:
+
+        .. math::
+
+            \sum_j e^{\theta X_j} w_j
+        """
+        if hasattr(self, "_partition"):
+            return self._partition
+
+    @property
+    def sufficient_stat(self):
+        """
+        Sufficient statistics of the exponential family.
+        """
+        return self._x
+
+    @property
+    def weights(self):
+        """
+        Weights of the exponential family.
+        """
+        return self._w
+
+    def pdf(self, theta):
+        r"""
+        Density of $P_{\theta}$ with respect to $P_0$.
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        Returns
+        -------
+
+        pdf : np.float
+        
+        """
+        self.theta = theta # compute partition if necessary
+        return self._pdf
+ 
+    def cdf(self, theta, x=None, gamma=1):
+        r"""
+        The cumulative distribution function of $P_{\theta}$ with
+        weight `gamma` at `x`
+
+        .. math::
+
+            P_{\theta}(X < x) + \gamma * P_{\theta}(X = x)
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        x : float (optional)
+             Where to evaluate CDF.
+
+        gamma : float(optional)
+             Weight given at `x`.
+
+        Returns
+        -------
+
+        cdf : np.float
+
+        """
+        pdf = self.pdf(theta)
+        if x is None:
+            return np.cumsum(pdf) - pdf * (1 - gamma)
+        else:
+            tr = np.sum(pdf * (self.sufficient_stat < x)) 
+            if x in self.sufficient_stat:
+                tr += gamma * np.sum(pdf[np.where(self.sufficient_stat == x)])
+            return tr
+
+    def sf(self, theta, x=None, gamma=0, return_unnorm=False):
+        r"""
+        The complementary cumulative distribution function 
+        (i.e. survival function) of $P_{\theta}$ with
+        weight `gamma` at `x`
+
+        .. math::
+
+            P_{\theta}(X > x) + \gamma * P_{\theta}(X = x)
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        x : float (optional)
+             Where to evaluate SF.
+
+        gamma : float(optional)
+             Weight given at `x`.
+
+        Returns
+        -------
+
+        sf : np.float
+
+        """
+        pdf = self.pdf(theta)
+        if x is None:
+            return np.cumsum(pdf[::-1])[::-1] - pdf * (1 - gamma)
+        else:
+            tr = np.sum(pdf * (self.sufficient_stat > x)) 
+            if x in self.sufficient_stat:
+                tr += gamma * np.sum(pdf[np.where(self.sufficient_stat == x)])
+            return tr
+
+    def E(self, theta, func):
+        r"""
+        Expectation of `func` under $P_{\theta}$
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        func : callable
+             Assumed to be vectorized.
+
+        gamma : float(optional)
+             Weight given at `x`.
+
+        Returns
+        -------
+
+        E : np.float
+
+        """
+        T = np.asarray(func(self.sufficient_stat))
+        pdf_ = self.pdf(theta)
+
+        if T.ndim == 1:
+            return (T * pdf_).sum()
+        else:
+            val = (T * pdf_[:,None]).sum(0)
+            return val
+
+
+    def Var(self, theta, func):
+        r"""
+        Variance of `func` under $P_{\theta}$
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        func : callable
+             Assumed to be vectorized.
+
+        Returns
+        -------
+
+        var : np.float
+
+        """
+
+        mu = self.E(theta, func)
+        return self.E(theta, lambda x: (func(x)-mu)**2)
+        
+    def Cov(self, theta, func1, func2):
+        r"""
+        Covariance of `func1` and `func2` under $P_{\theta}$
+
+        Parameters
+        ----------
+
+        theta : float
+             Natural parameter.
+
+        func1, func2 : callable
+             Assumed to be vectorized.
+
+        Returns
+        -------
+
+        cov : np.float
+
+        """
+
+        mu1 = self.E(theta, func1)
+        mu2 = self.E(theta, func2)
+        return self.E(theta, lambda x: (func1(x)-mu1)*(func2(x)-mu2))
+
+    def equal_tailed_interval(self,
+                              observed,
+                              alpha=0.05,
+                              randomize=True,
+                              auxVar=None,
+                              tol=1e-6):
+        """
+        Form interval by inverting
+        equal-tailed test with $\alpha/2$ in each tail.
+
+        Parameters
+        ----------
+
+        observed : float
+             Observed sufficient statistic.
+
+        alpha : float (optional)
+             Size of two-sided test.
+
+        randomize : bool
+             Perform the randomized test (or conservative test).
+
+        auxVar : [None, float]
+             If randomizing and not None, use this
+             as the random uniform variate.
+
+        Returns
+        -------
+
+        lower, upper : float
+             Limits of confidence interval.
+
+        """
+
+        mu = self.E(self.theta, lambda x: x)
+        sigma  = np.sqrt(self.Var(self.theta, lambda x: x))
+        lb = mu - 20 * sigma
+        ub = mu + 20 * sigma
+        F = lambda th : self.cdf(th, observed)
+        L = find_root(F, 1.0 - 0.5 * alpha, lb, ub)
+        U = find_root(F, 0.5 * alpha, lb, ub)
+        return L, U
+
+    def equal_tailed_test(self,
+                          theta0,
+                          observed,
+                          alpha=0.05):
+        r"""
+        Equal tailed test of H_0:theta=theta_0
+
+        Parameters
+        ----------
+
+        theta0 : float
+             Natural parameter under null hypothesis.
+
+        observed : float
+             Observed sufficient statistic.
+
+        alpha : float (optional)
+             Size of two-sided test.
+
+        randomize : bool
+             Perform the randomized test (or conservative test).
+
+        Returns
+        -------
+
+        decision : np.bool
+             Is the null hypothesis $H_0:\theta=\theta_0$ rejected?
+   
+        Notes
+        -----
+
+        We need an auxiliary uniform variable to carry out the randomized test.
+        Larger auxVar corresponds to x being slightly "larger." It can be passed in,
+        or chosen at random. If randomize=False, we get a conservative test.
+        """
+
+        pval = self.cdf(theta0, observed, gamma=0.5)
+        return min(pval, 1-pval) < alpha
+
+    def MLE(self,
+            observed,
+            initial=0,
+            max_iter=20,
+            tol=1.e-4):
+
+        r"""
+        Compute the maximum likelihood estimator
+        based on observed sufficient statistic `observed`.
+
+        Parameters
+        ----------
+
+        observed : float
+             Observed value of sufficient statistic
+
+        initial : float
+             Starting point for Newton-Raphson
+
+        max_iter : int (optional)
+             Maximum number of Newton-Raphson iterations
+
+        tol : float (optional)
+             Tolerance parameter for stopping, based
+             on relative change in parameter estimate.
+             Iteration stops when the change is smaller
+             than `tol * max(1, np.fabs(cur_estimate))`.
+
+        Returns
+        -------
+
+        theta_hat : float
+             Maximum likelihood estimator.
+   
+        std_err : float
+             Estimated variance of `theta_hat` based
+             on inverse of variance of sufficient
+             statistic at `theta_hat`, i.e. the
+             observed Fisher information.
+
+        """
+
+        cur_est = initial
+
+        def first_two_moments(x):
+            return np.array([x, x**2]).T
+        
+        for i in range(max_iter):
+            cur_moments = self.E(cur_est, first_two_moments) # gradient and
+                                                             # Hessian of CGF
+                                                             # (almost)
+            grad, hessian = (cur_moments[0] - observed, 
+                             cur_moments[1] - cur_moments[0]**2)
+            next_est = cur_est - grad / hessian # newton step
+
+            if np.fabs(next_est - cur_est) < tol * max(1, np.fabs(cur_est)):
+                break
+            cur_est = next_est
+
+            if i == max_iter - 1:
+                warnings.warn('Newton-Raphson failed to converge after %d iterations' % max_iter)
+
+        cur_moments = self.E(cur_est, first_two_moments) # gradient and
+                                                         # Hessian of CGF
+                                                         # (almost)
+        grad, hessian = (cur_moments[0] - observed, 
+                         cur_moments[1] - cur_moments[0]**2)
+
+        return cur_est, 1. / hessian, grad
+
+def _truncated_inference(estimate,
+                         sigma,
+                         smoothing_sigma,
+                         lower_bound,
+                         upper_bound,
+                         basept=None,
+                         level=0.9):
+
+    if basept is None:
+        basept = estimate
+    noisy_sigma = np.sqrt(smoothing_sigma**2 + sigma**2)
+
+    if smoothing_sigma > 1e-6 * sigma:
+
+        grid = np.linspace(basept - 10 * sigma, basept + 10 * sigma, 2001)
+
+        weight = (normal_dbn.cdf((upper_bound - grid) / smoothing_sigma)
+                  - normal_dbn.cdf((lower_bound - grid) / smoothing_sigma))
+
+        weight *= normal_dbn.pdf((grid - basept) / sigma)
+
+        sel_distr = discrete_family(grid, weight)
+
+        use_sample = False
+        if use_sample:
+            rng = np.random.default_rng(0)
+            sample = rng.standard_normal(2000) * sigma + basept
+            weight_s = (normal_dbn.cdf((upper_bound - sample) / smoothing_sigma)
+                      - normal_dbn.cdf((lower_bound - sample) / smoothing_sigma))
+            sel_distr =  discrete_family(sample, weight_s)
+
+        L, U = sel_distr.equal_tailed_interval(estimate,
+                                               alpha=1-level)
+        mle, _, _ = sel_distr.MLE(estimate)
+        mle *= sigma**2; mle += estimate
+        pval = sel_distr.cdf(-basept / sigma**2, estimate)
+        pval = 2 * min(pval, 1-pval)
+        L *= sigma**2; L += basept
+        U *= sigma**2; U += basept
+
+    else:
+        warnings.warn('assuming data for selection is same as for inference -- using hard selection')
+        
+        lb = estimate - 20 * sigma
+        ub = estimate + 20 * sigma
+
+        if estimate < lower_bound or estimate > upper_bound:
+            warn('Constraints not satisfied: returning [-np.inf, np.inf]')
+            return -np.inf, np.inf, np.nan, np.nan
+        
+        def F(theta):
+            
+            Z = (estimate - theta) / sigma
+            Z_L = (lower_bound - theta) / sigma
+            Z_U = (upper_bound - theta) / sigma
+            num = _norm_interval(Z_L, Z)
+            den = _norm_interval(Z_L, Z_U)
+            if Z_L > 0 and den < 1e-10:
+                C = np.fabs(Z_L)
+                D = Z-Z_L
+                cdf = 1-np.exp(-C*D)
+            elif Z_U < 0 and den < 1e-10:
+                C = np.fabs(Z_U)
+                D = Z_U-Z
+                if C*D < 0:
+                    raise ValueError
+                cdf = np.exp(-C*D)
+            else:
+                cdf = num / den
+            return cdf
+            
+        
+        pval = F(0)
+        pval = 2 * min(pval, 1-pval)
+
+        lb = lower_bound - 20 * sigma
+        ub = upper_bound + 20 * sigma
+
+        alpha = 0.5 * (1 - level)
+        L = find_root(F, 1.0 - 0.5 * alpha, lb, ub)
+        if np.isnan(L):
+            L = -np.inf
+        U = find_root(F, 0.5 * alpha, lb, ub)
+        if np.isnan(U):
+            U = np.inf
+
+        mle = np.nan
+        
+    return L, U, mle, pval, sel_distr
+
+def score_inference(score,
+                    cov_score,
+                    lamval,
+                    prop=0.8,
+                    chol_cov=None,
+                    perturbation=None,
+                    rng=None):
+
+    # perturbation should be N(0, cov_score) roughly -- shouldn't have a multiplier for the proportion!
+
+    # this is the X for the LASSO problem
+    # X.T @ X = S = cov_score
+
+    if chol_cov is None:
+        chol_cov = X = np.linalg.cholesky(cov_score).T 
+
+    if rng is None:
+        rng = np.random.default_rng()
+
+    # shorthand
+    Z = score # X.T @ Y in OLS case
+    X = chol_cov 
+    p = X.shape[1]
+    
+    if perturbation is None:
+        perturbation = X.T @ rng.standard_normal(p) # X is square here...
+
+    # this is the Y of the LASSO problem
+
+    Y = scipy.linalg.solve_triangular(chol_cov.T, Z, lower=True)
+    Df = pd.DataFrame({'response':Y})
+
+    GN = GLMNet(response_id='response',
+                fit_intercept=False,
+                standardize=False)
+    GN.fit(X, Df)
+    
+    Z_sel = Z + np.sqrt((1 - prop) / prop) * perturbation
+    Y_sel = scipy.linalg.solve_triangular(chol_cov.T, Z_sel, lower=True)
+    Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
+    X_sel = np.sqrt(prop) * X
+
+    return lasso_inference(GN, 
+                           prop * lamval / p,
+                           (X_sel, Df_sel, None),
+                           (X, Df, None),
+                           dispersion=1)
+
+def resampler_inference(sample,
+                        lamval=None,
+                        lam_frac=1,
+                        prop=0.8,
+                        random_idx=None,
+                        rng=None,
+                        estimate=None,
+                        standardize=True):
+
+    if estimate is None:
+        estimate = sample.mean(0)
+
+    centered = sample - estimate[None,:]
+    B, p = centered.shape
+
+    if random_idx is None:
+        if rng is None:
+            rng = np.random.default_rng()
+        random_idx = rng.choice(B, 1)
+        
+    prec_score = centered.T @ centered / B
+    cov_score = np.linalg.inv(prec_score)
+
+    centered_scores = centered @ cov_score
+    scaling = centered_scores.std(0)
+    score = cov_score @ estimate
+
+    if standardize:
+        centered_scores /= scaling[None,:]
+        cov_score /= np.multiply.outer(scaling, scaling)
+        score /= scaling
+        
+    # pick a lam
+    if lamval is None:
+        max_scores = np.fabs(centered_scores).max(1)
+        lamval = lam_frac * np.median(max_scores)
+
+    # pick a pseudo-Gaussian perturbation
+    perturbation = centered_scores[random_idx].reshape((p,))
+    
+    df = score_inference(score=score,
+                         cov_score=cov_score,
+                         lamval=lamval,
+                         prop=prop,
+                         perturbation=perturbation)
+
+    if df is not None:
+        if standardize:
+            for col in ['mle', 'upper', 'lower']:
+                df[col].values[:] = df[col] / scaling[df.index]
+
+            for j in df.index:
+                df.loc[j,'TG'] = None
+                # XX try to correct this!!
+                # tg = df.loc[j, 'TG']
+                # tg_params = asdict(tg)
+                # tg_params.update(estimate=tg.estimate * scaling[j],
+                #                  sigma=tg.sigma * scaling[j],
+                #                  smoothing_sigma=tg.smoothing_sigma * scaling[j],
+                #                  lower_bound=tg.lower_bound * scaling[j],
+                #                  upper_bound=tg.upper_bound * scaling[j])
+                # df.loc[j,'TG'] = TruncatedGaussian(**tg_params)
+
+    return df
+
diff --git a/glmnet/paths/fastnet.py b/glmnet/paths/fastnet.py
index 5246565..248336f 100644
--- a/glmnet/paths/fastnet.py
+++ b/glmnet/paths/fastnet.py
@@ -11,6 +11,7 @@
 
 from ..base import _get_design
 from ..glmnet import GLMNet
+from ..glm import GLMState
 from ..elnet import (_check_and_set_limits,
                     _check_and_set_vp,
                     _design_wrapper_args)
@@ -61,7 +62,7 @@ def fit(self,
         else:
             self.feature_names_in_ = ['X{}'.format(i) for i in range(X.shape[1])]
 
-        X, y, response, offset, weight = self._check(X, y)
+        X, y, response, offset, weight = self.get_data_arrays(X, y)
 
         if not scipy.sparse.issparse(X):
             X = np.asfortranarray(X)
@@ -128,6 +129,10 @@ def fit(self,
         self.coefs_ = result['coefs']
         self.intercepts_ = result['intercepts']
             
+        if self.coefs_.ndim == 1:
+            self.state_ = GLMState(self.coefs_[-1],
+                                   self.intercepts_[-1])
+
         self.lambda_values_ = result['lambda_values']
         nfits = self.lambda_values_.shape[0]
         dev_ratios_ = self._fit['dev'][:nfits]
diff --git a/glmnet/paths/fishnet.py b/glmnet/paths/fishnet.py
index e638c6f..da28123 100644
--- a/glmnet/paths/fishnet.py
+++ b/glmnet/paths/fishnet.py
@@ -28,9 +28,12 @@ class FishNet(FastNetMixin):
     def __post_init__(self):
         self._family = GLMFamilySpec(base=sm_family.Poisson())
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
 
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+        X, y, response, offset, weight = super().get_data_arrays(X, y, check=check)
         if np.any(response < 0):
             raise ValueError("negative responses encountered;  not permitted for Poisson family")
         response = np.asarray(response, float).copy()
diff --git a/glmnet/paths/lognet.py b/glmnet/paths/lognet.py
index bdd4dfd..b783ec1 100644
--- a/glmnet/paths/lognet.py
+++ b/glmnet/paths/lognet.py
@@ -41,9 +41,12 @@ def _extract_fits(self,
         V = super()._extract_fits(X_shape, response_shape)
         return V
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
 
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+        X, y, response, offset, weight = super().get_data_arrays(X, y, check=check)
         encoder = LabelEncoder()
         labels = np.asfortranarray(encoder.fit_transform(response))
         self.classes_ = encoder.classes_
diff --git a/glmnet/paths/multiclassnet.py b/glmnet/paths/multiclassnet.py
index 676831c..05486e4 100644
--- a/glmnet/paths/multiclassnet.py
+++ b/glmnet/paths/multiclassnet.py
@@ -26,7 +26,7 @@
 @dataclass
 class MultiClassFamily(object):
 
-    def default_scorers(self):
+    def _default_scorers(self):
 
         return [accuracy_scorer,
                 misclass_scorer,
@@ -68,9 +68,9 @@ def _offset_predictions(self,
         value = exp_value / exp_value.sum(-1)[:,:,None]
         return value
 
-    def _check(self, X, y, check=True):
+    def get_data_arrays(self, X, y, check=True):
 
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+        X, y, response, offset, weight = super().get_data_arrays(X, y, check=check)
         encoder = OneHotEncoder(sparse_output=False)
         y_onehot = np.asfortranarray(encoder.fit_transform(response.reshape((-1,1))))
         self.categories_ = encoder.categories_[0]
diff --git a/glmnet/paths/multigaussnet.py b/glmnet/paths/multigaussnet.py
index 7f30c38..b2a3943 100644
--- a/glmnet/paths/multigaussnet.py
+++ b/glmnet/paths/multigaussnet.py
@@ -25,7 +25,7 @@
 @dataclass
 class MultiClassFamily(object):
 
-    def default_scorers(self):
+    def _default_scorers(self):
         return [mse_scorer, mae_scorer]
 
 @dataclass
@@ -49,8 +49,13 @@ def _extract_fits(self,
         self._fit['dev'] = self._fit['rsq']
         return super()._extract_fits(X_shape, response_shape)
 
-    def _check(self, X, y, check=True):
-        X, y, response, offset, weight = super()._check(X, y, check=check)
+    def get_data_arrays(self,
+                        X,
+                        y,
+                        check=True):
+        X, y, response, offset, weight = super().get_data_arrays(X,
+                                                                 y,
+                                                                 check=check)
         return X, y, np.asfortranarray(response), offset, weight
 
     def _wrapper_args(self,
diff --git a/glmnet/regularized_glm.py b/glmnet/regularized_glm.py
index 9d1e294..3cda87a 100644
--- a/glmnet/regularized_glm.py
+++ b/glmnet/regularized_glm.py
@@ -113,7 +113,7 @@ def newton_step(self,
         
         warm = (cur_state.coef,
                 cur_state.intercept,
-                cur_state.eta) # just X\beta -- doesn't include offset
+                cur_state.linear_predictor) # just X\beta -- doesn't include offset
 
         elnet_fit = self.elnet_estimator.fit(design,
                                              z,
@@ -141,18 +141,29 @@ class RegGLM(GLM,
 
     control: RegGLMControl = field(default_factory=RegGLMControl)
 
+    def get_LM(self):
+        return GLM(family=self.family,
+                   fit_intercept=self.fit_intercept,
+                   offset_id=self.offset_id,
+                   weight_id=self.weight_id,
+                   response_id=self.response_id)
+
     def _get_regularizer(self,
-                         X):
+                         nvars=None):
 
         # self.design_ will have been set by now
 
+        if nvars is None:
+            check_is_fitted(self, ["design_"])
+            nvars = self.design_.X.shape[1]
+
         return ElNetRegularizer(lambda_val=self.lambda_val,
                                 alpha=self.alpha,
                                 penalty_factor=self.penalty_factor,
                                 lower_limits=self.lower_limits * self.design_.scaling_,
                                 upper_limits=self.upper_limits * self.design_.scaling_,
                                 fit_intercept=self.fit_intercept,
-                                nvars=X.shape[1],
+                                nvars=nvars,
                                 control=self.control,
                                 exclude=self.exclude)
 
@@ -170,6 +181,7 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 3 options non sklearn API
+            warm_state=None,
             check=True,
             fit_null=True):
 
@@ -177,6 +189,7 @@ def fit(self,
                     y,
                     sample_weight=sample_weight,
                     regularizer=regularizer,
+                    warm_state=warm_state,
                     dispersion=1,
                     check=check)
 
@@ -213,6 +226,7 @@ def fit(self,
             y,
             sample_weight=None,           # ignored
             regularizer=None,             # last 4 options non sklearn API
+            warm_state=None,
             dispersion=1,
             check=True):
 
diff --git a/pyproject.toml b/pyproject.toml
index e819950..a6be16e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,7 @@ requires = ["setuptools>=42",
             "matplotlib>=3.3.3",
             "versioneer",
 	    "statsmodels",
+	    "mpmath",
 	    "coxdev"]
 build-backend = "setuptools.build_meta"
 
@@ -41,3 +42,11 @@ dynamic = ['version']
 [project.license]
 file = 'LICENSE.txt'
 
+[tool.versioneer]
+VCS = "git"
+style = "pep440"
+versionfile_source = "glmnet/_version.py"
+versionfile_build = "glmnet/_version.py"
+tag_prefix = "v"
+parentdir_prefix = "glmnet-"
+
diff --git a/setup.py b/setup.py
index e4ca64a..2f42b2c 100644
--- a/setup.py
+++ b/setup.py
@@ -43,9 +43,10 @@
                                                    'gaussnet',
                                                    'multigaussnet']]
 
+long_description = open('README.md', 'rt', encoding='utf-8').read()
+
 def main(**extra_args):
     setup(name='glmnet',
-          url="http://github.org/intro-stat-learning/glmnetpy",
           version=versioneer.get_version(),
           packages = ['glmnet',
                       'glmnet.paths'],
@@ -54,7 +55,7 @@ def main(**extra_args):
           include_package_data=True,
           data_files=[],
           scripts=[],
-          #long_description=long_description,
+          long_description=long_description,
           long_description_content_type=long_description_content_type,
           cmdclass = cmdclass,
           **extra_args
diff --git a/tests/flex/__init__.py b/tests/flex/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/paths/__init__.py b/tests/paths/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_design.py b/tests/test_design.py
index af6c895..2c7dada 100644
--- a/tests/test_design.py
+++ b/tests/test_design.py
@@ -4,6 +4,7 @@
 import statsmodels.api as sm
 
 from glmnet.base import Design
+from glmnet.glm import GLMState
 rng = np.random.default_rng(0)
 
 n, p, q = 100, 5, 3
@@ -86,55 +87,116 @@ def test_design(X, weights, standardize, intercept):
     assert np.allclose(design.T @ V_l, design_s.T @ V_l)
     
 
-# @pytest.mark.parametrize('X', [rng.standard_normal((n, p)),
-#                                scipy.sparse.csc_array(rng.standard_normal((n, p)))] )
-# @pytest.mark.parametrize('weights', [np.ones(n), rng.uniform(1, 2, size=(n,))])
-# @pytest.mark.parametrize('standardize', [True, False])
-# @pytest.mark.parametrize('intercept', [True, False])
-# @pytest.mark.parametrize('gls', [None,
-#                                  rng.uniform(1, 2, size=(n,)),
-#                                  rng.uniform(1, 2, size=(n,n))])
-# def test_quadratic_form(X, weights, standardize, intercept, gls):
-#     """
-#     test the linear / adjoint maps of Design from an np.ndarray and a scipy.sparse.csc_array
-#     """
+@pytest.mark.parametrize('X', [rng.standard_normal((n, p)),
+                               scipy.sparse.csc_array(rng.standard_normal((n, p)))] )
+@pytest.mark.parametrize('weights', [np.ones(n), rng.uniform(1, 2, size=(n,))])
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+@pytest.mark.parametrize('gls', [None,
+                                 np.diag(rng.uniform(1, 2, size=(n,))),
+                                 rng.uniform(1, 2, size=(n,n))])
+def test_quadratic_form(X, weights, standardize, intercept, gls):
+    """
+    test the linear / adjoint maps of Design from an np.ndarray and a scipy.sparse.csc_array
+    """
+    
+    if gls is not None and gls.ndim == 2:
+        gls = (gls + gls.T) / 2
+
+    X_s = scipy.sparse.csc_array(X)
+    W = weights
+    design = Design(X, W, standardize=standardize)
+    design_s = Design(X_s, W, standardize=standardize)
+
+    columns = [0,1,3]
+    Q_s = design_s.quadratic_form(G=gls, columns=columns, transformed=standardize)
+    Q = design.quadratic_form(G=gls, columns=columns, transformed=standardize)
+
+    X_eff = scipy.sparse.csc_array(X).toarray()
+
+    if standardize:
+        xm = (X_eff * W[:,None]).sum(0) / W.sum()
+        x2 = (X_eff**2 * W[:,None]).sum(0) / W.sum()
+        xs = np.sqrt(x2 - xm**2)
+    else:
+        xm = np.zeros(p)
+        xs = np.ones(p)
+
+    X_eff = X_eff / xs[None,:] - np.multiply.outer(np.ones(n), xm / xs)
+    X_eff = np.concatenate([np.ones((n,1)), X_eff], axis=1)
+
+    if columns is not None:
+        columns = np.array(columns)
+        columns += 1
+        columns = np.hstack([[0], columns])
+
+    if gls is None:
+        Q_eff = X_eff.T @ X_eff
+    elif gls.ndim == 1:
+        Q_eff = X_eff.T @ (gls[:, None] * X_eff)
+    else:
+        Q_eff = X_eff.T @ gls @ X_eff
+    if columns is not None:
+        Q_eff = Q_eff[:, columns]
+    assert np.allclose(Q, Q_eff)  # calculation is done correctly
+
+    assert np.allclose(Q_s, Q) # sparse and dense agree
+
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+def test_unscaler(intercept,
+                  standardize):
+
+    X = np.random.standard_normal((20,5))
+    D = Design(X, intercept=True, standardize=True)
+    M = D.unscaler_ @ np.identity(6)
+    MT = D.unscaler_.T @ np.identity(6) 
+    assert np.allclose(MT, M.T)
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+def test_scaler(intercept,
+                  standardize):
+
+    X = np.random.standard_normal((20,5))
+    D = Design(X, intercept=True, standardize=True)
+    M = D.scaler_ @ np.identity(6)
+    MT = D.scaler_.T @ np.identity(6) 
+    assert np.allclose(MT, M.T)
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+def test_scaler_unscaler_inv(intercept,
+                             standardize):
+
+    X = np.random.standard_normal((20,5))
+    D = Design(X, intercept=True, standardize=True)
+    M = D.unscaler_ @ np.identity(6)
+    MI = D.scaler_ @ np.identity(6) 
+    assert np.allclose(M @ MI, np.identity(6))
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('intercept', [True, False])
+def test_scaler_to_raw_conversion(intercept,
+                                  standardize):
+
+    X = np.random.standard_normal((20,5))
+    D = Design(X, intercept=True, standardize=True)
+    raw_state = GLMState(coef=np.random.standard_normal(5),
+                         intercept=np.random.standard_normal())
+    scaled_state = D.raw_to_scaled(raw_state)
+    raw_state_roundtrip = D.scaled_to_raw(scaled_state)
+
+    assert np.allclose(raw_state._stack,
+                       raw_state_roundtrip._stack)
+    
+    scaled_state = GLMState(coef=np.random.standard_normal(5),
+                            intercept=np.random.standard_normal())
+    raw_state = D.scaled_to_raw(scaled_state)
+    scaled_state_roundtrip = D.raw_to_scaled(raw_state)
+
+    assert np.allclose(scaled_state._stack,
+                       scaled_state_roundtrip._stack)
+    
     
-#     if gls is not None and gls.ndim == 2:
-#         gls = (gls + gls.T) / 2
-#     X_s = scipy.sparse.csc_array(X)
-#     design = Design(X, W, standardize=stand)
-#     design_s = Design(X_s, W, standardize=stand)
-
-#     Q_s = design_s.quadratic_form(G=gls, columns=columns)
-#     Q = design.quadratic_form(G=gls, columns=columns)
-
-#     X_eff = scipy.sparse.csc_array(X).toarray()
-#     if stand:
-#         xm = (X_eff * W[:,None]).sum(0) / W.sum()
-#         x2 = (X_eff**2 * W[:,None]).sum(0) / W.sum()
-#         xs = np.sqrt(x2 - xm**2)
-#     else:
-#         xm = np.zeros(p)
-#         xs = np.ones(p)
-
-#     X_eff = X_eff / xs[None,:] - np.multiply.outer(np.ones(n), xm / xs)
-#     X_eff = np.concatenate([np.ones((n,1)), X_eff], axis=1)
-
-#     if columns is not None:
-#         columns = np.array(columns)
-#         columns += 1
-#         columns = np.hstack([[0], columns])
-
-#     if gls is None:
-#         Q_eff = X_eff.T @ X_eff
-#     elif gls.ndim == 1:
-#         Q_eff = X_eff.T @ (gls[:, None] * X_eff)
-#     else:
-#         Q_eff = X_eff.T @ gls @ X_eff
-#     if columns is not None:
-#         Q_eff = Q_eff[:, columns]
-#     assert np.allclose(Q, Q_eff)  # calculation is done correctly
-
-#     assert np.allclose(Q_s, Q) # sparse and dense agree
-
-        
diff --git a/tests/test_inference.py b/tests/test_inference.py
new file mode 100644
index 0000000..d9430e9
--- /dev/null
+++ b/tests/test_inference.py
@@ -0,0 +1,472 @@
+import pytest
+
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+
+import scipy.sparse
+
+from glmnet import GLMNet
+from glmnet.inference import (lasso_inference,
+                              score_inference,
+                              resampler_inference,
+                              TruncatedGaussian)
+
+def test_Auto():
+
+    df = sm.datasets.get_rdataset('Auto', package='ISLR').data
+    df = df.set_index('name')
+    X = np.asarray(df.drop(columns=['mpg']))
+    y = np.asarray(df['mpg'])
+
+    GN = GLMNet(response_id='response',
+                fit_intercept=True,
+                standardize=True)
+
+    Df = pd.DataFrame({'response':y})
+
+    GN.fit(X, Df)
+    prop = 0.8
+    n =	df.shape[0]
+    m = int(prop*n)
+
+    df = lasso_inference(GN,
+                         2 / n, # GN.lambda_values_[min(10, GN.lambda_values_.shape[0]-1)],
+                         (X[:m], Df.iloc[:m], None),
+                         (X, Df, None))
+
+def sample_orthogonal(rng=None,
+                      p=50,
+                      s=5,
+                      penalty_facs=False,
+                      alt=True,
+                      prop=0.8):
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+
+    X = np.identity(p)
+    Y = rng.standard_normal(p) 
+    if alt:
+        beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+    mu = beta
+    Y += mu
+    Df = pd.DataFrame({'response':Y})
+
+    penalty_factor = np.ones(p)
+    if penalty_facs:
+        penalty_factor[:p//10] = 0.5
+        penalty_factor[p//10:2*p//10] = 0
+        
+    penalty_factor *= p / penalty_factor.sum()
+
+    GN = GLMNet(response_id='response',
+                fit_intercept=False,
+                standardize=False,
+                penalty_factor=penalty_factor)
+
+    lamval = 2
+    Y_sel = Y + np.sqrt((1 - prop) / prop) * rng.standard_normal(Y.shape)
+    active_naive = np.nonzero(np.fabs(Y_sel) > lamval)[0]
+
+    Df_sel = pd.DataFrame({'response':Y_sel * np.sqrt(prop)})
+    X_sel = np.sqrt(prop) * X
+    GN.fit(X, Df)
+    active_naive = np.nonzero(np.fabs(Y_sel) > lamval)[0]
+
+    pivots = []
+    test_pvals = []
+
+    if active_naive.shape[0] > 0:
+        df = lasso_inference(GN, 
+                             prop * lamval / p,
+                             (X_sel, Df_sel, None),
+                             (X, Df, None),
+                             dispersion=1)
+        df['target'] = mu[df.index]
+
+        signs = np.sign(Y_sel[df.index])
+        assert np.all(Y_sel[df.index] * signs >= lamval * GN.penalty_factor[df.index])
+
+    if df is not None:
+        for j, s in zip(df.index, signs):
+            if s == 1:
+                upper_bound = np.inf
+                if penalty_factor is not None:
+                    lower_bound = lamval * penalty_factor[j]
+                else:
+                    lower_bound = lamval
+            else:
+                if penalty_factor is not None:
+                    upper_bound = -lamval * penalty_factor[j]
+                else:
+                    upper_bound = -lamval
+                lower_bound = -np.inf
+
+            tg = df.loc[j,'TG']
+
+            TG = TruncatedGaussian(estimate=Y[j],
+                                   sigma=1,
+                                   smoothing_sigma=np.sqrt((1 - prop) / prop),
+                                   lower_bound=lower_bound,
+                                   upper_bound=upper_bound,
+                                   level=0.90)
+            pval = TG.pvalue()
+            pivots.append(TG.pvalue(null_value=df.loc[j,'target']))
+            test_pvals.append(TG.pvalue(null_value=0))
+            assert np.allclose(pval, df.loc[j]['pval'])
+        df['pivot_byhand'] = pivots
+        df['pval_byhand'] = test_pvals
+        assert np.allclose(df['pval'], df['pval_byhand'])
+
+    if df is not None:
+        df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
+        assert np.allclose(df['pivot'], df['pivot_byhand'])        
+
+    return df, None
+
+def resample_orthogonal(rng=None,
+                        p=50,
+                        s=5,
+                        alt=True,
+                        prop=0.8,
+                        standardize=True,
+                        B=2000):
+
+    # when standardize is True, coverage will be fine but pivot won't be
+    # this is because the TruncatedGaussian instance is not corrected for scale
+
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+    if alt:
+        beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+
+    scale = np.linspace(1, 3, p)
+    beta_hat = beta + rng.standard_normal(p) * scale
+    bootstrap_noise = rng.standard_normal((B, p)) * scale[None,:]
+    sample = bootstrap_noise + beta_hat[None,:]
+
+    df = resampler_inference(sample,
+                             prop=prop,
+                             standardize=standardize)
+    if df is not None:
+        df['target'] = beta[df.index]
+        if not standardize:
+            df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j,'target']) for j in df.index]
+        else:
+            # scaling transformation not taken into account so cannot reuse TruncatedGaussian object
+            df['pivot'] = np.nan * df['pval']
+        return df
+
+def sample_AR1(rho=0.6,               
+               rng=None,
+               p=100,
+               s=5,
+               alt=True,
+               prop=0.8,
+               dispersion=2):
+
+    D = np.fabs(np.subtract.outer(np.arange(p), np.arange(p)))
+    dispersion = 2
+    S = (rho**D) * dispersion
+
+    return sample_cov(S,
+                      rng=rng,
+                      p=p,
+                      s=s,
+                      alt=alt,
+                      prop=prop)
+
+def resample_AR1(rng=None,
+                 p=50,
+                 rho=0.6,
+                 s=5,
+                 alt=True,
+                 prop=0.8,
+                 standardize=True,
+                 B=2000):
+
+    # when standardize is True, coverage will be fine but pivot won't be
+    # this is because the TruncatedGaussian instance is not corrected for scale
+
+    D = np.fabs(np.subtract.outer(np.arange(p), np.arange(p)))
+    dispersion = 2
+    S_init = (rho**D) * dispersion
+    S = np.linalg.inv(S_init)
+    
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+    if alt:
+        beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+
+    S_sqrt = np.linalg.cholesky(S)
+    beta_hat = beta + S_sqrt @ rng.standard_normal(p) 
+    bootstrap_noise = rng.standard_normal((B, p)) @ S_sqrt.T
+    sample = bootstrap_noise + beta_hat[None,:]
+
+    S_i = np.linalg.inv(S)
+    mu = S_i @ beta
+
+    df = resampler_inference(sample,
+                             prop=prop,
+                             standardize=standardize)
+    if df is not None:
+        prec_E = S_i[df.index][:,df.index]
+        df['target'] = np.linalg.inv(prec_E) @ mu[df.index]
+        if not standardize:
+            df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j,'target']) for j in df.index]
+        else:
+            # scaling transformation not taken into account so cannot reuse TruncatedGaussian object
+            df['pivot'] = np.nan * df['pval']
+        return df, None
+
+def sample_cov(S,
+               rng=None,
+               p=100,
+               s=5,
+               alt=True,
+               prop=0.8,
+               lamval=3):
+
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+
+    S_sqrt = X = np.linalg.cholesky(S).T  # X.T @ X = S
+
+    noise = X.T @ rng.standard_normal(p) 
+    if alt:
+        beta[subs] = rng.standard_normal(s) * 2 + 3 * rng.choice([1,-1])
+    mu = S @ beta
+
+    Z = mu + noise
+    perturbation = X.T @ rng.standard_normal(p)
+    
+    df = score_inference(score=Z,
+                         cov_score=S,
+                         lamval=lamval,
+                         prop=prop,
+                         chol_cov=S_sqrt,
+                         perturbation=perturbation)
+    if df is not None:
+        active = list(df.index)
+        df['target'] = np.linalg.inv(S[active][:,active]) @ mu[active]
+
+    if df is not None:
+        df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
+        
+    return df, {'score':Z,'cov_score':S, 'lamval':lamval, 'prop':prop, 'chol_cov':S_sqrt, 'perturbation':perturbation}
+
+def sample_randomX(n,
+                   p,
+                   fit_intercept,
+                   standardize,
+                   rng=None,
+                   alt=False,
+                   s=10,
+                   snr=1,
+                   prop=0.8,
+                   penalty_facs=False,
+                   cv=True,
+                   upper_limits=np.inf):
+
+    if rng is None:
+        rng = np.random.default_rng(0)
+    beta = np.zeros(p)
+    subs = rng.choice(p, s, replace=False)
+
+    X = rng.standard_normal((n, p))
+    D = np.ones(p) # np.linspace(1, p, p) / p + 1
+    X *= D[None,:]
+    sd = np.fabs(1+np.random.standard_normal())
+    Y = rng.standard_normal(n) * sd
+    if alt:
+        beta[subs] = snr * (rng.uniform(3, 5) * rng.choice([-1,1], size=s, replace=True) / np.sqrt(n))
+
+    mu = X @ beta
+    Y += mu
+
+    if penalty_facs:
+        penalty_factor = np.ones(p)
+        penalty_factor[:10] = 0.5
+    else:
+        penalty_factor = None
+
+    GN = GLMNet(response_id='response',
+                fit_intercept=fit_intercept,
+                standardize=standardize,
+                penalty_factor=penalty_factor,
+                upper_limits=upper_limits)
+
+    m = int(prop*n)
+
+    Df = pd.DataFrame({'response':Y})
+    GN.fit(X[:m], Df.iloc[:m]) 
+
+    if cv:
+        GN.cross_validation_path(X[:m], Df.iloc[:m])
+        lamval = GN.index_best_['Mean Squared Error']
+    else:
+        eps = rng.standard_normal((X.shape[0], 1000)) * sd
+        noise_score = X.T @ eps
+        lamval = 1.2 * np.median(np.fabs(noise_score).max(0)) / X.shape[0]
+        
+    df = lasso_inference(GN, 
+                         lamval,
+                         (X[:m], Df.iloc[:m], None),
+                         (X, Df, None))
+
+    if df is not None:
+        if fit_intercept:
+            active_set = np.array(df.index[1:]).astype(int)
+        else:
+            active_set = np.array(df.index).astype(int)
+        X_sel = X[:,active_set]
+
+        if fit_intercept:
+            X_sel = np.column_stack([np.ones(X_sel.shape[0]), X_sel])
+        targets = np.linalg.pinv(X_sel) @ mu
+
+        df['target'] = targets
+            
+    if df is not None:
+        df['pivot'] = [df.loc[j,'TG'].pvalue(df.loc[j, 'target']) for j in df.index]
+        
+    return df, None
+
+@pytest.mark.parametrize('standardize', [True, False])
+@pytest.mark.parametrize('penalty_facs', [True, False])
+@pytest.mark.parametrize('fit_intercept', [True, False])
+#@pytest.mark.parametrize('upper_limits', [np.inf, 0.1])
+@pytest.mark.parametrize('p', [103])
+@pytest.mark.parametrize('n', [500])
+def test_randomX(n,
+                 p,
+                 standardize,
+                 fit_intercept,
+                 penalty_facs,
+                 upper_limits=np.inf):
+
+    for _ in range(5):
+        df = None
+        while df is None: # make sure it is run
+            kwargs = dict(n=n,
+                          p=p,
+                          standardize=standardize,
+                          fit_intercept=fit_intercept,
+                          penalty_facs=penalty_facs,
+                          cv=False,
+                          upper_limits=upper_limits)
+
+            df = main(sample_randomX,
+                      kwargs,
+                      ntrial=1)
+
+def test_resampler(p=50,
+                   standardize=True):
+
+    for _ in range(5):
+        df = None
+        while df is None: # make sure it is run
+            df = main(resample_orthogonal,
+                      {'p':p},
+                      ntrial=1)
+
+
+def test_orthogonal(p=100):
+
+    for _ in range(5):
+        df = None
+        while df is None: # make sure it is run
+            df = main(sample_orthogonal,
+                      {'p':p},
+                      ntrial=1)
+
+
+def test_AR1(p=100,
+             rho=0.6):
+
+    for _ in range(5):
+        df = None
+        while df is None: # make sure it is run
+            df = sample_AR1(p=p,
+                            rho=rho)
+
+            
+
+def main(sampler,
+         kwargs,
+         ntrial=500):
+
+    ncover = 0
+    nsel = 0
+
+    dfs = []
+
+    rng = np.random.default_rng(0)
+    kwargs.update(rng=rng)
+    
+    for i in range(ntrial):
+        df, _ = sampler(**kwargs)
+        print(kwargs)
+        print(rng.standard_normal())
+        if df is not None:
+            dfs.append(df)
+        if len(dfs) > 0:
+            all_df = pd.concat(dfs)
+            ncover += ((all_df['lower'] < all_df['target']) & (all_df['upper'] > all_df['target'])).sum()
+            nsel += all_df.shape[0]
+
+            print('cover:', ncover / nsel,
+                  'power:', (all_df['pval'] < 0.05).mean(),
+                  'pivot<0.05:', (all_df['pivot'] < 0.05).mean(),
+                  'std(pivot):', all_df['pivot'].std())
+
+    if len(dfs) > 0:
+        return all_df
+        
+def test_truncated_inference(B=1000,
+                             smoothing_sigma=np.sqrt(1/3),
+                             sigma=1,
+                             upper_bound=None,
+                             lower_bound=None,
+                             alt=False,
+                             level=0.9):
+
+    pvals = []
+    cover = []
+
+    rng = np.random.default_rng(0)
+    
+    for _ in range(B):
+        mu = rng.standard_normal() + 3
+        if not alt:
+            mu *= 0
+        if lower_bound is None or upper_bound is None:
+            lower_bound = rng.uniform(0, 2)
+            upper_bound = lower_bound + rng.uniform(3, 5)
+        while True:
+            Z = rng.standard_normal() * sigma
+            Z += mu
+            Z_noisy = Z + rng.standard_normal() * smoothing_sigma
+            if (Z_noisy > lower_bound) and (Z_noisy < upper_bound):
+                TG = TruncatedGaussian(estimate=Z,
+                                       sigma=sigma,
+                                       smoothing_sigma=smoothing_sigma,
+                                       lower_bound=lower_bound,
+                                       upper_bound=upper_bound,
+                                       level=level)
+                
+                (L, U), mle, pval = (TG.interval(), TG.MLE(), TG.pvalue())
+
+                cover.append((L<mu) * (U>mu))
+                pvals.append(pval)
+                print(np.mean(cover), np.mean(np.array(pvals) < 0.05))
+                break
+