From 4b08bc532fde4f8b2c1e25c735438ef3720e5b40 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Mon, 19 Dec 2016 17:32:44 -0800
Subject: [PATCH 001/617] icml started

---
 selection/bayesian/ci_via_approx_density.py |  437 +++++++
 selection/bayesian/test_conditional_prob.py |    6 +-
 selection/distributions/api.py              |    1 +
 selection/distributions/intervals.py        |  188 +++
 selection/randomized/M_estimator.py         |  281 +++--
 selection/randomized/glm.py                 |  226 +++-
 selection/randomized/query.py               | 1164 +++++++++++++++++++
 selection/randomized/randomization.py       |  244 +++-
 8 files changed, 2347 insertions(+), 200 deletions(-)
 create mode 100644 selection/bayesian/ci_via_approx_density.py
 create mode 100644 selection/distributions/intervals.py
 create mode 100644 selection/randomized/query.py

diff --git a/selection/bayesian/ci_via_approx_density.py b/selection/bayesian/ci_via_approx_density.py
new file mode 100644
index 000000000..b10095ffd
--- /dev/null
+++ b/selection/bayesian/ci_via_approx_density.py
@@ -0,0 +1,437 @@
+import time
+import numpy as np
+import regreg.api as rr
+from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
+from scipy.stats import norm
+from selection.randomized.M_estimator import M_estimator
+from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
+
+def myround(a, decimals=1):  # NOTE(review): `decimals` is accepted but never used; one decimal is hard-coded
+    a_x = np.round(a, decimals=1)* 10.  # values rounded to one decimal, expressed in tenths
+    rem = np.zeros(a.shape[0], bool)  # mask sized by the first axis of `a`
+    rem[(np.remainder(a_x, 2) == 1)] = 1  # flag entries whose tenths value is odd
+    a_x[rem] = a_x[rem] + 1.  # move odd tenths up to the next even tenth
+    return a_x/10.
+
+
+class neg_log_cube_probability(rr.smooth_atom):
+    def __init__(self,
+                 q, #equals p - E in our case
+                 lagrange,
+                 randomization_scale = 1., #equals the randomization variance in our case
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+        # NOTE(review): `randomization_scale` divides the argument in smooth_objective, i.e. it is used like a standard deviation
+        self.randomization_scale = randomization_scale
+        self.lagrange = lagrange
+        self.q = q
+
+        rr.smooth_atom.__init__(self,
+                                (self.q,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=None,
+                                coef=coef)
+
+    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
+        """Value and/or gradient (per `mode`) of -sum(log(cdf(arg_u) - cdf(arg_l))); `check_feasibility` and `tol` are unused."""
+        arg = self.apply_offset(arg)
+        # standardized upper / lower arguments of the normal CDF
+        arg_u = (arg + self.lagrange)/self.randomization_scale
+        arg_l = (arg - self.lagrange)/self.randomization_scale
+        prod_arg = np.exp(-(2. * self.lagrange * arg)/(self.randomization_scale**2))
+        neg_prod_arg = np.exp((2. * self.lagrange * arg)/(self.randomization_scale**2))
+        cube_prob = norm.cdf(arg_u) - norm.cdf(arg_l)
+        log_cube_prob = -np.log(cube_prob).sum()
+        threshold = 10 ** -10
+        indicator = np.zeros(self.q, bool)
+        indicator[(cube_prob > threshold)] = 1
+        positive_arg = np.zeros(self.q, bool)
+        positive_arg[(arg>0)] = 1
+        pos_index = np.logical_and(positive_arg, ~indicator)
+        neg_index = np.logical_and(~positive_arg, ~indicator)
+        log_cube_grad = np.zeros(self.q)
+        log_cube_grad[indicator] = (np.true_divide(-norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
+                                        cube_prob[indicator]))/self.randomization_scale
+        # coordinates with cube_prob <= threshold use expressions that do not divide by cube_prob
+        log_cube_grad[pos_index] = ((-1. + prod_arg[pos_index])/
+                                     ((prod_arg[pos_index]/arg_u[pos_index])-
+                                      (1./arg_l[pos_index])))/self.randomization_scale
+
+        log_cube_grad[neg_index] = ((arg_u[neg_index] -(arg_l[neg_index]*neg_prod_arg[neg_index]))
+                                    /self.randomization_scale)/(1.- neg_prod_arg[neg_index])
+
+
+        if mode == 'func':
+            return self.scale(log_cube_prob)
+        elif mode == 'grad':
+            return self.scale(log_cube_grad)
+        elif mode == 'both':
+            return self.scale(log_cube_prob), self.scale(log_cube_grad)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+
+class approximate_conditional_prob_E(rr.smooth_atom):
+
+    def __init__(self,
+                 t, #point at which density is to be computed
+                 approx_density,
+                 coef = 1.,
+                 offset= None,
+                 quadratic= None):
+        """Smooth atom over the `nactive` optimization variables of `approx_density`, evaluated at target value `t`."""
+        self.t = t
+        self.AD = approx_density
+        self.q = self.AD.p - self.AD.nactive
+        self.inactive_conjugate = self.active_conjugate = approx_density.randomization.CGF_conjugate
+
+        if self.active_conjugate is None:
+            raise ValueError(
+                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates')
+
+        # Collect the penalty weights in the dict's iteration order.
+        # `values()` exists on Python 2 and 3, whereas `iteritems()` is Python 2 only
+        # and raises AttributeError on Python 3.
+        lagrange = np.asarray(list(self.AD.penalty.weights.values()))
+
+        self.inactive_lagrange = lagrange[~self.AD._overall]
+        self.active_lagrange = lagrange[self.AD._overall]
+
+        rr.smooth_atom.__init__(self,
+                                (self.AD.nactive,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=self.AD.feasible_point,
+                                coef=coef)
+
+        self.coefs[:] = self.AD.feasible_point
+        self.B_active = self.AD.opt_linear_term[:self.AD.nactive, :self.AD.nactive]
+        self.B_inactive = self.AD.opt_linear_term[self.AD.nactive:, :self.AD.nactive]
+
+        self.nonnegative_barrier = nonnegative_softmax_scaled(self.AD.nactive)
+
+
+    def sel_prob_smooth_objective(self, param, j, mode='both', check_feasibility=False):
+        """Value and/or gradient (per `mode`) of the summed losses with target coordinate `j` set to `self.t`; `check_feasibility` is unused."""
+        param = self.apply_offset(param)
+        index = np.zeros(self.AD.nactive, bool)
+        index[j] = 1
+        data = np.squeeze(self.t * self.AD.target_linear_term[:, index]) \
+               + self.AD.target_linear_term[:, ~index].dot(self.AD.target_observed[~index])
+        # coordinate j of the target is replaced by self.t; the others keep their observed values
+        offset_active = self.AD.opt_affine_term[:self.AD.nactive] + self.AD.null_statistic[:self.AD.nactive] + data[:self.AD.nactive]
+
+        offset_inactive = self.AD.null_statistic[self.AD.nactive:] + data[self.AD.nactive:]
+
+        active_conj_loss = rr.affine_smooth(self.active_conjugate,
+                                            rr.affine_transform(self.B_active, offset_active))
+
+        cube_obj = neg_log_cube_probability(self.q, self.inactive_lagrange, randomization_scale = 1.)
+
+        cube_loss = rr.affine_smooth(cube_obj, rr.affine_transform(self.B_inactive, offset_inactive))
+        # objective = active conjugate term + inactive cube term + barrier on the optimization variables
+        total_loss = rr.smooth_sum([active_conj_loss,
+                                    cube_loss,
+                                    self.nonnegative_barrier])
+
+        if mode == 'func':
+            f = total_loss.smooth_objective(param, 'func')
+            return self.scale(f)
+        elif mode == 'grad':
+            g = total_loss.smooth_objective(param, 'grad')
+            return self.scale(g)
+        elif mode == 'both':
+            f, g = total_loss.smooth_objective(param, 'both')
+            return self.scale(f), self.scale(g)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+    def minimize2(self, j, step=1, nstep=30, tol=1.e-6):
+        """Backtracking gradient descent on `sel_prob_smooth_objective` for coordinate `j`, keeping iterates strictly positive; returns (point, value)."""
+        current = self.coefs
+        current_value = np.inf
+
+        objective = lambda u: self.sel_prob_smooth_objective(u, j, 'func')
+        grad = lambda u: self.sel_prob_smooth_objective(u, j, 'grad')
+
+        for itercount in range(nstep):
+            newton_step = grad(current)  # despite the name, this is the plain gradient
+
+            # make sure proposal is feasible
+
+            count = 0
+            while True:
+                count += 1
+                proposal = current - step * newton_step
+                #print("current proposal and grad", proposal, newton_step)
+                if np.all(proposal > 0):
+                    break
+                step *= 0.5
+                if count >= 40:
+                    #print(proposal)
+                    raise ValueError('not finding a feasible point')
+
+            # make sure proposal is a descent
+
+            count = 0
+            while True:
+                proposal = current - step * newton_step
+                proposed_value = objective(proposal)
+                # NOTE(review): `count` is never incremented here, so this loop has no iteration cap
+                if proposed_value <= current_value:
+                    break
+                step *= 0.5
+
+            # stop if relative decrease is small
+
+            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+                current = proposal
+                current_value = proposed_value
+                break
+
+            current = proposal
+            current_value = proposed_value
+
+            if itercount % 4 == 0:
+                step *= 2
+
+        # print('iter', itercount)
+        value = objective(current)
+
+        return current, value
+
+class approximate_conditional_density_E(rr.smooth_atom, M_estimator):
+
+    def __init__(self, loss, epsilon, penalty, randomization,
+                 coef=1.,
+                 offset=None,
+                 quadratic=None,
+                 nstep=10):
+        """Initialise the `M_estimator` part and a `smooth_atom` of shape (1,); `nstep` is accepted but not used here."""
+        M_estimator.__init__(self, loss, epsilon, penalty, randomization)
+
+        rr.smooth_atom.__init__(self,
+                                (1,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                coef=coef)
+
+    def solve_approx(self):
+        """Solve the randomized problem, then tabulate `approx_conditional_prob` on a fixed grid for every active coordinate."""
+        self.Msolve()
+        self.feasible_point = np.abs(self.initial_soln[self._overall])
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self._overall,
+                                              beta_full=self._beta_full,
+                                              inactive=~self._overall)[0]
+
+        score_cov = bootstrap_cov(lambda: np.random.choice(n, size=(n,), replace=True), bootstrap_score)
+
+        nactive = self._overall.sum()
+
+        Sigma_D_T = score_cov[:, :nactive]
+        Sigma_T = score_cov[:nactive, :nactive]
+        Sigma_T_inv = np.linalg.inv(Sigma_T)
+
+        score_linear_term = self.score_transform[0]
+        (self.opt_linear_term, self.opt_affine_term) = self.opt_transform
+
+        # decomposition: express the score term through its first `nactive`
+        # coordinates (the target) plus a remainder (the null statistic)
+        target_linear_term = (score_linear_term.dot(Sigma_D_T)).dot(Sigma_T_inv)
+
+        # observed target and null statistic
+        target_observed = self.observed_score_state[:nactive]
+        null_statistic = (score_linear_term.dot(self.observed_score_state))-(target_linear_term.dot(target_observed))
+
+        (self.target_linear_term, self.target_observed, self.null_statistic) \
+            = (target_linear_term, target_observed, null_statistic)
+        self.nactive = nactive
+
+        #defining the grid on which marginal conditional densities will be evaluated
+        grid_length = 120
+        self.grid = np.linspace(-4, 8, num=grid_length)
+        last_index = grid_length - 1  # largest valid position in the grid
+
+        print("observed values", target_observed)
+        self.ind_obs = np.zeros(nactive, int)
+        self.norm = np.zeros(nactive)
+        self.h_approx = np.zeros((nactive, self.grid.shape[0]))
+
+        for j in range(nactive):
+            obs = target_observed[j]
+            self.norm[j] = Sigma_T[j,j]
+            if obs < self.grid[0]:
+                self.ind_obs[j] = 0
+            elif obs > np.max(self.grid):
+                self.ind_obs[j] = last_index
+            else:
+                self.ind_obs[j] = np.argmin(np.abs(self.grid-obs))
+            # `ind_obs[j]` indexes arrays of length `grid_length` in `approximate_ci`, so an
+            # observation above the grid is clamped to the last cell rather than one past it
+            self.h_approx[j, :] = self.approx_conditional_prob(j)
+
+
+    def approx_conditional_prob(self, j):
+        """Return, for each grid point, minus the minimized objective for coordinate `j`."""
+        grid_values = []
+        for point in self.grid:
+            # a fresh optimization problem is built for every grid point
+            solver = approximate_conditional_prob_E(point, self)
+            _, minimum = solver.minimize2(j, nstep=50)
+            grid_values.append(-minimum)
+        return np.array(grid_values)
+
+
+    def area_normalized_density(self, j, mean):
+        """Cumulative sums over the grid of the normalized approximate density for coordinate `j` at `mean`."""
+        variance = self.norm[j]
+        log_h = self.h_approx[j, :]
+        total = 0.
+        weights = []
+        for point, h_val in zip(self.grid, log_h):
+            # Gaussian kernel centred at `mean`, tilted by the tabulated value for this grid point
+            weight = np.exp(-np.true_divide((point - mean) ** 2, 2 * variance) + h_val)
+            total += weight
+            weights.append(weight)
+
+        # dividing the list by the numpy scalar total produces an array
+        return np.cumsum(np.array(weights / total))
+
+    def approximate_ci(self, j):
+        """Return (lower, upper) limits for coordinate `j` by scanning candidate means.
+
+        Candidate means run from -5 to 10 in steps of 0.1; one is kept when the normalized cumulative
+        sum at the observed grid index lies in [0.05, 0.95]. (0, 0) is returned if none qualifies.
+        """
+        param_grid = np.round(np.linspace(-5, 10, num=151), decimals=1)
+        observed_index = self.ind_obs[j]
+        area = np.zeros(param_grid.shape[0])
+        for k, candidate in enumerate(param_grid):
+            area[k] = self.area_normalized_density(j, candidate)[observed_index]
+
+        # keep the candidates whose cumulative value at the observation is central
+        region = param_grid[(area >= 0.05) & (area <= 0.95)]
+        if region.size == 0:
+            return 0, 0
+        return np.nanmin(region), np.nanmax(region)
+
+
+
+def test_approximate_ci_E(n=200, p=10, s=5, snr=5, rho=0.1,
+                          lam_frac=1.,
+                          loss='gaussian'):
+    """Simulate one data set and return (active_set, intervals, truth, nactive) if the active set contains the first `s` indices, else 0."""
+    from selection.tests.instance import logistic_instance, gaussian_instance
+    from selection.randomized.api import randomization
+
+    if loss == "gaussian":
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+        loss = rr.glm.gaussian(X, y)
+    elif loss == "logistic":
+        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
+        loss = rr.glm.logistic(X, y)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
+
+    # randomizer = randomization.isotropic_gaussian((p,), scale=sigma)
+
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    # W[0] = 0 # use at least some unpenalized
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+    # the local name `randomization` is rebound from the imported module object to the sampler instance
+    randomization = randomization.isotropic_gaussian((p,), 1.)
+    ci = approximate_conditional_density_E(loss, epsilon, penalty, randomization)
+
+    ci.solve_approx()
+    print("nactive", ci._overall.sum())
+    active_set = np.asarray([i for i in range(p) if ci._overall[i]])
+
+    true_support = np.asarray([i for i in range(p) if i < s])
+
+    nactive = ci.nactive
+
+    print("active set, true_support", active_set, true_support)
+
+    #truth = np.round((np.linalg.pinv(X_1[:, active])).dot(X_1[:, active].dot(true_beta[active])))
+    truth = beta[ci._overall]
+
+    print("true coefficients", truth)
+    # intervals are only computed when every index below `s` was selected
+    if (set(active_set).intersection(set(true_support)) == set(true_support))== True:
+
+        ci_active_E = np.zeros((nactive, 2))
+        toc = time.time()
+        for j in range(nactive):
+            ci_active_E[j, :] = np.array(ci.approximate_ci(j))
+            print(ci_active_E[j, :])
+        tic = time.time()
+        print('ci time now', tic - toc)
+        #print('ci intervals now', ci_active_E)
+
+        return active_set, ci_active_E, truth, nactive
+
+    else:
+        return 0
+
+#test_approximate_ci_E()
+
+def compute_coverage(p=10):
+    """Run `test_approximate_ci_E` 50 times; return (per-index coverage proportion, selection counts, number of ValueError iterations)."""
+    niter = 50
+    coverage = np.zeros(p)
+    nsel = np.zeros(p)
+    nerr = 0
+    for iter in range(niter):
+        print("\n")
+        print("iteration", iter)
+        try:
+            test_ci = test_approximate_ci_E()  # called with its own default `p`; `p` here only sizes the counters
+            if test_ci != 0:
+                ci_active = test_ci[1]
+                print("ci", ci_active)
+                active_set = test_ci[0]
+                true_val = test_ci[2]
+                nactive = test_ci[3]
+                toc = time.time()
+                for l in range(nactive):
+                    nsel[active_set[l]] += 1
+                    print(true_val[l])
+                    if (ci_active[l,0]<= true_val[l]) and (true_val[l]<= ci_active[l,1]):
+                        coverage[active_set[l]] += 1
+                tic = time.time()
+                print('ci time', tic - toc)
+
+            print(coverage[~np.isnan(coverage)])
+            print(nsel[~np.isnan(nsel)])
+            print('coverage so far',np.true_divide(np.sum(coverage[~np.isnan(coverage)]), np.sum(nsel[~np.isnan(nsel)])))
+
+        except ValueError:
+            nerr +=1
+            print('ignore iteration raising ValueError')
+            continue
+    # indices that were never selected give 0/0 here and are mapped to 0 below
+    coverage_prop = np.true_divide(coverage, nsel)
+    coverage_prop[coverage_prop == np.inf] = 0
+    coverage_prop = np.nan_to_num(coverage_prop)
+    return coverage_prop, nsel, nerr
+
+
+print(compute_coverage())  # NOTE(review): runs at import time; consider an `if __name__ == "__main__":` guard
+
+
+
+
+
+
+
+
+
diff --git a/selection/bayesian/test_conditional_prob.py b/selection/bayesian/test_conditional_prob.py
index 05a21e759..2d1f2cac8 100644
--- a/selection/bayesian/test_conditional_prob.py
+++ b/selection/bayesian/test_conditional_prob.py
@@ -11,7 +11,7 @@
 from selection.randomized.api import randomization
 from selection.bayesian.paired_bootstrap import pairs_bootstrap_glm, bootstrap_cov
 
-n = 100
+n = 200
 p = 10
 s = 5
 snr = 5
@@ -134,7 +134,6 @@ def test_approximate_ci():
                                                           randomization.isotropic_gaussian((p,), 1.),
                                                           epsilon)
 
-
         ci_active = np.zeros((nactive,2))
         toc = time.time()
         for j in range(nactive):
@@ -231,8 +230,9 @@ def compute_coverage():
                     if (ci_active[l,0]<= true_val[l]) and (true_val[l]<= ci_active[l,1]):
                         coverage[active_set[l]] += 1
                 tic = time.time()
-            print('ci time', tic - toc)
+                print('ci time', tic - toc)
             print('coverage so far',np.true_divide(coverage, nsel))
+            print('coverage so far',np.true_divide(np.sum(coverage[~np.isnan(coverage)]), np.sum(nsel[~np.isnan(nsel)])))
 
         except ValueError:
             nerr +=1
diff --git a/selection/distributions/api.py b/selection/distributions/api.py
index 1c76e1169..5b006ea8e 100644
--- a/selection/distributions/api.py
+++ b/selection/distributions/api.py
@@ -1 +1,2 @@
 from .discrete_family import discrete_family
+from .intervals import intervals_from_sample
diff --git a/selection/distributions/intervals.py b/selection/distributions/intervals.py
new file mode 100644
index 000000000..09fd5becb
--- /dev/null
+++ b/selection/distributions/intervals.py
@@ -0,0 +1,188 @@
+"""
+This module contains a class for
+forming confidence intervals and
+testing 1-dimensional linear hypotheses
+about the underlying mean vector of
+a Gaussian subjected to selection.
+"""
+
+from __future__ import print_function, division
+import numpy as np
+
+class intervals_from_sample(object):
+
+    """
+    Construct confidence intervals
+    for real-valued parameters by tilting
+    a multiparameter exponential family
+    with reference measure a Monte Carlo sample.
+    The exponential family is assumed to
+    be derived from a Gaussian with
+    some selective weight and the
+    parameters are linear functionals of the
+    mean parameter of the Gaussian.
+    """
+    def __init__(self, reference, sample, observed, covariance):
+        '''
+        Parameters
+        ----------
+        reference : np.float(k)
+            Reference value of mean parameter. Often
+            taken to be an unpenalized MLE or perhaps
+            (approximate) selective MLE / MAP.
+        sample : np.float(s, k)
+            A Monte Carlo sample drawn from selective distribution.
+        observed : np.float(k)
+            Observed value of Gaussian estimator.
+            Often an unpenalized MLE.
+        covariance : np.float(k, k)
+            Covariance of original Gaussian.
+            Used only to compute unselective
+            variance of linear functionals of the
+            (approximately) Gaussian estimator.
+        '''
+        # coerce every input so plain sequences are accepted as well as arrays
+        (self.reference,
+         self.sample,
+         self.observed,
+         self.covariance) = (np.asarray(reference),
+                             np.asarray(sample),
+                             np.asarray(observed),
+                             np.asarray(covariance))
+        # use the converted reference: the raw argument may not have a `.shape`
+        self.shape = self.reference.shape
+        self.nsample = self.sample.shape[0]  # sample is (s, k): one draw per row
+
+    def pivots_all(self, parameter=None):
+        r'''
+        Compute pivotal quantities, i.e.
+        the selective distribution function
+        under $H_{0,k}:\theta_k=\theta_{0,k}$
+        where $\theta_0$ is `parameter`.
+        Parameters
+        ----------
+        parameter : np.float(k) (optional)
+            Value of mean parameter under
+            coordinate null hypotheses.
+            Defaults to `np.zeros(k)`
+        Returns
+        -------
+        pivots : np.float(k)
+            Pivotal quantities. Each is
+            (asymptotically) uniformly
+            distributed on [0,1] under
+            corresponding $H_{0,k}$.
+        '''
+        if parameter is None:
+            parameter = np.zeros(self.shape)
+        pivots = np.zeros(self.shape)
+        for j, linear_func in enumerate(np.eye(self.shape[0])):
+            pivots[j] = self._pivot_param(linear_func, parameter[j])
+        return pivots
+
+    def confidence_interval(self, linear_func, level=0.9):
+        '''
+        Construct a `level*100`% confidence
+        interval for a linear functional
+        of the mean parameter
+        of the underlying Gaussian.
+        Parameters
+        ----------
+        linear_func : np.float(k)
+            Linear functional determining
+            parameter.
+        level : float (optional)
+            Specify the
+            confidence level.
+        Returns
+        -------
+        L, U : float
+            Lower and upper limits of the interval;
+            `None` is returned if no grid point is accepted.
+        '''
+        alpha = 1 - level
+        pvalues_at_grid, grid = self._pivots_grid(linear_func)
+        accepted_indices = np.array(pvalues_at_grid > alpha)
+        if np.sum(accepted_indices) > 0:
+            lower = np.min(grid[accepted_indices])
+            upper = np.max(grid[accepted_indices])
+            return lower, upper
+
+    def confidence_intervals_all(self, level=0.9):
+        r'''
+        Construct a `level*100`% confidence
+        interval for each $\theta_j$
+        of the mean parameter
+        of the underlying Gaussian.
+        Parameters
+        ----------
+        level : float (optional)
+            Specify the confidence level.
+        Returns
+        -------
+        LU : np.float(k,2)
+            Array with lower and upper confidence limits.
+        '''
+
+        lower, upper = np.zeros(self.shape), np.zeros(self.shape)
+        for j in range(self.shape[0]):
+            linear_func = np.zeros(self.shape)
+            linear_func[j] = 1.
+            limits = self.confidence_interval(linear_func, level=level)
+            if limits is not None:
+                lower[j], upper[j] = limits
+            else:
+                lower[j], upper[j] = np.nan, np.nan # bad reference -- all pvalues less than alpha
+        return np.array([lower, upper]).T
+
+    # Private methods
+
+    def _pivot_param(self, linear_func, param):
+        """
+        Compute pivotal quantity for the
+        quantity linear_func.dot(parameter)
+        at the hypothesized value param.
+        """
+        linear_func = np.atleast_1d(linear_func)
+        ref = (linear_func * self.reference).sum()
+        var = np.sum(linear_func * self.covariance.dot(linear_func))
+
+        _sample = self.sample.dot(linear_func)
+        _observed = (self.observed * linear_func).sum()
+
+        indicator = _sample < _observed
+        log_gaussian_tilt = _sample  * (param - ref)
+        log_gaussian_tilt /= var
+        emp_exp = self._empirical_exp(linear_func, param)  # sample mean of the same exponential tilt
+        likelihood_ratio = np.exp(log_gaussian_tilt) / emp_exp
+        return np.clip(np.mean(indicator * likelihood_ratio), 0, 1)
+
+    def _pivots_grid(self, linear_func, npts=1000, num_sd=10):
+        """
+        Compute two-sided p-values 2*min(pivot, 1-pivot) on a 1D grid of `npts` points
+        spanning `num_sd` standard deviations either side of (reference*linear_func).sum().
+        """
+        linear_func = np.atleast_1d(linear_func)
+        stdev = np.sqrt(np.sum(linear_func * self.covariance.dot(linear_func)))
+        grid = np.linspace(-num_sd*stdev, num_sd*stdev, npts) + (self.reference * linear_func).sum()
+        pivots_at_grid = [self._pivot_param(linear_func, grid[i])
+                          for i in range(grid.shape[0])]
+        pivots_at_grid = [2*min(pval, 1-pval) for pval in pivots_at_grid]
+        pivots_at_grid = np.asarray(pivots_at_grid)
+        return pivots_at_grid, grid
+
+    def _empirical_exp(self, linear_func, param):
+        """
+        Monte Carlo average of exp(tilt * sample.dot(linear_func)), where
+        tilt = (param - reference value of the functional) / its variance.
+        """
+        direction = np.atleast_1d(linear_func)
+        variance = np.sum(direction * self.covariance.dot(direction))
+        center = (self.reference * direction).sum()
+        tilt = (param - center) / variance
+
+        # we can probably save a little bit of time
+        # by caching the projected sample
+        projected = self.sample.dot(direction)
+
+        return np.exp(projected * tilt).mean()
\ No newline at end of file
diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index d3fa4c937..e07ccfa9f 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -1,38 +1,34 @@
 import numpy as np
 import regreg.api as rr
 
-class M_estimator(object):
+from .query import query
+from .randomization import split
+
+class M_estimator(query):
 
     def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
         """
         Fits the logistic regression to a candidate active set, without penalty.
         Calls the method bootstrap_covariance() to bootstrap the covariance matrix.
-
-        Computes $\bar{\beta}_E$ which is the restricted 
+        Computes $\bar{\beta}_E$ which is the restricted
         M-estimator (i.e. subject to the constraint $\beta_{-E}=0$).
-
         Parameters:
         -----------
-
         active: np.bool
             The active set from fitting the logistic lasso
-
         solve_args: dict
             Arguments to be passed to regreg solver.
-
         Returns:
         --------
-
         None
-
         Notes:
         ------
-
         Sets self._beta_unpenalized which will be used in the covariance matrix calculation.
         Also computes Hessian of loss at restricted M-estimator as well as the bootstrap covariance.
-
         """
 
+        query.__init__(self, randomization)
+
         (self.loss,
          self.epsilon,
          self.penalty,
@@ -42,27 +38,20 @@ def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':
                              penalty,
                              randomization,
                              solve_args)
-         
-        self._solved = False
-        self._randomized = False
-
-    def randomize(self):
 
-        if not self._randomized:
-            self._randomZ = self.randomization.sample()
-            self._random_term = rr.identity_quadratic(self.epsilon, 0, -self._randomZ, 0)
+    # Methods needed for subclassing a query
 
-        # set the _randomized bit
+    def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
 
-        self._randomized = True
-
-    def solve(self):
+        self.randomize()
 
         (loss,
+         randomized_loss,
          epsilon,
          penalty,
          randomization,
          solve_args) = (self.loss,
+                        self.randomized_loss,
                         self.epsilon,
                         self.penalty,
                         self.randomization,
@@ -70,15 +59,13 @@ def solve(self):
 
         # initial solution
 
-        problem = rr.simple_problem(loss, penalty)
-
-        self.randomize()
-        self.initial_soln = problem.solve(self._random_term, **solve_args)
+        problem = rr.simple_problem(randomized_loss, penalty)
+        self.initial_soln = problem.solve(**solve_args)
 
         # find the active groups and their direction vectors
         # as well as unpenalized groups
 
-        groups = np.unique(penalty.groups) 
+        groups = np.unique(penalty.groups)
         active_groups = np.zeros(len(groups), np.bool)
         unpenalized_groups = np.zeros(len(groups), np.bool)
 
@@ -103,21 +90,25 @@ def solve(self):
 
         # solve the restricted problem
 
-        self.overall = active + unpenalized
-        self.inactive = ~self.overall
-        self.unpenalized = unpenalized
-        self.active_directions = np.array(active_directions).T
-        self.active_groups = np.array(active_groups, np.bool)
-        self.unpenalized_groups = np.array(unpenalized_groups, np.bool)
+        self._overall = active + unpenalized
+        self._inactive = ~self._overall
+        self._unpenalized = unpenalized
+        self._active_directions = np.array(active_directions).T
+        self._active_groups = np.array(active_groups, np.bool)
+        self._unpenalized_groups = np.array(unpenalized_groups, np.bool)
 
-        self.selection_variable = {'groups':self.active_groups, 
-                                   'directions':self.active_directions}
+        self.selection_variable = {'groups':self._active_groups,
+                                   'variables':self._overall,
+                                   'directions':self._active_directions}
 
         # initial state for opt variables
 
-        initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + self._random_term.objective(self.initial_soln, 'grad') + epsilon * self.initial_soln)
-        initial_subgrad = initial_subgrad[self.inactive]
-        initial_unpenalized = self.initial_soln[self.unpenalized]
+        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
+                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad'))
+                          # the quadratic of a smooth_atom is not included in computing the smooth_objective
+
+        initial_subgrad = initial_subgrad[self._inactive]
+        initial_unpenalized = self.initial_soln[self._unpenalized]
         self.observed_opt_state = np.concatenate([initial_scalings,
                                                   initial_unpenalized,
                                                   initial_subgrad], axis=0)
@@ -126,18 +117,11 @@ def solve(self):
 
         self._solved = True
 
-        self._solved = True
-
-    def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
-
-        """
-        Should return a bootstrap_score
-        """
+        # Now setup the pieces for linear decomposition
 
         (loss,
          epsilon,
          penalty,
-         randomization,
          initial_soln,
          overall,
          inactive,
@@ -146,13 +130,12 @@ def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
          active_directions) = (self.loss,
                                self.epsilon,
                                self.penalty,
-                               self.randomization,
                                self.initial_soln,
-                               self.overall,
-                               self.inactive,
-                               self.unpenalized,
-                               self.active_groups,
-                               self.active_directions)
+                               self._overall,
+                               self._inactive,
+                               self._unpenalized,
+                               self._active_groups,
+                               self._active_directions)
 
         # scaling should be chosen to be Lipschitz constant for gradient of Gaussian part
 
@@ -182,7 +165,7 @@ def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
         # U for unpenalized
         # -E for inactive
 
-        _opt_linear_term = np.zeros((p, self.active_groups.sum() + unpenalized.sum() + inactive.sum()))
+        _opt_linear_term = np.zeros((p, self._active_groups.sum() + unpenalized.sum() + inactive.sum()))
         _score_linear_term = np.zeros((p, p))
 
         # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
@@ -198,7 +181,7 @@ def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
         for _i, _n in zip(inactive_idx, null_idx):
             _score_linear_term[_i,_n] = -_sqrt_scaling
 
-        # c_E piece 
+        # c_E piece
 
         scaling_slice = slice(0, active_groups.sum())
         if len(active_directions)==0:
@@ -231,7 +214,7 @@ def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
 
         _opt_affine_term = np.zeros(p)
         idx = 0
-        groups = np.unique(penalty.groups) 
+        groups = np.unique(penalty.groups)
         for i, g in enumerate(groups):
             if active_groups[i]:
                 group = penalty.groups == g
@@ -239,12 +222,16 @@ def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
                 idx += 1
 
         # two transforms that encode score and optimization
-        # variable roles 
+        # variable roles
 
         # later, we will modify `score_transform`
         # in `linear_decomposition`
 
+        _opt_linear_term = np.concatenate((_opt_linear_term[self._overall,:], _opt_linear_term[~self._overall,:]), 0)
+        _opt_affine_term = np.concatenate((_opt_affine_term[self._overall], _opt_affine_term[~self._overall]),0)
         self.opt_transform = (_opt_linear_term, _opt_affine_term)
+
+        _score_linear_term = np.concatenate((_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
 
         # now store everything needed for the projections
@@ -264,93 +251,88 @@ def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
         self.group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, bound=1.)
         self.subgrad_slice = subgrad_slice
 
+        self._setup = True
+
+    def setup_sampler(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
+        """No-op kept so existing callers still work; the arguments are ignored."""
+
     def projection(self, opt_state):
         """
         Full projection for Langevin.
-
         The state here will be only the state of the optimization variables.
         """
 
-        if not hasattr(self, "scaling_slice"):
+        if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-        new_state = opt_state.copy() # not really necessary to copy
-        new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
-        new_state[self.subgrad_slice] = self.group_lasso_dual.bound_prox(opt_state[self.subgrad_slice])
 
+        if ('subgradient' not in self.selection_variable and
+            'scaling' not in self.selection_variable): # have not conditioned on any thing else
+            new_state = opt_state.copy() # not really necessary to copy
+            new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
+            new_state[self.subgrad_slice] = self.group_lasso_dual.bound_prox(opt_state[self.subgrad_slice])
+        elif ('subgradient' not in self.selection_variable and
+              'scaling' in self.selection_variable): # conditioned on the initial scalings
+                                                     # only the subgradient in opt_state
+            new_state = self.group_lasso_dual.bound_prox(opt_state)
+        elif ('subgradient' in self.selection_variable and
+              'scaling' not in self.selection_variable): # conditioned on the subgradient
+                                                         # only the scaling in opt_state
+            new_state = np.maximum(opt_state, 0)
+        else:
+            new_state = opt_state
         return new_state
 
-    def randomization_gradient(self, data_state, data_transform, opt_state):
+    # optional things to condition on
+
+    def condition_on_subgradient(self):
         """
-        Randomization derivative at full state.
+        Maybe we should allow subgradients of only some variables...
         """
-
-        if not hasattr(self, "opt_transform"):
+        if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-        # reconstruction of randoimzation omega
-
         opt_linear, opt_offset = self.opt_transform
-        data_linear, data_offset = data_transform
-        data_piece = data_linear.dot(data_state) + data_offset
-        opt_piece = opt_linear.dot(opt_state) + opt_offset
 
-        # value of the randomization omega
+        new_offset = opt_linear[:,self.subgrad_slice].dot(self.observed_opt_state[self.subgrad_slice]) + opt_offset
+        new_linear = opt_linear[:,self.scaling_slice]
 
-        full_state = (data_piece + opt_piece) 
+        self.opt_transform = (new_linear, new_offset)
 
-        # gradient of negative log density of randomization at omega
+        # for group LASSO this should not induce a bigger jacobian as
+        # the subgradients are in the interior of a ball
+        self.selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
 
-        randomization_derivative = self.randomization.gradient(full_state)
+        # reset variables
 
-        # chain rule for data, optimization parts
+        self.observed_opt_state = self.observed_opt_state[self.scaling_slice]
+        self.scaling_slice = slice(None, None, None)
+        self.subgrad_slice = np.zeros(new_linear.shape[1], np.bool)
+        self.num_opt_var = new_linear.shape[1]
 
-        data_grad = data_linear.T.dot(randomization_derivative)
-        opt_grad = opt_linear.T.dot(randomization_derivative)
-
-        return data_grad, opt_grad - self.grad_log_jacobian(opt_state)
-
-
-    def grad_log_jacobian(self, opt_state):
-        """
-        log_jacobian depends only on data through
-        Hessian at \bar{\beta}_E which we 
-        assume is close to Hessian at \bar{\beta}_E^*
+    def condition_on_scalings(self):
         """
-        # needs to be implemented for group lasso
-        return 0.
-
-
-    def linear_decomposition(self, target_score_cov, target_cov, observed_target_state):
+        Maybe we should allow scalings of only some groups...
         """
-        Compute out the linear decomposition
-        of the score based on the target. This decomposition
-        writes the (limiting CLT version) of the data in the score as linear in the 
-        target and in some independent Gaussian error.
-        
-        This second independent piece is conditioned on, resulting
-        in a reconstruction of the score as an affine function of the target
-        where the offset is the part related to this independent
-        Gaussian error.
-        """
-
-        target_score_cov = np.atleast_2d(target_score_cov) 
-        target_cov = np.atleast_2d(target_cov) 
-        observed_target_state = np.atleast_1d(observed_target_state)
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
 
-        linear_part = target_score_cov.T.dot(np.linalg.pinv(target_cov))
+        opt_linear, opt_offset = self.opt_transform
 
-        offset = self.observed_score_state - linear_part.dot(observed_target_state)
+        new_offset = opt_linear[:,self.scaling_slice].dot(self.observed_opt_state[self.scaling_slice]) + opt_offset
+        new_linear = opt_linear[:,self.subgrad_slice]
 
-        # now compute the composition of this map with
-        # self.score_transform
+        self.opt_transform = (new_linear, new_offset)
 
-        score_linear, score_offset = self.score_transform
-        composition_linear_part = score_linear.dot(linear_part)
+        # for group LASSO this will induce a bigger jacobian; the key is 'scaling' because projection() tests for that spelling
+        self.selection_variable['scaling'] = self.observed_opt_state[self.scaling_slice]
 
-        composition_offset = score_linear.dot(offset) + score_offset
+        # reset slices
 
-        return (composition_linear_part, composition_offset)
+        self.observed_opt_state = self.observed_opt_state[self.subgrad_slice]
+        self.subgrad_slice = slice(None, None, None)
+        self.scaling_slice = np.zeros(new_linear.shape[1], np.bool)
+        self.num_opt_var = new_linear.shape[1]
 
 
 
@@ -363,6 +345,75 @@ def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
     X_restricted = X[:,active]
     loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
     beta_E = loss_restricted.solve(**solve_args)
-    
+
     return beta_E
 
+class M_estimator_split(M_estimator):
+    """`M_estimator` whose randomization is a `split` built from the subsample and total sample sizes."""
+
+    def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10}):
+        """Raise ValueError when `subsample_size` exceeds the row count of `loss.saturated_loss`."""
+        total_size = loss.saturated_loss.shape[0]
+        if subsample_size > total_size: # checked first, before the sizes are handed to `split`
+            raise ValueError('subsample size must be smaller than total sample size')
+
+        self.randomization = split(loss.shape, subsample_size, total_size)
+        M_estimator.__init__(self,loss, epsilon, penalty, self.randomization, solve_args=solve_args)
+        self.total_size, self.subsample_size = total_size, subsample_size
+
+    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B=2000):
+        """Estimate the randomization covariance from `B` bootstrap draws and store it with `set_covariance`."""
+        M_estimator.setup_sampler(self,
+                                  scaling=scaling,
+                                  solve_args=solve_args)
+
+        # now we need to estimate covariance of
+        # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)
+
+        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0] # shorthand
+
+        from .glm import pairs_bootstrap_score # need to correct these imports!!!
+
+        bootstrap_score = pairs_bootstrap_score(self.loss,
+                                                self._overall,
+                                                beta_active=self._beta_full[self._overall],
+                                                solve_args=solve_args)
+
+        # find unpenalized MLE on subsample; the quadratic is swapped out only for
+        # this step, so `finally` puts it back even when the solve raises
+        newq, oldq = rr.identity_quadratic(0, 0, 0, 0), self.randomized_loss.quadratic
+        self.randomized_loss.quadratic = newq
+        try:
+            beta_active_subsample = restricted_Mest(self.randomized_loss,
+                                                    self._overall)
+            bootstrap_score_split = pairs_bootstrap_score(self.loss,
+                                                          self._overall,
+                                                          beta_active=beta_active_subsample,
+                                                          solve_args=solve_args)
+        finally:
+            self.randomized_loss.quadratic = oldq
+
+        # float(m) keeps n / m a true ratio where `/` on two ints would floor (Python 2)
+        inv_frac = n / float(m)
+
+        def subsample_diff(m, n, indices):
+            subsample = np.random.choice(indices, size=m, replace=False)
+            full_score = bootstrap_score(indices) # a sum of n terms
+            randomized_score = bootstrap_score_split(subsample) # a sum of m terms
+            return full_score - randomized_score * inv_frac
+
+        first_moment = np.zeros(p)
+        second_moment = np.zeros((p, p))
+
+        _n = np.arange(n)
+        for _ in range(B):
+            indices = np.random.choice(_n, size=n, replace=True)
+            randomized_score = subsample_diff(m, n, indices)
+            first_moment += randomized_score
+            second_moment += np.multiply.outer(randomized_score, randomized_score)
+
+        first_moment /= B
+        second_moment /= B
+
+        cov = second_moment - np.multiply.outer(first_moment, first_moment)
+        self.randomization.set_covariance(cov)
\ No newline at end of file
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 9baa2f747..a445d1bb5 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -1,15 +1,18 @@
 import functools # for bootstrap partial mapping
 
 import numpy as np
+from regreg.api import glm
 
-from .M_estimator import restricted_Mest, M_estimator
+from .M_estimator import restricted_Mest, M_estimator, M_estimator_split
 from .greedy_step import greedy_score_step
+from .threshold_score import threshold_score
+
 from regreg.api import glm
 
-def pairs_bootstrap_glm(glm_loss, 
-                        active, 
-                        beta_full=None, 
-                        inactive=None, 
+def pairs_bootstrap_glm(glm_loss,
+                        active,
+                        beta_full=None,
+                        inactive=None,
                         scaling=1.,
                         solve_args={'min_its':50, 'tol':1.e-10}):
     """
@@ -39,6 +42,8 @@ def pairs_bootstrap_glm(glm_loss,
     if inactive is not None:
         _bootC = X_inactive.T.dot(_bootW.dot(X_active))
         _bootI = _bootC.dot(_bootQinv)
+    else:
+        _bootI = None
 
     nactive = active.sum()
     if inactive is not None:
@@ -49,7 +54,7 @@ def pairs_bootstrap_glm(glm_loss,
         X_full = X_active
         beta_overall = beta_active
 
-    _boot_mu = lambda X_full: glm_loss.saturated_loss.smooth_objective(X_full.dot(beta_overall), 'grad') + Y
+    _boot_mu = lambda X_full, beta_overall: glm_loss.saturated_loss.mean_function(X_full.dot(beta_overall))
 
     if ntotal > nactive:
         observed = np.hstack([beta_active, -glm_loss.smooth_objective(beta_full, 'grad')[inactive]])
@@ -59,10 +64,10 @@ def pairs_bootstrap_glm(glm_loss,
     # scaling is a lipschitz constant for a gradient squared
     _sqrt_scaling = np.sqrt(scaling)
 
-    def _boot_score(indices):
+    def _boot_score(X_full, Y, ntotal, _bootQinv, _bootI, nactive, _sqrt_scaling, beta_overall, indices):
         X_star = X_full[indices]
         Y_star = Y[indices]
-        score = X_star.T.dot(Y_star - _boot_mu(X_star))
+        score = X_star.T.dot(Y_star - _boot_mu(X_star, beta_overall))
         result = np.zeros(ntotal)
         result[:nactive] = _bootQinv.dot(score[:nactive])
         if ntotal > nactive:
@@ -74,8 +79,32 @@ def _boot_score(indices):
     observed[:nactive] *= _sqrt_scaling
     observed[nactive:] /= _sqrt_scaling
 
-    return _boot_score, observed
+    return functools.partial(_boot_score, X_full, Y, ntotal, _bootQinv, _bootI, nactive, _sqrt_scaling, beta_overall), observed
+
+def pairs_bootstrap_score(glm_loss,
+                          active,
+                          beta_active=None,
+                          solve_args={'min_its':50, 'tol':1.e-10}):
+    """
+    pairs bootstrap of the gradient -X^T(Y - mean_function(X[:,active] beta_active)) on resampled rows
+    """
+    X, Y = glm_loss.data
 
+    if beta_active is None:
+        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+
+    # the score needs only the mean function, so no Hessian weights are formed here
+    # (an np.diag of the hessian would be allocated without ever being read)
+    mean_function = glm_loss.saturated_loss.mean_function
+
+    def _boot_score(X, Y, active, beta_active, indices):
+        # indices : the rows that make up one bootstrap replicate
+        X_star = X[indices]
+        Y_star = Y[indices]
+        score = -X_star.T.dot(Y_star - mean_function(X_star[:,active].dot(beta_active)))
+        return score
+
+    return functools.partial(_boot_score, X, Y, active, beta_active)
 
 def set_alpha_matrix(glm_loss,
                      active,
@@ -114,14 +143,11 @@ def set_alpha_matrix(glm_loss,
         X_full = X_active
         beta_overall = beta_active
 
-    # self.loss.loss(X.dot(beta)) == np.exp(X.dot(beta)) / (1 + np.exp(X.dot(beta))) - Y
-    obs_residuals = - glm_loss.saturated_loss.smooth_objective(X_full.dot(beta_overall), 'grad')
+    obs_residuals = Y - glm_loss.saturated_loss.mean_function(X_full.dot(beta_overall))
 
     return np.dot(np.dot(_Qinv, X_active.T), np.diag(obs_residuals))
 
 
-
-
 def _parametric_cov_glm(glm_loss,
                         active,
                         beta_full=None,
@@ -168,15 +194,14 @@ def pairs_inactive_score_glm(glm_loss, active, beta_active, scaling=1.):
 
     """
     Bootstrap inactive score at \bar{\beta}_E
-
     Will be used with forward stepwise.
     """
     inactive = ~active
     beta_full = np.zeros(glm_loss.shape)
     beta_full[active] = beta_active
 
-    _full_boot_score = pairs_bootstrap_glm(glm_loss, 
-                                           active, 
+    _full_boot_score = pairs_bootstrap_glm(glm_loss,
+                                           active,
                                            beta_full=beta_full,
                                            inactive=inactive,
                                            scaling=scaling)[0]
@@ -186,32 +211,145 @@ def _boot_score(indices):
 
     return _boot_score
 
+def target(loss,
+           active,
+           queries,
+           subset=None,
+           bootstrap=False,
+           solve_args={'min_its':50, 'tol':1.e-10},
+           reference=None):
+    """
+    Form target from `loss`
+    restricting to active variables.
+    If subset is not None, then target returns
+    only those coordinates of the active
+    variables.
+    Parameters
+    ----------
+    loss : glm loss
+       A glm loss; its `data` attribute is unpacked as `(X, Y)`.
+    active : np.bool
+       Indicators of active variables.
+    queries : `multiple_queries`
+       Queries whose sampler is set up here.
+    subset : np.bool
+       Indicator of subset of variables
+       to be returned. Includes both
+       active and inactive variables.
+    bootstrap : bool
+       If True, sampler returned uses bootstrap
+       otherwise uses a plugin CLT.
+    reference : np.float (optional)
+       Optional reference parameter. Defaults to the
+       observed target (presumably must share its shape).
+    solve_args : dict
+       Args forwarded to `pairs_bootstrap_glm`.
+    Returns
+    -------
+    target_sampler : `targeted_sampler`
+    target_observed : np.float
+       Observed target, returned alongside the sampler.
+    """
+
+    n = loss.data[0].shape[0] # rows of X; `data` is (X, Y)
+
+    # workout which inactive ones to return
+
+    if subset is None:
+        subset = active
+
+    active_subset = (active * subset)[active]
+    nactive = active.sum()
+    nactive_subset = active_subset.sum()
+    inactive = ~active * subset
+
+    # solve_args goes to the bootstrap, presumably for its restricted M estimator solve (TODO confirm)
+    boot_target, boot_target_observed = pairs_bootstrap_glm(loss, active, inactive=inactive, solve_args=solve_args)
+
+    def _subsetter(value):
+        if nactive_subset > 0:
+            return np.hstack([value[active_subset], value[nactive:]])
+        else:
+            return value[nactive:]
+
+    def _target(indices):
+        return _subsetter(boot_target(indices))
+    target_observed = _subsetter(boot_target_observed)
+
+    form_covariances = glm_nonparametric_bootstrap(n, n)
+    queries.setup_sampler(form_covariances)
+    queries.setup_opt_state()
+
+    if reference is None:
+        reference = target_observed
+
+    if bootstrap:
+        alpha_mat = set_alpha_matrix(loss, active, inactive=inactive)
+        alpha_subset = np.ones(alpha_mat.shape[0], bool) # builtin bool: the np.bool alias is gone from NumPy >= 1.24
+        alpha_subset[:nactive] = active_subset
+        alpha_mat = alpha_mat[alpha_subset]
+
+        target_sampler = queries.setup_bootstrapped_target(_target,
+                                                           target_observed,
+                                                           alpha_mat,
+                                                           reference=reference)
+    else:
+        target_sampler = queries.setup_target(_target,
+                                              target_observed,
+                                              reference=reference)
+    return target_sampler, target_observed
+
 class glm_group_lasso(M_estimator):
 
     def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
         M_estimator.setup_sampler(self, scaling=scaling, solve_args=solve_args)
 
         bootstrap_score = pairs_bootstrap_glm(self.loss,
-                                              self.overall, 
+                                              self.selection_variable['variables'],
                                               beta_full=self._beta_full,
-                                              inactive=self.inactive)[0]
+                                              inactive=~self.selection_variable['variables'])[0]
 
         return bootstrap_score
 
+class split_glm_group_lasso(M_estimator_split):
+    """`M_estimator_split` for glm losses whose `setup_sampler` returns the pairs-bootstrap score."""
+    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B=2000):
+        # B is forwarded so callers can choose the number of bootstrap draws the parent uses
+        M_estimator_split.setup_sampler(self, scaling=scaling, solve_args=solve_args, B=B)
+        variables = self.selection_variable['variables']
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              variables,
+                                              beta_full=self._beta_full,
+                                              inactive=~variables)[0]
+        return bootstrap_score
+
+
 class glm_group_lasso_parametric(M_estimator):
 
     # this setup_sampler returns only the active set
 
     def setup_sampler(self):
         M_estimator.setup_sampler(self)
-        return self.overall
+        return self.selection_variable['variables']
+
 
+class glm_greedy_step(greedy_score_step, glm):
 
-class glm_greedy_step(greedy_score_step):
+    # XXX this makes the assumption that our
+    # greedy_score_step maximized over ~active
 
     def setup_sampler(self):
         greedy_score_step.setup_sampler(self)
-        bootstrap_score = pairs_inactive_score_glm(self.loss, 
+        bootstrap_score = pairs_inactive_score_glm(self.loss,
+                                                   self.active,
+                                                   self.beta_active)
+        return bootstrap_score
+
+class glm_threshold_score(threshold_score):
+
+    def setup_sampler(self):
+        threshold_score.setup_sampler(self)
+        bootstrap_score = pairs_inactive_score_glm(self.loss,
                                                    self.active,
                                                    self.beta_active)
         return bootstrap_score
@@ -222,9 +360,9 @@ class fixedX_group_lasso(M_estimator):
     def __init__(self, X, Y, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
         loss = glm.gaussian(X, Y)
         M_estimator.__init__(self,
-                             loss, 
-                             epsilon, 
-                             penalty, 
+                             loss,
+                             epsilon,
+                             penalty,
                              randomization, solve_args=solve_args)
 
     def setup_sampler(self):
@@ -233,8 +371,8 @@ def setup_sampler(self):
         X, Y = self.loss.data
 
         bootstrap_score = resid_bootstrap(self.loss,
-                                          self.overall, 
-                                          self.inactive)[0]
+                                          self.selection_variable['variables'],
+                                          ~self.selection_variable['variables'])[0]
         return bootstrap_score
 
 # Methods to form appropriate covariances
@@ -242,10 +380,8 @@ def setup_sampler(self):
 def bootstrap_cov(sampler, boot_target, cross_terms=(), nsample=2000):
     """
     m out of n bootstrap
-
     returns estimates of covariance matrices: boot_target with itself,
     and the blocks of (boot_target, boot_other) for other in cross_terms
-
     """
 
     _mean_target = 0.
@@ -274,6 +410,8 @@ def bootstrap_cov(sampler, boot_target, cross_terms=(), nsample=2000):
         _outer_cross[i] /= nsample
 
     _cov_target = _outer_target - np.multiply.outer(_mean_target, _mean_target)
+    if len(cross_terms) == 0:
+        return _cov_target
     return [_cov_target] + [_o - np.multiply.outer(_mean_target, _m) for _m, _o in zip(_mean_cross, _outer_cross)]
 
 def glm_nonparametric_bootstrap(m, n):
@@ -367,3 +505,35 @@ def glm_parametric_covariance(glm_loss, solve_args={'min_its':50, 'tol':1.e-10})
     """
     return functools.partial(parametric_cov, glm_loss, solve_args=solve_args)
 
+
+def standard_ci(X, y , active, leftout_indices, alpha=0.1):
+    """Normal-approximation (1 - alpha) intervals for the active logistic coefficients, fit on the left-out rows."""
+    import regreg.api as rr
+    from scipy.stats import norm as ndist
+
+    # count rows after slicing, so a boolean mask and an integer index array both give the right size
+    X_left, y_left = X[leftout_indices, ], y[leftout_indices]
+    size = X_left.shape[0]
+    loss = rr.glm.logistic(X_left, y_left)
+    boot_target, target_observed = pairs_bootstrap_glm(loss, active)
+    nactive = np.sum(active)
+    observed = target_observed[:nactive]
+    boot_target_restricted = lambda indices: boot_target(indices)[:nactive]
+    sampler = lambda: np.random.choice(size, size=(size,), replace=True)
+    target_cov = bootstrap_cov(sampler, boot_target_restricted)
+
+    # half-width of each interval: normal quantile times the bootstrap standard error
+    width = - ndist.ppf(alpha / float(2)) * np.sqrt(np.diag(target_cov))
+    LU = np.zeros((2, target_observed.shape[0]))
+    LU[0, :observed.shape[0]], LU[1, :observed.shape[0]] = observed - width, observed + width
+    return LU.T
+
+
+def standard_ci_sm(X, y, active, leftout_indices, alpha=0.1):
+    """Confidence intervals from a statsmodels Logit fit on the left-out rows, active columns only."""
+    import statsmodels.discrete.discrete_model as sm
+    X2, y2 = X[:, active][leftout_indices, :], y[leftout_indices]
+    result = sm.Logit(y2, X2).fit(disp=0)
+    # the transpose of whatever `conf_int` returns is handed back to the caller
+    LU = result.conf_int(alpha=alpha)
+    return LU.T
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
new file mode 100644
index 000000000..9c017d1c0
--- /dev/null
+++ b/selection/randomized/query.py
@@ -0,0 +1,1164 @@
+from itertools import product
+import numpy as np
+from scipy.stats import norm as ndist
+from scipy.optimize import bisect
+
+from ..distributions.api import discrete_family, intervals_from_sample
+from ..sampling.langevin import projected_langevin
+
+class query(object):
+    """Base class for a randomized query; `solve`, `setup_sampler` and `projection` are left to subclasses."""
+    def __init__(self, randomization):
+
+        self.randomization = randomization
+        self._solved = False
+        self._randomized = False
+        self._setup = False
+
+    # Methods reused by subclasses
+
+    def randomize(self):
+        """Build `self.randomized_loss` on the first call; reads `self.loss` and `self.epsilon`, which are not set in this class."""
+        if not self._randomized:
+            self.randomized_loss = self.randomization.randomize(self.loss, self.epsilon)
+        self._randomized = True
+
+    def randomization_gradient(self, data_state, data_transform, opt_state):
+        """
+        Randomization derivative at full state: returns `(data_grad, opt_grad - grad_log_jacobian(opt_state))`.
+        """
+
+        # reconstruction of randomization omega
+
+        opt_linear, opt_offset = self.opt_transform
+        data_linear, data_offset = data_transform
+        data_piece = data_linear.dot(data_state) + data_offset
+        opt_piece = opt_linear.dot(opt_state) + opt_offset
+
+        # value of the randomization omega
+
+        full_state = (data_piece + opt_piece)
+
+        # gradient of negative log density of randomization at omega
+
+        randomization_derivative = self.randomization.gradient(full_state)
+
+        # chain rule for data, optimization parts
+
+        data_grad = data_linear.T.dot(randomization_derivative)
+        opt_grad = opt_linear.T.dot(randomization_derivative)
+
+        return data_grad, opt_grad - self.grad_log_jacobian(opt_state)
+
+    def linear_decomposition(self, target_score_cov, target_cov, observed_target_state):
+        """
+        Compute the linear decomposition
+        of the score based on the target. This decomposition
+        writes the (limiting CLT version) of the data in the score as linear in the
+        target and in some independent Gaussian error.
+
+        This second independent piece is conditioned on, resulting
+        in a reconstruction of the score as an affine function of the target
+        where the offset is the part related to this independent
+        Gaussian error.
+        """
+
+        target_score_cov = np.atleast_2d(target_score_cov)
+        target_cov = np.atleast_2d(target_cov)
+        observed_target_state = np.atleast_1d(observed_target_state)
+
+        linear_part = target_score_cov.T.dot(np.linalg.pinv(target_cov))
+
+        offset = self.observed_score_state - linear_part.dot(observed_target_state)
+
+        # now compute the composition of this map with
+        # self.score_transform
+
+        score_linear, score_offset = self.score_transform
+        composition_linear_part = score_linear.dot(linear_part)
+
+        composition_offset = score_linear.dot(offset) + score_offset
+
+        return (composition_linear_part, composition_offset)
+
+    def reconstruction_map(self, data_state, data_transform, opt_state):
+        """Randomization reconstructed from data and optimization states; inputs are promoted to 2-d, one state per row."""
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
+
+        # reconstruction of randomization omega
+
+        data_state = np.atleast_2d(data_state)
+        opt_state = np.atleast_2d(opt_state)
+
+        opt_linear, opt_offset = self.opt_transform
+        data_linear, data_offset = data_transform
+        data_piece = data_linear.dot(data_state.T) + data_offset[:, None]
+        opt_piece = opt_linear.dot(opt_state.T) + opt_offset[:, None]
+
+        # value of the randomization omega
+
+        return (data_piece + opt_piece).T
+
+    def log_density(self, data_state, data_transform, opt_state):
+        """Evaluate `self.randomization.log_density` at the reconstructed randomization."""
+        full_data = self.reconstruction_map(data_state, data_transform, opt_state)
+        return self.randomization.log_density(full_data)
+
+    # Abstract methods to be
+    # implemented by subclasses
+
+    def grad_log_jacobian(self, opt_state):
+        r"""
+        log_jacobian depends only on data through
+        Hessian at \bar{\beta}_E which we
+        assume is close to Hessian at \bar{\beta}_E^*
+        """
+        # needs to be implemented for group lasso
+        return 0.
+
+    def jacobian(self, opt_state):
+        r"""
+        jacobian depends only on data through
+        Hessian at \bar{\beta}_E which we
+        assume is close to Hessian at \bar{\beta}_E^*
+        """
+        # needs to be implemented for group lasso
+        return 1.
+
+    def solve(self):
+        """Abstract; subclasses implement the actual solve."""
+        raise NotImplementedError('abstract method')
+
+    def setup_sampler(self):
+        """
+        Setup query to prepare for sampling.
+        Should set a few key attributes:
+
+            - observed_score_state
+            - num_opt_var
+            - observed_opt_state
+            - opt_transform
+            - score_transform
+
+        """
+        raise NotImplementedError('abstract method -- only keyword arguments')
+
+    def projection(self, opt_state):
+        """Abstract; subclasses map `opt_state` to its projection."""
+        raise NotImplementedError('abstract method -- projection of optimization variables')
+
+class multiple_queries(object):
+
+    '''
+    Combine several queries of a given data
+    through randomized algorithms.
+    '''
+
+    def __init__(self, objectives):
+        '''
+        Parameters
+        ----------
+        objectives : sequence
+           A sequence of randomized objective functions.
+        Notes
+        -----
+        Each element of `objectives` must
+        have a `setup_sampler` method that returns
+        a description of the distribution of the
+        data implicated in the objective function,
+        typically through the score or gradient
+        of the objective function.
+        These descriptions are passed to a function
+        `form_covariances` to linearly decompose
+        each score in terms of a target
+        and an asymptotically independent piece.
+        Returns
+        -------
+        None
+        '''
+
+        self.objectives = objectives
+
+    def solve(self):
+        '''
+        Ensure that each objective has been solved.
+        '''
+        for objective in self.objectives:
+            if not objective._solved:
+                objective.solve()
+
+    def setup_sampler(self, form_covariances):
+        '''
+        Parameters
+        ----------
+        form_covariances : callable
+           A callable used to decompose
+           target of inference and the score
+           of each objective.
+        Notes
+        -----
+        Calls `setup_sampler` on every objective
+        and collects the returned values
+        in `self.score_info`.
+        We also store a reference to `form_covariances`
+        which is called in the
+        construction of `targeted_sampler`.
+        Returns
+        -------
+        None
+        '''
+
+        self.form_covariances = form_covariances
+
+        nqueries = self.nqueries = len(self.objectives)
+
+        self.score_info = []
+
+        for objective in self.objectives:
+            score_ = objective.setup_sampler()
+            self.score_info.append(score_)
+
+    def setup_opt_state(self):
+        self.num_opt_var = 0
+        self.opt_slice = []
+        # consecutive slices, one per objective, into the stacked optimization state
+        for objective in self.objectives:
+            self.opt_slice.append(slice(self.num_opt_var, self.num_opt_var + objective.num_opt_var))
+            self.num_opt_var += objective.num_opt_var
+        # copy each objective's observed optimization variables into its slice
+        self.observed_opt_state = np.zeros(self.num_opt_var)
+        for i in range(len(self.objectives)):
+            self.observed_opt_state[self.opt_slice[i]] = self.objectives[i].observed_opt_state
+
+    def setup_target(self,
+                     target_info,
+                     observed_target_state,
+                     reference=None,
+                     target_set=None):
+
+        '''
+        Parameters
+        ----------
+        target_info : object
+           Passed as first argument to `self.form_covariances`.
+        observed_target_state : np.float
+           Observed value of the target estimator.
+        reference : np.float (optional)
+           Reference parameter for Gaussian approximation
+           of target.
+        target_set : sequence (optional)
+           Which coordinates of target are really
+           of interest. If not None, then coordinates
+           not in target_set are assumed to have 0
+           mean in the sampler.
+        Notes
+        -----
+        The variable `target_set` can be used for
+        a selected model test when some functionals
+        are assumed to have 0 mean in the limiting
+        Gaussian approximation. This can
+        sometimes mean an increase in power.
+        Returns
+        -------
+        target : targeted_sampler
+            An instance of `targeted_sampler` that
+            can be used to sample, test hypotheses,
+            form intervals.
+        '''
+
+        self.setup_opt_state()
+        # the sampler constructor reads opt_slice, num_opt_var and observed_opt_state set just above
+        return targeted_sampler(self,
+                                target_info,
+                                observed_target_state,
+                                self.form_covariances,
+                                target_set=target_set,
+                                reference=reference)
+
+    def setup_bootstrapped_target(self,
+                                  target_bootstrap,
+                                  observed_target_state,
+                                  target_alpha,
+                                  target_set=None,
+                                  reference=None,
+                                  boot_size=None):
+        """Set up the optimization state and return a `bootstrapped_target_sampler` built from the given arguments."""
+        self.setup_opt_state()
+
+        return bootstrapped_target_sampler(self,
+                                           target_bootstrap,
+                                           observed_target_state,
+                                           target_alpha,
+                                           target_set=target_set,
+                                           reference=reference,
+                                           boot_size=boot_size)
+
+class targeted_sampler(object):
+
+    '''
+    Object to sample from target of a selective sampler.
+    '''
+
+    def __init__(self,
+                 multi_view,
+                 target_info,
+                 observed_target_state,
+                 form_covariances,
+                 reference=None,
+                 target_set=None):
+
+        '''
+        Parameters
+        ----------
+        multi_view : `multiple_queries`
+           Instance of `multiple_queries`. Attributes
+           `objectives`, `score_info` are key
+           attributes. (Should maybe change constructor
+           to reflect only what is needed.)
+        target_info : object
+           Passed as first argument to `multi_view.form_covariances`.
+        observed_target_state : np.float
+           Observed value of the target estimator.
+        form_covariances : callable
+           Decomposes each score and the target. NOTE(review): the
+           body calls `multi_view.form_covariances`, not this argument.
+        reference : np.float (optional)
+           Reference parameter for Gaussian approximation
+           of target.
+        target_set : sequence (optional)
+           Which coordinates of target are really
+           of interest. If not None, then coordinates
+           not in target_set are assumed to have 0
+           mean in the sampler.
+        Notes
+        -----
+        The callable `form_covariances`
+        should accept `target_info` as first argument
+        and a keyword argument `cross_terms` which
+        correspond to the `score_info` of each
+        objective of `multi_view`. This is used in
+        a linear decomposition of each score into
+        a piece correlated with `target` and
+        an independent piece.
+        The independent piece is treated as a
+        nuisance parameter and conditioned on
+        (i.e. is fixed within the sampler).
+        '''
+
+        # sampler will draw samples for bootstrap
+        # these are arguments to target_info and score_bootstrap
+        # nonparametric bootstrap is np.random.choice(n, size=(n,), replace=True)
+        # residual bootstrap might be X_E.dot(\bar{\beta}_E)
+        # + np.random.choice(resid, size=(n,), replace=True)
+
+        # if target_set is not None, we assume that
+        # these coordinates (specified by a list of coordinates) of target
+        # are independent of the rest:
+        # the corresponding block of `target_cov` is zeroed out
+
+        # we need these attributes of multi_view
+
+        self.nqueries = len(multi_view.objectives)
+        self.opt_slice = multi_view.opt_slice
+        self.objectives = multi_view.objectives
+
+        self.observed_target_state = observed_target_state
+        self.shape = observed_target_state.shape
+
+        covariances = multi_view.form_covariances(target_info, cross_terms=multi_view.score_info)
+        self.target_cov = np.atleast_2d(covariances[0])
+
+        # XXX we're not really using this target_set in our tests
+
+        # zero out some coordinates of target_cov
+        # to enforce independence of target and null statistics
+
+        if target_set is not None:
+            null_set = set(range(self.target_cov.shape[0])).difference(target_set)
+            for t, n in product(target_set, null_set):
+                self.target_cov[t, n] = 0.
+                self.target_cov[n, t] = 0.
+
+        self.score_cov = covariances[1:]
+
+        self.target_transform = []
+        for i in range(self.nqueries):
+            self.target_transform.append(
+                self.objectives[i].linear_decomposition(self.score_cov[i],
+                                                        self.target_cov,
+                                                        self.observed_target_state))
+
+        self.target_inv_cov = np.linalg.inv(self.target_cov)
+        # size of reference? should it only be target_set?
+        if reference is None:
+            reference = np.zeros(self.target_inv_cov.shape[0])
+        self.reference = reference  # property setter: also computes self._reference_inv, used below
+
+        # need to vectorize the state for Langevin
+
+        self.overall_opt_slice = slice(0, multi_view.num_opt_var)
+        self.target_slice = slice(multi_view.num_opt_var,
+                                  multi_view.num_opt_var + self._reference_inv.shape[0])
+        self.keep_slice = self.target_slice
+
+        # set the observed state
+
+        self.observed_state = np.zeros(multi_view.num_opt_var + self._reference_inv.shape[0])
+        self.observed_state[self.target_slice] = self.observed_target_state
+        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
+
+    def set_reference(self, reference):
+        self._reference = np.atleast_1d(reference)
+        self._reference_inv = self.target_inv_cov.dot(self.reference)
+
+    def get_reference(self):
+        return self._reference
+
+    reference = property(get_reference, set_reference)
+
+    def projection(self, state):
+        '''
+        Projection map of projected Langevin sampler.
+        Parameters
+        ----------
+        state : np.float
+           Vectorized state: optimization variables first, then target.
+           Only the optimization variables are projected,
+           each objective acting on its own slice.
+        Returns
+        -------
+        projected_state : np.float
+        '''
+        # `state` is overwritten in place and the same array is returned
+        opt_state = state[self.overall_opt_slice]
+        new_opt_state = np.zeros_like(opt_state)
+        for i in range(self.nqueries):
+            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
+        state[self.overall_opt_slice] = new_opt_state
+        return state
+
+    def gradient(self, state):
+        '''
+        Gradient of log-density at current state.
+        Parameters
+        ----------
+        state : np.float
+           Vectorized state: optimization variables first, then target.
+        Returns
+        -------
+        gradient : np.float
+        '''
+
+        target_state, opt_state = state[self.target_slice], state[self.overall_opt_slice]
+        target_grad, opt_grad = np.zeros_like(target_state), np.zeros_like(opt_state)
+        full_grad = np.zeros_like(state)
+
+        # randomization_gradient are gradients of a CONVEX function
+
+        for i in range(self.nqueries):
+            target_grad_curr, opt_grad[self.opt_slice[i]] = \
+                self.objectives[i].randomization_gradient(target_state, self.target_transform[i], opt_state[self.opt_slice[i]])
+            target_grad += target_grad_curr.copy()
+        # flip the sign of the accumulated pieces, then add the Gaussian term of the target
+        target_grad = - target_grad
+        target_grad += self._reference_inv.flatten() - self.target_inv_cov.dot(target_state)
+        full_grad[self.target_slice] = target_grad
+        full_grad[self.overall_opt_slice] = -opt_grad
+
+        return full_grad
+
+    def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
+        '''
+        Sample `target` from selective density
+        using projected Langevin sampler with
+        gradient map `self.gradient` and
+        projection map `self.projection`.
+        Parameters
+        ----------
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to `1. / self.crude_lipschitz()`.
+           The chain is started at `self.observed_state`.
+        keep_opt : bool
+           Should we return optimization variables
+           as well as the target?
+        Returns
+        -------
+        samples : np.float, one row per draw kept after `burnin`
+        '''
+
+        if stepsize is None:
+            stepsize = 1. / self.crude_lipschitz()
+
+        if keep_opt:
+            keep_slice = slice(None, None, None)
+        else:
+            keep_slice = self.keep_slice
+
+        target_langevin = projected_langevin(self.observed_state.copy(),
+                                             self.gradient,
+                                             self.projection,
+                                             stepsize)
+
+        samples = []
+        for i in range(ndraw + burnin):
+            target_langevin.next()  # advance the chain by one step
+            if (i >= burnin):
+                samples.append(target_langevin.state[keep_slice].copy())
+
+        return np.asarray(samples)
+
+    def hypothesis_test(self,
+                        test_stat,
+                        observed_value,
+                        ndraw=10000,
+                        burnin=2000,
+                        stepsize=None,
+                        sample=None,
+                        parameter=None,
+                        alternative='twosided'):
+
+        '''
+        Test a hypothesis about the target by comparing
+        `observed_value` with `test_stat` evaluated on
+        a sample from the selective density (drawn with
+        `self.sample` unless `sample` is given).
+        Parameters
+        ----------
+        test_stat : callable
+           Test statistic to evaluate on sample from
+           selective distribution.
+        observed_value : float
+           Observed value of test statistic.
+           Used in p-value calculation.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc. If not None,
+           `ndraw, burnin, stepsize` are ignored.
+        parameter : np.float (optional)
+           If None, defaults to `self.reference`.
+           Otherwise, sample is reweighted using Gaussian tilting.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalue : float
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
+
+        if parameter is None:
+            parameter = self.reference
+
+        delta = self.target_inv_cov.dot(parameter - self.reference)
+        W = np.exp(sample.dot(delta))
+
+        family = discrete_family(sample_test_stat, W)
+        pval = family.cdf(0, observed_value)
+
+        if alternative == 'greater':
+            return 1 - pval
+        elif alternative == 'less':
+            return pval
+        else:
+            return 2 * min(pval, 1 - pval)
+
+    def confidence_intervals(self,
+                             observed,
+                             ndraw=10000,
+                             burnin=2000,
+                             stepsize=None,
+                             sample=None,
+                             level=0.9):
+        '''
+        Parameters
+        ----------
+        observed : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        level : float (optional)
+            Specify the
+            confidence level.
+        Notes
+        -----
+        Construct selective confidence intervals
+        for each parameter of the target.
+        Returns
+        -------
+        intervals : [(float, float)]
+            List of confidence intervals.
+        '''
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        nactive = observed.shape[0]
+        intervals_instance = intervals_from_sample(self.reference,
+                                                   sample,
+                                                   observed,
+                                                   self.target_cov)
+
+        return intervals_instance.confidence_intervals_all(level=level)
+
+    def coefficient_pvalues(self,
+                            observed,
+                            parameter=None,
+                            ndraw=10000,
+                            burnin=2000,
+                            stepsize=None,
+                            sample=None,
+                            alternative='twosided'):
+        '''
+        Construct selective p-values
+        for each parameter of the target.
+        Parameters
+        ----------
+        observed : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        parameter : np.float (optional)
+            A vector of parameters with shape `self.shape`
+            at which to evaluate p-values. Defaults
+            to `np.zeros(self.shape)`.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalues : np.float
+
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        if parameter is None:
+            parameter = np.zeros(self.shape)
+
+        nactive = observed.shape[0]
+        intervals_instance = intervals_from_sample(self.reference,
+                                                   sample,
+                                                   observed,
+                                                   self.target_cov)
+
+        pval = intervals_instance.pivots_all(parameter)
+
+        if alternative == 'greater':
+            return 1 - pval
+        elif alternative == 'less':
+            return pval
+        else:
+            return 2 * np.minimum(pval, 1 - pval)
+
+    def crude_lipschitz(self):
+        """
+        A crude Lipschitz constant for the
+        gradient of the log-density.
+        Returns
+        -------
+        lipschitz : float
+
+        """
+        lipschitz = np.linalg.svd(self.target_inv_cov)[1].max()
+        for transform, objective in zip(self.target_transform, self.objectives):
+            lipschitz += np.linalg.svd(transform[0])[1].max()**2 * objective.randomization.lipschitz
+            lipschitz += np.linalg.svd(objective.score_transform[0])[1].max()**2 * objective.randomization.lipschitz
+        return lipschitz
+
+
+    def reconstruction_map(self, state):
+        '''
+        Reconstruction of randomization at current state.
+        Parameters
+        ----------
+        state : np.float
+           Vectorized state: optimization variables first, then target.
+           Can be array with each row a state.
+        Returns
+        -------
+        reconstructed : np.float
+           Has shape of `opt_vars` with same number of rows
+           as `state`, before the final `np.squeeze`;
+           size-1 axes are dropped from what is returned.
+        '''
+
+        state = np.atleast_2d(state)
+        if len(state.shape) > 2:
+            raise ValueError('expecting at most 2-dimensional array')
+
+        target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
+        reconstructed = np.zeros_like(opt_state)
+
+        for i in range(self.nqueries):
+            reconstructed[:, self.opt_slice[i]] = self.objectives[i].reconstruction_map(target_state,
+                                                                                        self.target_transform[i],
+                                                                                        opt_state[:,self.opt_slice[i]])
+        return np.squeeze(reconstructed)
+
+    def log_randomization_density(self, state):
+        '''
+        Log of randomization density at current state.
+        Parameters
+        ----------
+        state : np.float
+           Vectorized state: optimization variables first, then target.
+           Can be two-dimensional with each row a state.
+        Returns
+        -------
+        density : np.float
+            One value per row of `state`; a scalar array for a single state.
+        '''
+
+        reconstructed = self.reconstruction_map(state)
+        if reconstructed.ndim < 2:  # reconstruction_map squeezes its result: restore one row per state
+            reconstructed = reconstructed.reshape((np.atleast_2d(state).shape[0], -1))
+        value = np.zeros(reconstructed.shape[0])
+        for i in range(self.nqueries):
+            value += self.objectives[i].randomization.log_density(reconstructed[:, self.opt_slice[i]])
+        return np.squeeze(value)
+
+    def hypothesis_test_translate(self,
+                                  sample,
+                                  test_stat,
+                                  observed_target,
+                                  parameter=None,
+                                  alternative='twosided'):
+
+        '''
+        Carry out a hypothesis test
+        based on the distribution of the
+        residual `observed_target - target`
+        sampled at `self.reference`.
+        Parameters
+        ----------
+        sample : np.array
+           Sample of target and optimization variables drawn at `self.reference`.
+        test_stat : callable
+           Test statistic to evaluate on sample from
+           selective distribution.
+        observed_target : np.float
+           Observed value of target estimate.
+           Used in p-value calculation.
+        parameter : np.float (optional)
+           If None, defaults to `self.reference`.
+           It is passed on to `translate_intervals(...).pivot`.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pivot : value returned by `translate_intervals(...).pivot`
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        _intervals = translate_intervals(self,
+                                         sample,
+                                         observed_target)
+
+        if parameter is None:
+            parameter = self.reference
+
+        return _intervals.pivot(test_stat,
+                                parameter,
+                                alternative=alternative)
+
+
+    def confidence_intervals_translate(self,
+                                       observed_target,
+                                       ndraw=10000,
+                                       burnin=2000,
+                                       stepsize=None,
+                                       sample=None,
+                                       level=0.9):
+        '''
+        Parameters
+        ----------
+        observed_target : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, used in place of a fresh draw; the default draw is
+           `self.sample(..., keep_opt=True)`, i.e. it includes optimization variables.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        level : float (optional)
+            Specify the
+            confidence level.
+        Notes
+        -----
+        Construct selective confidence intervals
+        for each parameter of the target.
+        Returns
+        -------
+        intervals : np.array
+            One entry per coordinate of `observed_target`.
+        '''
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize, keep_opt=True)
+
+        _intervals = translate_intervals(self,
+                                         sample,
+                                         observed_target)
+
+        limits = []
+
+        for i in range(observed_target.shape[0]):
+            keep = np.zeros_like(observed_target)
+            keep[i] = 1.
+            limits.append(_intervals.confidence_interval(keep, level=level))
+
+        return np.array(limits)
+
+    def coefficient_pvalues_translate(self,
+                                      observed_target,
+                                      parameter=None,
+                                      ndraw=10000,
+                                      burnin=2000,
+                                      stepsize=None,
+                                      sample=None,
+                                      alternative='twosided'):
+        '''
+        Compute selective p-values for each coordinate of the target.
+
+        Parameters
+        ----------
+        observed_target : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        parameter : np.float (optional)
+            A vector of parameters with shape `self.shape`
+            at which to evaluate p-values. Defaults
+            to `np.zeros_like(observed_target)`.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults to a crude
+           estimate based on the dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample drawn at `self.reference`;
+           when None, one is drawn via `self.sample(..., keep_opt=True)`.
+           Allows reuse of the same sample for intervals, tests, etc.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalues : np.float
+            P values for each coefficient.
+
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize, keep_opt=True)
+
+        if parameter is None:
+            parameter = np.zeros_like(observed_target)
+
+        _intervals = translate_intervals(self,
+                                         sample,
+                                         observed_target)
+
+        pvalues = []
+
+        for i in range(observed_target.shape[0]):
+            keep = np.zeros_like(observed_target)
+            keep[i] = 1.
+
+            _parameter = self.reference.copy()
+            _parameter[i] = parameter[i]
+            pvalues.append(_intervals.pivot(lambda x: keep.dot(x),
+                                            _parameter,
+                                            alternative=alternative))
+
+        return np.array(pvalues)
+
+class bootstrapped_target_sampler(targeted_sampler):
+    """Targeted sampler whose sampled state holds bootstrap weights for the target."""
+    # make one of these for each hypothesis test
+
+    def __init__(self,
+                 multi_view,
+                 target_info,
+                 observed_target_state,
+                 target_alpha,
+                 target_set=None,
+                 reference=None,
+                 boot_size=None):
+
+        # sampler will draw bootstrapped weights for the target
+
+        if boot_size is None:
+            boot_size = target_alpha.shape[1]
+
+        targeted_sampler.__init__(self, multi_view,
+                                  target_info,
+                                  observed_target_state,
+                                  target_set,
+                                  reference)
+        # for bootstrap
+
+        self.boot_size = boot_size
+        self.target_alpha = target_alpha
+        self.boot_transform = []
+
+        # per query: compose its linear decomposition with `target_alpha` so the map acts on bootstrap weights
+        for i in range(self.nqueries):
+            composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
+                                                                                                  self.target_cov,
+                                                                                                  self.observed_target_state)
+            boot_linear_part = np.dot(composition_linear_part, target_alpha)
+            boot_offset = composition_offset + np.dot(composition_linear_part, self.reference).flatten()
+            self.boot_transform.append((boot_linear_part, boot_offset))
+
+        # set the observed state for bootstrap
+
+        self.boot_slice = slice(multi_view.num_opt_var, multi_view.num_opt_var + self.boot_size)
+        self.observed_state = np.zeros(multi_view.num_opt_var + self.boot_size)
+        self.observed_state[self.boot_slice] = np.ones(self.boot_size)
+        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
+
+
+    def gradient(self, state):
+        """Gradient at `state` (bootstrap weights plus optimization variables) handed to the Langevin sampler."""
+        boot_state, opt_state = state[self.boot_slice], state[self.overall_opt_slice]
+        boot_grad, opt_grad = np.zeros_like(boot_state), np.zeros_like(opt_state)
+        full_grad = np.zeros_like(state)
+
+        # randomization_gradient are gradients of a CONVEX function
+
+        for i in range(self.nqueries):
+            boot_grad_curr, opt_grad[self.opt_slice[i]] = \
+                self.objectives[i].randomization_gradient(boot_state, self.boot_transform[i],
+                                                          opt_state[self.opt_slice[i]])
+            boot_grad += boot_grad_curr.copy()
+
+        boot_grad = -boot_grad
+        boot_grad -= boot_state  # NOTE(review): presumably the gradient of a standard normal log-density on the weights -- confirm
+
+        full_grad[self.boot_slice] = boot_grad
+        full_grad[self.overall_opt_slice] = -opt_grad
+
+        return full_grad
+
+    def sample(self, ndraw, burnin, stepsize = None, keep_opt=False):
+        if stepsize is None:
+            stepsize = 1. / self.observed_state.shape[0]
+        # run projected Langevin started from a copy of the observed state
+        bootstrap_langevin = projected_langevin(self.observed_state.copy(),
+                                                self.gradient,
+                                                self.projection,
+                                                stepsize)
+        if keep_opt:
+            boot_slice = slice(None, None, None)  # keep every coordinate of the state
+        else:
+            boot_slice = self.boot_slice
+
+        samples = []
+        for i in range(ndraw + burnin):
+            bootstrap_langevin.next()
+            if (i >= burnin):
+                samples.append(bootstrap_langevin.state[boot_slice].copy())
+        samples = np.asarray(samples)
+        # map bootstrap weights to target samples: weights.dot(target_alpha.T) + reference
+        if keep_opt:
+            target_samples = samples[:,self.boot_slice].dot(self.target_alpha.T) + self.reference[None, :]
+            opt_sample0 = samples[0,self.overall_opt_slice]
+            result = np.zeros((samples.shape[0], opt_sample0.shape[0] + target_samples.shape[1]))
+            result[:,self.overall_opt_slice] = samples[:,self.overall_opt_slice]
+            result[:,self.target_slice] = target_samples
+            return result
+        else:
+            target_samples = samples.dot(self.target_alpha.T) + self.reference[None, :]
+            return target_samples
+
+def naive_confidence_intervals(target, observed, alpha=0.1):
+    """
+    Naive (unadjusted) Gaussian confidence intervals for a target.
+
+    Parameters
+    ----------
+    target : `targeted_sampler`
+        Supplies `shape` and the covariance `target_cov`.
+    observed : np.float
+        A vector of observed data of shape `target.shape`
+    alpha : float (optional)
+        1 - confidence level.
+    Returns
+    -------
+    intervals : np.float
+        One (lower, upper) row per coordinate of the target.
+    """
+    z = -ndist.ppf(alpha / 2.)
+    p = target.shape[0]
+    intervals = np.zeros((p, 2))
+    for j in range(p):
+        half_width = np.sqrt(target.target_cov[j, j]) * z
+        intervals[j] = observed[j] - half_width, observed[j] + half_width
+    return intervals
+
+class translate_intervals(object):
+
+    r"""
+    Location family based intervals.
+    The randomization density should be `g` composed with the affine
+    mapping and take an argument like one row of sample.
+    target_linear is the linear part of the affine mapping with
+    respect to target.
+    Weights for a given candidate will look like
+          randomization_density(sample + (candidate, 0, 0) - (reference, 0, 0)) /
+          randomization_density(sample)
+    if the samples are samples of \bar{\beta}. If we have samples of
+    \Delta from our reference, then the weights will look like
+          randomization_density(sample + (candidate, 0, 0)) /
+          randomization_density(sample + (reference, 0, 0))
+    We are assuming `sample` is sampled from `targeted_sampler.reference`.
+    """
+
+    def __init__(self,
+                 targeted_sampler,
+                 sample,
+                 observed):
+        self.targeted_sampler = targeted_sampler
+        self.observed = observed.copy() # this is our observed unpenalized estimator
+        self._logden = targeted_sampler.log_randomization_density(sample)
+        self._delta = sample.copy()
+        self._delta[:, targeted_sampler.target_slice] -= targeted_sampler.reference[None, :]
+
+    def pivot(self,
+              test_statistic,
+              candidate,
+              alternative='twosided'):
+        '''
+        Importance-weighted p-value of `test_statistic` at `candidate`.
+
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalue : np.float
+        '''
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        observed_delta = self.observed - candidate
+        observed_stat = test_statistic(observed_delta)
+
+        _, weights = self._weights(candidate)
+        # the statistic is evaluated on the target coordinates of the sample minus the reference
+        sample_stat = np.array([test_statistic(s) for s in self._delta[:, self.targeted_sampler.target_slice]])
+
+        pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
+
+        if alternative == 'twosided':
+            return 2 * min(pivot, 1 - pivot)
+        elif alternative == 'less':
+            return pivot
+        else:
+            return 1 - pivot
+
+    def confidence_interval(self, linear_func, level=0.95, how_many_sd=20):
+        """Interval for `linear_func` of the target, found by bisection on the 'less' pivot."""
+        target_delta = self._delta[:,self.targeted_sampler.target_slice]
+        projected_delta = target_delta.dot(linear_func)
+        projected_observed = self.observed.dot(linear_func)
+
+        # root search bracket: +/- how_many_sd sample standard deviations of the projection
+        _norm = np.linalg.norm(linear_func)
+        halfwidth = how_many_sd * np.std(projected_delta)
+        grid_min, grid_max = -halfwidth, halfwidth
+
+        reference = self.targeted_sampler.reference
+
+        def _rootU(gamma):
+            return self.pivot(lambda x: linear_func.dot(x),
+                              reference + gamma * linear_func / _norm**2,
+                              alternative='less') - (1 - level) / 2.
+
+        def _rootL(gamma):
+            return self.pivot(lambda x: linear_func.dot(x),
+                              reference + gamma * linear_func / _norm**2,
+                              alternative='less') - (1 + level) / 2.
+
+        xtol = 1.e-5 * (grid_max - grid_min)
+        upper = bisect(_rootU, grid_min, grid_max, xtol=xtol)
+        lower = bisect(_rootL, grid_min, grid_max, xtol=xtol)
+
+        return lower + projected_observed, upper + projected_observed
+
+    # Private methods
+
+    def _weights(self, candidate):
+        """Return the sample shifted to `candidate` and its importance weights (rescaled so the largest is 1)."""
+        candidate_sample = self._delta.copy()
+        candidate_sample[:, self.targeted_sampler.target_slice] += candidate[None, :]
+        _lognum = self.targeted_sampler.log_randomization_density(candidate_sample)
+
+        _logratio = _lognum - self._logden
+        _logratio -= _logratio.max()
+
+        return candidate_sample, np.exp(_logratio)
+
+
diff --git a/selection/randomized/randomization.py b/selection/randomized/randomization.py
index 9a7fe11ef..fcd10ac27 100644
--- a/selection/randomized/randomization.py
+++ b/selection/randomized/randomization.py
@@ -1,12 +1,10 @@
 """
 Different randomization options for selective sampler.
-
 Main method used in selective sampler is the gradient method which
-should be a gradient of the negative of the log-density. For a 
+should be a gradient of the negative of the log-density. For a
 Gaussian density, this will be a convex function, not a concave function.
 """
-
-from functools import partial
+from __future__ import division, print_function
 
 import numpy as np
 import regreg.api as rr
@@ -14,14 +12,15 @@
 
 class randomization(rr.smooth_atom):
 
-    def __init__(self, 
-                 shape, 
-                 density, 
-                 grad_negative_log_density, 
-                 sampler, 
-                 CGF=None, # cumulant generating function and gradient
-                 CGF_conjugate=None, # convex conjugate of CGF and gradient
-                 lipschitz=1):
+    def __init__(self,
+                 shape,
+                 density,
+                 grad_negative_log_density,
+                 sampler,
+                 CGF=None,  # cumulant generating function and gradient
+                 CGF_conjugate=None,  # convex conjugate of CGF and gradient
+                 lipschitz=1,
+                 log_density=None):
 
         rr.smooth_atom.__init__(self,
                                 shape)
@@ -29,7 +28,11 @@ def __init__(self,
         self._grad_negative_log_density = grad_negative_log_density
         self._sampler = sampler
         self.lipschitz = lipschitz
-        
+
+        if log_density is None:
+            log_density = lambda x: np.log(density(x))
+
+        self._log_density = log_density
         self.CGF = CGF
         self.CGF_conjugate = CGF_conjugate
 
@@ -52,91 +55,232 @@ def sample(self, size=()):
     def gradient(self, perturbation):
         """
         Evaluate the gradient of the log-density.
-
         Parameters
         ----------
-
         perturbation : np.float
-
         Returns
         -------
-
         gradient : np.float
         """
         return self.smooth_objective(perturbation, mode='grad')
 
+    def log_density(self, perturbation):
+        """
+        Evaluate the log-density at `perturbation`.
+        Parameters
+        ----------
+        perturbation : np.float
+        Returns
+        -------
+        value : output of the stored log-density callable, passed through `np.squeeze`
+        """
+        return np.squeeze(self._log_density(perturbation))
+
+    def randomize(self, loss, epsilon=0):
+        """
+        Randomize the loss: wrap it in a `rr.smooth_sum` whose quadratic term is
+        `rr.identity_quadratic(epsilon, 0, -omega, 0)`, with `omega` drawn by `self.sample()`.
+        """
+        randomized_loss = rr.smooth_sum([loss])
+        _randomZ = self.sample()
+        randomized_loss.quadratic = rr.identity_quadratic(epsilon, 0, -_randomZ, 0)
+        return randomized_loss
+
     @staticmethod
     def isotropic_gaussian(shape, scale):
+        """
+        Isotropic Gaussian randomization with SD `scale`.
+        Parameters
+        ----------
+        shape : tuple
+            Shape of noise.
+        scale : float
+            SD of noise (each coordinate has variance `scale**2`).
+        """
         rv = ndist(scale=scale, loc=0.)
-        density = lambda x: rv.pdf(x)
+        density = lambda x: np.prod(rv.pdf(x))
         grad_negative_log_density = lambda x: x / scale**2
         sampler = lambda size: rv.rvs(size=shape + size)
         CGF = isotropic_gaussian_CGF(shape, scale)
         CGF_conjugate = isotropic_gaussian_CGF_conjugate(shape, scale)
-        return randomization(shape, 
-                             density, 
-                             grad_negative_log_density, 
-                             sampler, 
+        # np.prod rather than np.product: the latter alias was removed in NumPy 2.0
+        p = np.prod(shape)
+        constant = -0.5 * p * np.log(2 * np.pi * scale**2)
+        return randomization(shape,
+                             density,
+                             grad_negative_log_density,
+                             sampler,
                              CGF=CGF,
                              CGF_conjugate=CGF_conjugate,
-                             lipschitz=1./scale**2)
+                             lipschitz=1./scale**2,
+                             log_density = lambda x: -0.5 * (np.atleast_2d(x)**2).sum(1) / scale**2 + constant)
 
     @staticmethod
     def gaussian(covariance):
+        """
+        Gaussian noise with a given covariance.
+        Parameters
+        ----------
+        covariance : np.float((*,*))
+            Positive definite covariance matrix. Non-negative definite
+            will raise an error.
+        """
         precision = np.linalg.inv(covariance)
-        sqrt_precision = np.linalg.cholesky(precision)
+        sqrt_precision = np.linalg.cholesky(precision).T
         _det = np.linalg.det(covariance)
         p = covariance.shape[0]
         _const = np.sqrt((2*np.pi)**p * _det)
         density = lambda x: np.exp(-(x * precision.dot(x)).sum() / 2) / _const
         grad_negative_log_density = lambda x: precision.dot(x)
-        sampler = lambda size: sqrt_precision.dot(np.random.standard_normal((p,) + size))
-        return randomization((p,), 
-                             density, 
-                             grad_negative_log_density, 
-                             sampler, 
-                             lipschitz=np.linalg.svd(precision)[1].max())
+        sqrt_covariance = np.linalg.cholesky(covariance)  # C with C C^T = covariance, so C z ~ N(0, covariance)
+        sampler = lambda size: sqrt_covariance.dot(np.random.standard_normal((p,) + size))
+        return randomization((p,),
+                             density,
+                             grad_negative_log_density,
+                             sampler,
+                             lipschitz=np.linalg.svd(precision)[1].max(),
+                             log_density = lambda x: -np.sum(sqrt_precision.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const))
 
     @staticmethod
     def laplace(shape, scale):
+        """
+        Standard Laplace noise multiplied by `scale`
+        Parameters
+        ----------
+        shape : tuple
+            Shape of noise.
+        scale : float
+            Scale of noise.
+        """
         rv = laplace(scale=scale, loc=0.)
-        density = lambda x: rv.pdf(x)
+        density = lambda x: np.prod(rv.pdf(x))
         grad_negative_log_density = lambda x: np.sign(x) / scale
         sampler = lambda size: rv.rvs(size=shape + size)
         CGF = laplace_CGF(shape, scale)
         CGF_conjugate = laplace_CGF_conjugate(shape, scale)
-        return randomization(shape, 
-                             density, 
-                             grad_negative_log_density, 
-                             sampler, 
+        constant = -np.prod(shape) * np.log(2 * scale)  # log normalizer: each coordinate contributes -log(2 * scale)
+        return randomization(shape,
+                             density,
+                             grad_negative_log_density,
+                             sampler,
                              CGF=CGF,
                              CGF_conjugate=CGF_conjugate,
-                             lipschitz=1./scale**2)
+                             lipschitz=1./scale**2,
+                             log_density = lambda x: -np.fabs(np.atleast_2d(x)).sum(1) / scale + constant)
 
     @staticmethod
     def logistic(shape, scale):
+        """
+        Standard logistic noise multiplied by `scale`
+        Parameters
+        ----------
+        shape : tuple
+            Shape of noise.
+        scale : float
+            Scale of noise.
+        """
         # from http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.logistic.html
-        density = lambda x: (np.exp(-x / scale) / (1 + np.exp(-x / scale))**2) / scale
+        density = lambda x: (np.prod(np.exp(-x / scale) /
+                                     (1 + np.exp(-x / scale))**2)
+                             / scale**(np.prod(x.shape)))
         # negative log density is (with \mu=0)
         # x/s + log(s) + 2 \log (1 + e(-x/s))
         grad_negative_log_density = lambda x: (1 - np.exp(-x / scale)) / ((1 + np.exp(-x / scale)) * scale)
         sampler = lambda size: np.random.logistic(loc=0, scale=scale, size=shape + size)
-        return randomization(shape, 
-                             density, 
-                             grad_negative_log_density, 
-                             sampler, 
-                             lipschitz=.25/scale**2)
+        # np.logaddexp(0, -x/s) equals log(1 + exp(-x/s)) but does not overflow for very negative x
+        constant = - np.prod(shape) * np.log(scale)
+        return randomization(shape,
+                             density,
+                             grad_negative_log_density,
+                             sampler,
+                             lipschitz=.25/scale**2,
+                             log_density = lambda x: -np.atleast_2d(x).sum(1) / scale - 2 * np.logaddexp(0, -np.atleast_2d(x) / scale).sum(1) + constant)
+
+class split(randomization):
+    """Randomization induced by subsampling: `randomize` evaluates the loss on a random subsample of the cases."""
+    def __init__(self, shape, subsample_size, total_size):
+
+        self.subsample_size = subsample_size
+        self.total_size = total_size
+
+        rr.smooth_atom.__init__(self,
+                                shape)
+
+    def set_covariance(self, covariance):
+        """
+        Once covariance has been set, then
+        the usual API of randomization will work.
+        """
+        self._covariance = covariance
+        precision = np.linalg.inv(covariance)
+        sqrt_precision = np.linalg.cholesky(precision).T
+        sqrt_covariance = np.linalg.cholesky(covariance)  # C with C C^T = covariance, so C z ~ N(0, covariance)
+        p = covariance.shape[0]
+        _const = np.sqrt((2*np.pi)**p * np.linalg.det(covariance))
+        self._density = lambda x: np.exp(-(x * precision.dot(x)).sum() / 2) / _const
+        self._grad_negative_log_density = lambda x: precision.dot(x)
+        self._sampler = lambda size: sqrt_covariance.dot(np.random.standard_normal((p,) + size))
+        self.lipschitz = np.linalg.svd(precision)[1].max()
+        def _log_density(x):
+            return -np.sum(sqrt_precision.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const)
+        self._log_density = _log_density
+
+    def smooth_objective(self, perturbation, mode='both', check_feasibility=False):
+        if not hasattr(self, "_covariance"):
+            raise ValueError('first set the covariance')
+        return randomization.smooth_objective(self, perturbation, mode=mode, check_feasibility=check_feasibility)
+
+    def sample(self, size=()):
+        if not hasattr(self, "_covariance"):
+            raise ValueError('first set the covariance')
+        return randomization.sample(self, size=size)
+
+    def gradient(self, perturbation):
+        if not hasattr(self, "_covariance"):
+            raise ValueError('first set the covariance')
+        return randomization.gradient(self, perturbation)
+
+    def randomize(self, loss, epsilon):
+        """
+        Parameters
+        ----------
+        loss : rr.glm
+            A glm loss with a `subsample` method.
+        epsilon : float
+            Coefficient in front of quadratic term
+        Returns
+        -------
+
+        Subsampled loss multiplied by `n / m` where
+        m is the subsample size out of a total
+        sample size of n.
+        The quadratic term is not multiplied by `n / m`
+        """
+        n, m = self.total_size, self.subsample_size
+        inv_frac = n / m
+        quadratic = rr.identity_quadratic(epsilon, 0, 0, 0)
+        # boolean mask with exactly m True entries, shuffled so the m kept cases are random
+        idx = np.zeros(n, bool)
+        idx[:m] = 1
+        np.random.shuffle(idx)
+
+        randomized_loss = loss.subsample(idx)
+        randomized_loss.coef *= inv_frac
+
+        randomized_loss.quadratic = quadratic
+
+        return randomized_loss
 
 # Conjugate generating function for Gaussian
 
 def isotropic_gaussian_CGF(shape, scale): # scale = SD
     return cumulant(shape,
-                    lambda x: (x**2).sum() * scale**2 / 2., 
+                    lambda x: (x**2).sum() * scale**2 / 2.,
                     lambda x: scale**2 * x)
 
 def isotropic_gaussian_CGF_conjugate(shape, scale):  # scale = SD
     return cumulant_conjugate(shape,
-                              lambda x: (x**2).sum() / (2 * scale**2), 
+                              lambda x: (x**2).sum() / (2 * scale**2),
                               lambda x: x / scale**2)
 
 # Conjugate generating function for Laplace
@@ -196,31 +340,24 @@ def __init__(self,
 
     def smooth_objective(self, param, mode='both', check_feasibility=False):
         """
-
         Evaluate the smooth objective, computing its value, gradient or both.
-
         Parameters
         ----------
-
         mean_param : ndarray
             The current parameter values.
-
         mode : str
-            One of ['func', 'grad', 'both']. 
-
+            One of ['func', 'grad', 'both'].
         check_feasibility : bool
             If True, return `np.inf` when
             point is not feasible, i.e. when `mean_param` is not
             in the domain.
-
         Returns
         -------
-
-        If `mode` is 'func' returns just the objective value 
+        If `mode` is 'func' returns just the objective value
         at `mean_param`, else if `mode` is 'grad' returns the gradient
         else returns both.
         """
-        
+
         param = self.apply_offset(param)
 
         if mode == 'func':
@@ -244,4 +381,3 @@ class cumulant_conjugate(from_grad_func):
     Class for conjugate of a CGF.
     """
     pass
-

From f0267ffd1e986a046d8e959b637ebb4c566867ae Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c765.SUNet>
Date: Tue, 20 Dec 2016 11:21:38 -0800
Subject: [PATCH 002/617] added a file on fixed X and updated threshold score
 file

---
 selection/bayesian/ci_via_approx_density.py   |   3 -
 .../bayesian/fixed_X_ci_via_approx_density.py | 395 ++++++++++++++++++
 selection/randomized/api.py                   |  14 +-
 selection/randomized/threshold_score.py       | 120 ++++++
 4 files changed, 523 insertions(+), 9 deletions(-)
 create mode 100644 selection/bayesian/fixed_X_ci_via_approx_density.py
 create mode 100644 selection/randomized/threshold_score.py

diff --git a/selection/bayesian/ci_via_approx_density.py b/selection/bayesian/ci_via_approx_density.py
index b10095ffd..f7a936d74 100644
--- a/selection/bayesian/ci_via_approx_density.py
+++ b/selection/bayesian/ci_via_approx_density.py
@@ -273,7 +273,6 @@ def solve_approx(self):
             else:
                 self.ind_obs[j] = np.argmin(np.abs(self.grid-obs))
 
-                #self.ind_obs[j] = (np.where(self.grid == obs)[0])[0]
             self.h_approx[j, :] = self.approx_conditional_prob(j)
 
 
@@ -382,8 +381,6 @@ def test_approximate_ci_E(n=200, p=10, s=5, snr=5, rho=0.1,
     else:
         return 0
 
-#test_approximate_ci_E()
-
 def compute_coverage(p=10):
 
     niter = 50
diff --git a/selection/bayesian/fixed_X_ci_via_approx_density.py b/selection/bayesian/fixed_X_ci_via_approx_density.py
new file mode 100644
index 000000000..e2a35736c
--- /dev/null
+++ b/selection/bayesian/fixed_X_ci_via_approx_density.py
@@ -0,0 +1,395 @@
+import time
+import numpy as np
+import regreg.api as rr
+from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
+from scipy.stats import norm
+from selection.randomized.M_estimator import M_estimator
+
+class neg_log_cube_probability(rr.smooth_atom):
+    def __init__(self,
+                 q, # number of cube coordinates (p - E in our case)
+                 lagrange,
+                 randomization_scale = 1., # divides the cube limits in smooth_objective, i.e. acts as a standard deviation (equals the variance at the default 1.)
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+        """Store the cube dimension `q`, its half-widths `lagrange` and the noise scale, then set up a regreg smooth atom of shape (q,)."""
+        self.randomization_scale = randomization_scale
+        self.lagrange = lagrange
+        self.q = q
+        # initial=None: no starting point is supplied for this atom
+        rr.smooth_atom.__init__(self,
+                                (self.q,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=None,
+                                coef=coef)
+
+    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
+        """Return -sum(log P(|Z - arg| < lagrange)) with Z ~ N(0, randomization_scale**2) coordinate-wise, its gradient in `arg`, or both, per `mode`; `check_feasibility` and `tol` are unused."""
+        arg = self.apply_offset(arg)
+        # standardised upper and lower limits of the cube
+        arg_u = (arg + self.lagrange)/self.randomization_scale
+        arg_l = (arg - self.lagrange)/self.randomization_scale
+        prod_arg = np.exp(-(2. * self.lagrange * arg)/(self.randomization_scale**2))
+        neg_prod_arg = np.exp((2. * self.lagrange * arg)/(self.randomization_scale**2))
+        cube_prob = norm.cdf(arg_u) - norm.cdf(arg_l)
+        log_cube_prob = -np.log(cube_prob).sum()
+        threshold = 10 ** -10
+        indicator = np.zeros(self.q, bool)
+        indicator[(cube_prob > threshold)] = 1
+        positive_arg = np.zeros(self.q, bool)
+        positive_arg[(arg>0)] = 1
+        pos_index = np.logical_and(positive_arg, ~indicator)
+        neg_index = np.logical_and(~positive_arg, ~indicator)
+        log_cube_grad = np.zeros(self.q)
+        log_cube_grad[indicator] = (np.true_divide(-norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
+                                        cube_prob[indicator]))/self.randomization_scale
+        # cube_prob <= threshold and arg > 0: closed form in prod_arg replaces the pdf/cdf ratio (presumably a Gaussian tail approximation -- TODO confirm)
+        log_cube_grad[pos_index] = ((-1. + prod_arg[pos_index])/
+                                     ((prod_arg[pos_index]/arg_u[pos_index])-
+                                      (1./arg_l[pos_index])))/self.randomization_scale
+        # cube_prob <= threshold and arg <= 0: the analogous closed form in neg_prod_arg
+        log_cube_grad[neg_index] = ((arg_u[neg_index] -(arg_l[neg_index]*neg_prod_arg[neg_index]))
+                                    /self.randomization_scale)/(1.- neg_prod_arg[neg_index])
+        # NOTE: only the gradient is guarded; log_cube_prob above takes np.log(cube_prob) for every coordinate
+
+        if mode == 'func':
+            return self.scale(log_cube_prob)
+        elif mode == 'grad':
+            return self.scale(log_cube_grad)
+        elif mode == 'both':
+            return self.scale(log_cube_prob), self.scale(log_cube_grad)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+class approximate_conditional_prob_fixedX(rr.smooth_atom):
+
+    def __init__(self,
+                 t, #point at which density is to be computed
+                 approx_density,
+                 coef = 1.,
+                 offset= None,
+                 quadratic= None):
+        """Bind grid point `t` to the fitted `approx_density` object and cache the penalty weights, linear maps and barrier used by the objective."""
+        self.t = t
+        self.AD = approx_density
+        self.q = self.AD.p - self.AD.nactive
+        self.inactive_conjugate = self.active_conjugate = approx_density.randomization.CGF_conjugate
+
+        if self.active_conjugate is None:
+            raise ValueError(
+                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates')
+
+        # Penalty weights in the dict's own iteration order (as before).
+        # `.values()` replaces the Python-2-only `.iteritems()`, which
+        # raises AttributeError on Python 3.
+        lagrange = np.asarray(list(self.AD.penalty.weights.values()))
+
+        self.inactive_lagrange = lagrange[~self.AD._overall]
+        self.active_lagrange = lagrange[self.AD._overall]
+
+        rr.smooth_atom.__init__(self,
+                                (self.AD.nactive,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=self.AD.feasible_point,
+                                coef=coef)
+
+        self.coefs[:] = self.AD.feasible_point
+        self.B_active = self.AD.opt_linear_term[:self.AD.nactive, :self.AD.nactive]
+        self.B_inactive = self.AD.opt_linear_term[self.AD.nactive:, :self.AD.nactive]
+
+        self.nonnegative_barrier = nonnegative_softmax_scaled(self.AD.nactive)
+
+
+    def sel_prob_smooth_objective(self, param, j, mode='both', check_feasibility=False):
+        """Evaluate (value, gradient or both, per `mode`) the objective minimised by `minimize2`, with target coordinate `j` set to `self.t` and the others at their observed values; `check_feasibility` is unused."""
+        param = self.apply_offset(param)
+        index = np.zeros(self.AD.nactive, bool)
+        index[j] = 1
+        data = np.squeeze(self.t * self.AD.target_linear_term[:, index]) \
+               + self.AD.target_linear_term[:, ~index].dot(self.AD.target_observed[~index])
+        # offsets for the first `nactive` rows and for the remaining rows of the affine maps
+        offset_active = self.AD.opt_affine_term[:self.AD.nactive] + self.AD.null_statistic[:self.AD.nactive] + data[:self.AD.nactive]
+
+        offset_inactive = self.AD.null_statistic[self.AD.nactive:] + data[self.AD.nactive:]
+
+        active_conj_loss = rr.affine_smooth(self.active_conjugate,
+                                            rr.affine_transform(self.B_active, offset_active))
+        # the cube term is built with randomization_scale fixed at 1.
+        cube_obj = neg_log_cube_probability(self.q, self.inactive_lagrange, randomization_scale = 1.)
+
+        cube_loss = rr.affine_smooth(cube_obj, rr.affine_transform(self.B_inactive, offset_inactive))
+        # objective = conjugate term + cube term + self.nonnegative_barrier (rebuilt on every call)
+        total_loss = rr.smooth_sum([active_conj_loss,
+                                    cube_loss,
+                                    self.nonnegative_barrier])
+
+        if mode == 'func':
+            f = total_loss.smooth_objective(param, 'func')
+            return self.scale(f)
+        elif mode == 'grad':
+            g = total_loss.smooth_objective(param, 'grad')
+            return self.scale(g)
+        elif mode == 'both':
+            f, g = total_loss.smooth_objective(param, 'both')
+            return self.scale(f), self.scale(g)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+    def minimize2(self, j, step=1, nstep=30, tol=1.e-6):
+        """Minimise `sel_prob_smooth_objective(., j)` by backtracking gradient descent started at `self.coefs`.
+        Returns (minimiser, objective value); raises ValueError when no positive or non-NaN step can be found.
+        """
+        current = self.coefs
+        current_value = np.inf
+        objective = lambda u: self.sel_prob_smooth_objective(u, j, 'func')
+        grad = lambda u: self.sel_prob_smooth_objective(u, j, 'grad')
+
+        for itercount in range(nstep):
+            newton_step = grad(current)  # plain gradient despite the name
+
+            # make sure proposal is feasible (all coordinates > 0), at most 40 halvings
+            count = 0
+            while True:
+                count += 1
+                proposal = current - step * newton_step
+                if np.all(proposal > 0):
+                    break
+                step *= 0.5
+                if count >= 40:
+                    raise ValueError('not finding a feasible point')
+
+            # make sure proposal is a descent; `count` was previously never incremented
+            # here, so a NaN objective (never <= anything) looped forever
+            count = 0
+            while True:
+                count += 1
+                proposal = current - step * newton_step
+                proposed_value = objective(proposal)
+                if proposed_value <= current_value:
+                    break
+                step *= 0.5
+                if count >= 40:
+                    if np.isnan(proposed_value):
+                        raise ValueError('objective is NaN, no descent step found')
+                    # step shrunk by 2**-40 without descent: treat as stalled, stay put
+                    proposal, proposed_value = current, current_value
+                    break
+
+            # stop if relative decrease is small
+            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+                current = proposal
+                current_value = proposed_value
+                break
+
+            current = proposal
+            current_value = proposed_value
+
+            if itercount % 4 == 0:
+                step *= 2
+
+        return current, objective(current)
+
+class approximate_conditional_density_fixedX(rr.smooth_atom, M_estimator):
+
+    def __init__(self, loss, epsilon, penalty, noise_variance, randomization,
+                 coef=1.,
+                 offset=None,
+                 quadratic=None,
+                 nstep=10):
+        """Initialise both bases (M_estimator, then a shape-(1,) regreg atom) and store `noise_variance`; `nstep` is accepted but not used in this method."""
+        M_estimator.__init__(self, loss, epsilon, penalty, randomization)
+
+        rr.smooth_atom.__init__(self,
+                                (1,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                coef=coef)
+
+        self.noise_variance = noise_variance
+
+    def solve_approx(self):
+        """Fit via `Msolve`, then tabulate `h_approx[j, :]` on a 120-point grid over [-4, 8] for every active coordinate `j`."""
+        self.Msolve()
+        self.feasible_point = np.abs(self.initial_soln[self._overall])
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        nactive = self._overall.sum()
+
+        score_linear_term = self.score_transform[0]
+        (self.opt_linear_term, self.opt_affine_term) = self.opt_transform
+
+        # decomposition
+        target_linear_term = score_linear_term[:,:nactive]
+        self.var_target = self.noise_variance * np.linalg.inv(-score_linear_term[:nactive,:nactive])
+        # observed target and null statistic
+        target_observed = self.observed_score_state[:nactive]
+        null_statistic = (score_linear_term.dot(self.observed_score_state))-(target_linear_term.dot(target_observed))
+
+        (self.target_linear_term, self.target_observed, self.null_statistic) \
+            = (target_linear_term, target_observed, null_statistic)
+        self.nactive = nactive
+
+        #defining the grid on which marginal conditional densities will be evaluated
+        grid_length = 120
+        self.grid = np.linspace(-4, 8, num=grid_length)
+        # ind_obs[j] is the grid index nearest the observed target, clamped to the grid ends
+
+        print("observed values", target_observed)
+        self.ind_obs = np.zeros(nactive, int)
+        self.norm = np.zeros(nactive)
+        self.h_approx = np.zeros((nactive, self.grid.shape[0]))
+
+        for j in range(nactive):
+            obs = target_observed[j]
+            self.norm[j] = self.var_target[j,j]
+            if obs < self.grid[0]:
+                self.ind_obs[j] = 0
+            elif obs > np.max(self.grid):
+                self.ind_obs[j] = grid_length - 1  # last valid index (was grid_length: IndexError in approximate_ci)
+            else:
+                self.ind_obs[j] = np.argmin(np.abs(self.grid-obs))
+
+            self.h_approx[j, :] = self.approx_conditional_prob(j)
+
+
+    def approx_conditional_prob(self, j):
+        """Return, for each point of `self.grid`, minus the minimised objective value for active coordinate `j`."""
+        h_hat = []
+        for i in range(self.grid.shape[0]):
+            # minimize2 returns (minimiser, value); [::-1][0] picks the value
+            approx = approximate_conditional_prob_fixedX(self.grid[i], self)
+            h_hat.append(-(approx.minimize2(j, nstep=50)[::-1])[0])
+
+        return np.array(h_hat)
+
+
+    def area_normalized_density(self, j, mean):
+        """Return the cumulative sums, over `self.grid`, of the normalised approximate density.
+
+        The weight at the i-th grid point g is exp(-(g - mean)**2 / (2 * self.norm[j]) + self.h_approx[j, i]);
+        weights are divided by their total before `np.cumsum`, so the last entry is 1 up to rounding.
+        `j` indexes the active coordinate, `mean` is the candidate parameter value.
+        """
+        # Vectorised over the grid.  The earlier loop built a Python list and
+        # divided it by a NumPy scalar, which only worked through NumPy's
+        # reflected-operator coercion.
+        log_weights = -np.true_divide((self.grid - mean) ** 2, 2 * self.norm[j]) + self.h_approx[j, :]
+        approx_nonnormalized = np.exp(log_weights)
+
+        return np.cumsum(approx_nonnormalized / approx_nonnormalized.sum())
+
+    def approximate_ci(self, j):
+        """Return (lower, upper): the smallest and largest candidate means whose normalised cumulative sum at the observed grid index lies in [0.05, 0.95]; (0, 0) if there is none."""
+        param_grid = np.round(np.linspace(-5, 10, num=151), decimals=1)
+        # candidate means run from -5 to 10 in steps of 0.1
+        area = np.zeros(param_grid.shape[0])
+
+        for k in range(param_grid.shape[0]):
+            # cumulative sum up to the observed grid index, for candidate mean param_grid[k]
+            area_vec = self.area_normalized_density(j, param_grid[k])
+            area[k] = area_vec[self.ind_obs[j]]
+
+        region = param_grid[(area >= 0.05) & (area <= 0.95)]
+
+        if region.size > 0:
+            return np.nanmin(region), np.nanmax(region)
+        else:
+            return 0, 0
+
+def test_approximate_ci_fixedX(n=200, p=10, s=5, snr=5, rho=0.1,
+                          lam_frac=1.,
+                          loss='gaussian'):
+    """Simulate one Gaussian instance, run `approximate_conditional_density_fixedX` and return (active_set, intervals, true coefficients, nactive), or 0 unless the first `s` variables are all selected."""
+    from selection.tests.instance import gaussian_instance
+    from selection.randomized.api import randomization
+
+    # NOTE: the `loss` argument is overwritten below by rr.glm.gaussian(X, y)
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1)
+    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    loss = rr.glm.gaussian(X, y)
+
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    # W[0] = 0 # use at least some unpenalized
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+    randomization = randomization.isotropic_gaussian((p,), 1.)
+    ci = approximate_conditional_density_fixedX(loss, epsilon, penalty, randomization)
+
+    ci.solve_approx()
+    print("nactive", ci._overall.sum())
+    active_set = np.asarray([i for i in range(p) if ci._overall[i]])
+
+    true_support = np.asarray([i for i in range(p) if i < s])
+
+    nactive = ci.nactive
+
+    print("active set, true_support", active_set, true_support)
+
+    truth = beta[ci._overall]
+
+    print("true coefficients", truth)
+    # intervals are only computed when every true-support index was selected
+    if (set(active_set).intersection(set(true_support)) == set(true_support))== True:
+
+        ci_active_E = np.zeros((nactive, 2))
+        toc = time.time()
+        for j in range(nactive):
+            ci_active_E[j, :] = np.array(ci.approximate_ci(j))
+            print(ci_active_E[j, :])
+        tic = time.time()
+        print('ci time now', tic - toc)
+
+        return active_set, ci_active_E, truth, nactive
+
+    else:
+        return 0
+
+def compute_coverage(p=10):
+    """Run 50 simulations and return (per-variable coverage proportion, selection counts, number of ValueErrors); `p` only sizes the counters, the simulation runs with its own defaults."""
+    niter = 50
+    coverage = np.zeros(p)
+    nsel = np.zeros(p)
+    nerr = 0
+    for iter in range(niter):
+        print("\n")
+        print("iteration", iter)
+        try:
+            test_ci = test_approximate_ci_fixedX()
+            if test_ci != 0:
+                ci_active = test_ci[1]
+                print("ci", ci_active)
+                active_set = test_ci[0]
+                true_val = test_ci[2]
+                nactive = test_ci[3]
+                toc = time.time()
+                for l in range(nactive):
+                    nsel[active_set[l]] += 1
+                    print(true_val[l])
+                    if (ci_active[l,0]<= true_val[l]) and (true_val[l]<= ci_active[l,1]):
+                        coverage[active_set[l]] += 1
+                tic = time.time()
+                print('ci time', tic - toc)
+            # running totals; the isnan masks are no-ops on these count arrays
+            print(coverage[~np.isnan(coverage)])
+            print(nsel[~np.isnan(nsel)])
+            print('coverage so far',np.true_divide(np.sum(coverage[~np.isnan(coverage)]), np.sum(nsel[~np.isnan(nsel)])))
+
+        except ValueError:
+            nerr +=1
+            print('ignore iteration raising ValueError')
+            continue
+    # 0/0 for never-selected variables gives nan, mapped to 0 below
+    coverage_prop = np.true_divide(coverage, nsel)
+    coverage_prop[coverage_prop == np.inf] = 0
+    coverage_prop = np.nan_to_num(coverage_prop)
+    return coverage_prop, nsel, nerr
+
+if __name__ == "__main__":  # run the 50-iteration simulation as a script only, not on import
+    print(compute_coverage())
\ No newline at end of file
diff --git a/selection/randomized/api.py b/selection/randomized/api.py
index 5355d3cae..c42ca183d 100644
--- a/selection/randomized/api.py
+++ b/selection/randomized/api.py
@@ -1,11 +1,13 @@
-from .multiple_views import multiple_views
+from .query import multiple_queries, query
 
-from .glm import (glm_group_lasso, 
+from .glm import (glm_group_lasso, split_glm_group_lasso,
                   glm_group_lasso_parametric,
-                  glm_greedy_step, 
-                  pairs_bootstrap_glm, 
+                  glm_greedy_step,
+                  glm_threshold_score,
+                  pairs_bootstrap_glm,
                   pairs_inactive_score_glm,
                   glm_nonparametric_bootstrap,
-                  glm_parametric_covariance)
+                  glm_parametric_covariance,
+                  target as glm_target)
 
-from .randomization import randomization
+from .randomization import randomization
\ No newline at end of file
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
new file mode 100644
index 000000000..4d0a9a4ca
--- /dev/null
+++ b/selection/randomized/threshold_score.py
@@ -0,0 +1,120 @@
+import numpy as np
+import regreg.api as rr
+
+from .query import query
+from .M_estimator import restricted_Mest
+
+
+class threshold_score(query):
+    def __init__(self, loss, threshold, randomization, active, inactive, beta_active=None,
+                 solve_args={'min_its': 50, 'tol': 1.e-10}):
+        """
+        `active` indexes the variables of `loss` treated as active; `threshold` is a scalar or a vector with
+        one entry per inactive variable.  The `inactive` argument is ignored: it is recomputed as the
+        complement of `active`.  `beta_active`, if None, is fitted in `solve` using `solve_args`.
+        """
+        query.__init__(self, randomization)
+
+        # builtin bool: the np.bool alias was deprecated in NumPy 1.20 and removed in 1.24
+        active_bool = np.zeros(loss.shape, bool)
+        active_bool[active] = 1
+        active = active_bool
+        inactive = ~active
+        # broadcast any scalar threshold (int, float, NumPy scalar), not only an exact Python float
+        if np.ndim(threshold) == 0:
+            threshold = np.ones(inactive.sum()) * threshold
+
+        self.epsilon = 0.  # for randomized loss
+
+        (self.loss,
+         self.threshold,
+         self.active,
+         self.inactive,
+         self.beta_active,
+         self.randomization,
+         self.solve_args) = (loss,
+                             threshold,
+                             active,
+                             inactive,
+                             beta_active,
+                             randomization,
+                             solve_args)
+
+    def solve(self):
+        """Fit the restricted estimator if `beta_active` is None, then record which inactive scores exceed `threshold` in absolute value, with their signs."""
+        (loss,
+         threshold,
+         active,
+         inactive,
+         beta_active,
+         randomization) = (self.loss,
+                           self.threshold,
+                           self.active,
+                           self.inactive,
+                           self.beta_active,
+                           self.randomization)
+
+        if beta_active is None:
+            beta_active = self.beta_active = restricted_Mest(self.loss, active, solve_args=self.solve_args)
+
+        self.randomize()
+
+        beta_full = np.zeros(self.loss.shape)
+        beta_full[active] = beta_active
+        # NOTE(review): the two gradients below are the same expression, so whatever self.randomize() sets up does not enter randomized_score -- confirm whether a randomized loss was intended
+        inactive_score = self.loss.smooth_objective(beta_full, 'grad')[inactive]
+        randomized_score = self.loss.smooth_objective(beta_full, 'grad')[inactive]
+
+        # find the current active group, i.e.
+        # subset of inactive that pass the threshold
+
+        # TODO: make this test use group LASSO
+
+        self.boundary = np.fabs(randomized_score) > threshold
+        self.boundary_signs = np.sign(randomized_score)[self.boundary]
+        self.interior = ~self.boundary
+
+        self.observed_overshoot = self.boundary_signs * (inactive_score[self.boundary] - threshold[self.boundary])
+        self.observed_below_thresh = inactive_score[self.interior]
+        self.observed_score_state = inactive_score
+
+        self.selection_variable = {'boundary_set': self.boundary,
+                                   'boundary_signs': self.boundary_signs}
+
+        self._solved = True
+
+        self.num_opt_var = self.boundary.shape[0]
+
+    def setup_sampler(self):
+        """Build observed_opt_state, opt_transform and score_transform from the attributes stored by `solve`, then set `_setup`."""
+        # must set observed_opt_state, opt_transform and score_transform
+
+        p = self.boundary.shape[0]  # shorthand
+        self.observed_opt_state = np.zeros(p)
+        self.observed_opt_state[self.boundary] = self.observed_overshoot
+        self.observed_opt_state[self.interior] = self.observed_below_thresh
+        # diagonal linear map: boundary signs on boundary coordinates, 1 elsewhere; offset is sign * threshold on the boundary, 0 elsewhere
+        _opt_linear_diag = np.ones(p)
+        _opt_linear_diag[self.boundary] = self.boundary_signs
+        _opt_linear_term = np.diag(_opt_linear_diag)
+        _opt_offset = np.zeros(p)
+        _opt_offset[self.boundary] = self.boundary_signs * self.threshold[self.boundary]
+
+        _score_linear_term = -np.identity(p)
+
+        self.opt_transform = (_opt_linear_term, _opt_offset)
+        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
+
+        self._setup = True
+
+    def projection(self, opt_state):
+        """
+        Full projection for Langevin, applied to the optimization variables only (for now, groups are singletons).
+        Boundary coordinates are clipped below at 0, interior ones to [-threshold, threshold].
+        `opt_state` is modified in place and also returned.
+        """
+        opt_state[self.boundary] = np.maximum(opt_state[self.boundary], 0.)
+        opt_state[self.interior] = np.clip(opt_state[self.interior],
+                                           -self.threshold[self.interior],
+                                           self.threshold[self.interior])
+        return opt_state

From 95bc3fb804811a937da765b3d0f3b35b50aef1a7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c765.SUNet>
Date: Tue, 20 Dec 2016 12:04:05 -0800
Subject: [PATCH 003/617] small change in fixed X lasso

---
 selection/bayesian/fixed_X_ci_via_approx_density.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/bayesian/fixed_X_ci_via_approx_density.py b/selection/bayesian/fixed_X_ci_via_approx_density.py
index e2a35736c..ead226c3c 100644
--- a/selection/bayesian/fixed_X_ci_via_approx_density.py
+++ b/selection/bayesian/fixed_X_ci_via_approx_density.py
@@ -320,7 +320,7 @@ def test_approximate_ci_fixedX(n=200, p=10, s=5, snr=5, rho=0.1,
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
     randomization = randomization.isotropic_gaussian((p,), 1.)
-    ci = approximate_conditional_density_fixedX(loss, epsilon, penalty, randomization)
+    ci = approximate_conditional_density_fixedX(loss, epsilon, penalty, sigma**2 , randomization)
 
     ci.solve_approx()
     print("nactive", ci._overall.sum())

From 7602baa94d39c004c216c7fca6c39c70d5bf037a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c765.SUNet>
Date: Wed, 4 Jan 2017 11:19:09 -0800
Subject: [PATCH 004/617] added cube probability function for laplace
 randomization

---
 .../bayesian/ci_via_approx_density_laplace.py | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 selection/bayesian/ci_via_approx_density_laplace.py

diff --git a/selection/bayesian/ci_via_approx_density_laplace.py b/selection/bayesian/ci_via_approx_density_laplace.py
new file mode 100644
index 000000000..4ef26925e
--- /dev/null
+++ b/selection/bayesian/ci_via_approx_density_laplace.py
@@ -0,0 +1,72 @@
+import time
+import numpy as np
+import regreg.api as rr
+from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
+from scipy.stats import norm
+from selection.randomized.M_estimator import M_estimator
+from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
+
+
+class neg_log_cube_probability_laplace(rr.smooth_atom):
+    def __init__(self,
+                 q, # number of cube coordinates (p - E in our case)
+                 lagrange,
+                 randomization_scale = 1., # stored as self.b, the divisor of the cube limits in smooth_objective (a scale, not a variance)
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+        """Store the scale `b`, the half-widths `lagrange` and the dimension `q`, then set up a regreg smooth atom of shape (q,)."""
+        self.b = randomization_scale
+        self.lagrange = lagrange
+        self.q = q
+
+        rr.smooth_atom.__init__(self,
+                                (self.q,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=None,
+                                coef=coef)
+
+    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
+
+        arg = self.apply_offset(arg)
+
+        arg_u = (arg + self.lagrange)/self.b
+        arg_l = (arg - self.lagrange)/self.b
+        scaled_lagrange = (2* self.lagrange)/self.b
+
+        ind_arg_1 = np.zeros(self.q, bool)
+        ind_arg_1[(arg_u <0.)] = 1
+        ind_arg_2 = np.zeros(self.q, bool)
+        ind_arg_2[(arg_l >0.)] = 1
+        ind_arg_3 = np.logical_and(~ind_arg_1, ind_arg_2)
+        cube_prob = np.zeros(self.q)
+        cube_prob[ind_arg_1] = np.exp(arg_u[ind_arg_1])/2. - np.exp(arg_l[ind_arg_1])/2.
+        cube_prob[ind_arg_2] = -np.exp(-arg_u[ind_arg_2])/2. + np.exp(-arg_l[ind_arg_2])/2.
+        cube_prob[ind_arg_3] = 1- np.exp(-arg_u[ind_arg_3])/2. - np.exp(arg_l[ind_arg_3])/2.
+        log_cube_prob = -np.log(cube_prob).sum()
+
+        log_cube_grad = np.zeros(self.q)
+        log_cube_grad[ind_arg_1] = 1./self.b
+        log_cube_grad[ind_arg_2] = np.true_divide((np.exp(-scaled_lagrange[ind_arg_2])-1.)/self.b,
+                                                  1. - np.exp(-scaled_lagrange[ind_arg_2]))
+        num_cube_grad = np.true_divide(np.exp(-scaled_lagrange[ind_arg_3]), 2 * self.b) - \
+                        np.true_divide(np.exp((2* arg_l[ind_arg_3])), 2 * self.b)
+        den_cube_grad = np.exp(arg_l[ind_arg_3]) - np.exp(-scaled_lagrange[ind_arg_3])/2. - \
+                        np.true_divide(np.exp((2* arg_l[ind_arg_3])), 2)
+        log_cube_grad[ind_arg_3] = num_cube_grad/den_cube_grad
+
+        if mode == 'func':
+            return self.scale(log_cube_prob)
+        elif mode == 'grad':
+            return self.scale(log_cube_grad)
+        elif mode == 'both':
+            return self.scale(log_cube_prob), self.scale(log_cube_grad)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+
+
+
+
+

From c9918391fb268659123819543591c950f4dc2334 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c765.SUNet>
Date: Wed, 4 Jan 2017 11:39:59 -0800
Subject: [PATCH 005/617] a change in sign of gradient

---
 selection/bayesian/ci_via_approx_density_laplace.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/bayesian/ci_via_approx_density_laplace.py b/selection/bayesian/ci_via_approx_density_laplace.py
index 4ef26925e..2af529c5e 100644
--- a/selection/bayesian/ci_via_approx_density_laplace.py
+++ b/selection/bayesian/ci_via_approx_density_laplace.py
@@ -48,8 +48,8 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
 
         log_cube_grad = np.zeros(self.q)
         log_cube_grad[ind_arg_1] = 1./self.b
-        log_cube_grad[ind_arg_2] = np.true_divide((np.exp(-scaled_lagrange[ind_arg_2])-1.)/self.b,
-                                                  1. - np.exp(-scaled_lagrange[ind_arg_2]))
+        log_cube_grad[ind_arg_2] = np.true_divide((np.exp(-scaled_lagrange[ind_arg_2])+ 1.)/self.b,
+                                                  np.exp(-scaled_lagrange[ind_arg_2])-1.)
         num_cube_grad = np.true_divide(np.exp(-scaled_lagrange[ind_arg_3]), 2 * self.b) - \
                         np.true_divide(np.exp((2* arg_l[ind_arg_3])), 2 * self.b)
         den_cube_grad = np.exp(arg_l[ind_arg_3]) - np.exp(-scaled_lagrange[ind_arg_3])/2. - \

From 0e6aeb928c90313bc962a3272d2ced6d9823fd1e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c765.SUNet>
Date: Wed, 4 Jan 2017 11:50:15 -0800
Subject: [PATCH 006/617] more corrections to gradient of cube prob

---
 .../bayesian/ci_via_approx_density_laplace.py     | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/selection/bayesian/ci_via_approx_density_laplace.py b/selection/bayesian/ci_via_approx_density_laplace.py
index 2af529c5e..8e6ec323f 100644
--- a/selection/bayesian/ci_via_approx_density_laplace.py
+++ b/selection/bayesian/ci_via_approx_density_laplace.py
@@ -39,12 +39,12 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         ind_arg_1[(arg_u <0.)] = 1
         ind_arg_2 = np.zeros(self.q, bool)
         ind_arg_2[(arg_l >0.)] = 1
-        ind_arg_3 = np.logical_and(~ind_arg_1, ind_arg_2)
+        ind_arg_3 = np.logical_and(~ind_arg_1, ~ind_arg_2)
         cube_prob = np.zeros(self.q)
         cube_prob[ind_arg_1] = np.exp(arg_u[ind_arg_1])/2. - np.exp(arg_l[ind_arg_1])/2.
         cube_prob[ind_arg_2] = -np.exp(-arg_u[ind_arg_2])/2. + np.exp(-arg_l[ind_arg_2])/2.
         cube_prob[ind_arg_3] = 1- np.exp(-arg_u[ind_arg_3])/2. - np.exp(arg_l[ind_arg_3])/2.
-        log_cube_prob = -np.log(cube_prob).sum()
+        neg_log_cube_prob = -np.log(cube_prob).sum()
 
         log_cube_grad = np.zeros(self.q)
         log_cube_grad[ind_arg_1] = 1./self.b
@@ -53,15 +53,16 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         num_cube_grad = np.true_divide(np.exp(-scaled_lagrange[ind_arg_3]), 2 * self.b) - \
                         np.true_divide(np.exp((2* arg_l[ind_arg_3])), 2 * self.b)
         den_cube_grad = np.exp(arg_l[ind_arg_3]) - np.exp(-scaled_lagrange[ind_arg_3])/2. - \
-                        np.true_divide(np.exp((2* arg_l[ind_arg_3])), 2)
-        log_cube_grad[ind_arg_3] = num_cube_grad/den_cube_grad
+                        np.exp(2* arg_l[ind_arg_3])/2.
+        log_cube_grad[ind_arg_3] = np.true_divide(num_cube_grad,den_cube_grad)
+        neg_log_cube_grad = -log_cube_grad
 
         if mode == 'func':
-            return self.scale(log_cube_prob)
+            return self.scale(neg_log_cube_prob)
         elif mode == 'grad':
-            return self.scale(log_cube_grad)
+            return self.scale(neg_log_cube_grad)
         elif mode == 'both':
-            return self.scale(log_cube_prob), self.scale(log_cube_grad)
+            return self.scale(neg_log_cube_prob), self.scale(neg_log_cube_grad)
         else:
             raise ValueError("mode incorrectly specified")
 

From a93f3d1ec0df7ed79528cd74f49652890db397ae Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c765.SUNet>
Date: Wed, 4 Jan 2017 12:36:14 -0800
Subject: [PATCH 007/617] code for laplace noise re-written

---
 .../bayesian/ci_via_approx_density_laplace.py | 347 ++++++++++++++++++
 1 file changed, 347 insertions(+)

diff --git a/selection/bayesian/ci_via_approx_density_laplace.py b/selection/bayesian/ci_via_approx_density_laplace.py
index 8e6ec323f..62c4fd687 100644
--- a/selection/bayesian/ci_via_approx_density_laplace.py
+++ b/selection/bayesian/ci_via_approx_density_laplace.py
@@ -66,6 +66,353 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         else:
             raise ValueError("mode incorrectly specified")
 
+class approximate_conditional_prob_E(rr.smooth_atom):
+
+    def __init__(self,
+                 t, #point at which density is to be computed
+                 approx_density,
+                 coef = 1.,
+                 offset= None,
+                 quadratic= None):
+        """Set up the smooth objective over the active variables for target value `t`."""
+        self.t = t
+        self.AD = approx_density
+        self.q = self.AD.p - self.AD.nactive
+        self.inactive_conjugate = self.active_conjugate = approx_density.randomization.CGF_conjugate
+
+        if self.active_conjugate is None:
+            raise ValueError(
+                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates')
+
+        # Read the weights in sorted group-label order so they line up with the
+        # boolean mask `_overall`; `sorted` also avoids the Python-2-only `iteritems`.
+        weights = self.AD.penalty.weights
+        lagrange = np.asarray([weights[key] for key in sorted(weights)])
+
+        self.inactive_lagrange = lagrange[~self.AD._overall]
+        self.active_lagrange = lagrange[self.AD._overall]
+
+        rr.smooth_atom.__init__(self,
+                                (self.AD.nactive,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=self.AD.feasible_point,
+                                coef=coef)
+
+        self.coefs[:] = self.AD.feasible_point
+        self.B_active = self.AD.opt_linear_term[:self.AD.nactive, :self.AD.nactive]
+        self.B_inactive = self.AD.opt_linear_term[self.AD.nactive:, :self.AD.nactive]
+
+        self.nonnegative_barrier = nonnegative_softmax_scaled(self.AD.nactive)
+
+
+    def sel_prob_smooth_objective(self, param, j, mode='both', check_feasibility=False):
+        """Evaluate the sum of the active conjugate, cube and barrier losses at `param`.
+
+        Target coordinate `j` is set to `self.t` and the other coordinates to their
+        observed values; `mode` is 'func', 'grad' or 'both'.
+        """
+        nactive = self.AD.nactive
+        param = self.apply_offset(param)
+        index = np.zeros(nactive, bool)
+        index[j] = 1
+        data = np.squeeze(self.t * self.AD.target_linear_term[:, index]) \
+               + self.AD.target_linear_term[:, ~index].dot(self.AD.target_observed[~index])
+
+        offset_active = self.AD.opt_affine_term[:nactive] + self.AD.null_statistic[:nactive] + data[:nactive]
+        offset_inactive = self.AD.null_statistic[nactive:] + data[nactive:]
+
+        active_conj_loss = rr.affine_smooth(self.active_conjugate,
+                                            rr.affine_transform(self.B_active, offset_active))
+
+        cube_obj = neg_log_cube_probability_laplace(self.q, self.inactive_lagrange, randomization_scale = 1.)
+        cube_loss = rr.affine_smooth(cube_obj, rr.affine_transform(self.B_inactive, offset_inactive))
+
+        total_loss = rr.smooth_sum([active_conj_loss,
+                                    cube_loss,
+                                    self.nonnegative_barrier])
+
+        if mode == 'both':
+            f, g = total_loss.smooth_objective(param, 'both')
+            return self.scale(f), self.scale(g)
+        if mode in ('func', 'grad'):
+            # a single quantity (value or gradient) was requested
+            return self.scale(total_loss.smooth_objective(param, mode))
+
+        raise ValueError("mode incorrectly specified")
+
+    def minimize2(self, j, step=1, nstep=30, tol=1.e-6):
+        """Minimize the j-th objective by gradient steps with step-halving.
+
+        Returns (minimizer, objective value); raises ValueError if no feasible
+        or non-increasing step can be found.
+        """
+        current = self.coefs
+        current_value = np.inf
+
+        objective = lambda u: self.sel_prob_smooth_objective(u, j, 'func')
+        grad = lambda u: self.sel_prob_smooth_objective(u, j, 'grad')
+
+        for itercount in range(nstep):
+            newton_step = grad(current)
+
+            # make sure proposal is feasible
+
+            count = 0
+            while True:
+                count += 1
+                proposal = current - step * newton_step
+                if np.all(proposal > 0):
+                    break
+                step *= 0.5
+                if count >= 40:
+                    raise ValueError('not finding a feasible point')
+
+            # make sure proposal is a descent; if the step has shrunk until the
+            # proposal equals `current` and is still rejected, the objective is
+            # NaN there and more halving would loop forever
+            while True:
+                proposal = current - step * newton_step
+                proposed_value = objective(proposal)
+                if proposed_value <= current_value:
+                    break
+                if np.array_equal(proposal, current):
+                    raise ValueError('not finding a descent step')
+                step *= 0.5
+
+            # stop if relative decrease is small
+
+            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+                current = proposal
+                current_value = proposed_value
+                break
+
+            current = proposal
+            current_value = proposed_value
+
+            if itercount % 4 == 0:
+                step *= 2
+
+        return current, objective(current)
+
+class approximate_conditional_density_E(rr.smooth_atom, M_estimator):
+
+    def __init__(self, loss, epsilon, penalty, randomization,
+                 coef=1.,
+                 offset=None,
+                 quadratic=None,
+                 nstep=10):
+        """Initialize both base classes; `nstep` is accepted but not used in this constructor."""
+        M_estimator.__init__(self, loss, epsilon, penalty, randomization)
+
+        rr.smooth_atom.__init__(self,
+                                (1,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                coef=coef)
+
+    def solve_approx(self):
+        """Run `Msolve`, bootstrap the score covariance and fill `h_approx` on a fixed grid."""
+        self.Msolve()
+        self.feasible_point = np.abs(self.initial_soln[self._overall])
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self._overall,
+                                              beta_full=self._beta_full,
+                                              inactive=~self._overall)[0]
+
+        score_cov = bootstrap_cov(lambda: np.random.choice(n, size=(n,), replace=True), bootstrap_score)
+
+        nactive = self._overall.sum()
+
+        Sigma_D_T = score_cov[:, :nactive]
+        Sigma_T = score_cov[:nactive, :nactive]
+        Sigma_T_inv = np.linalg.inv(Sigma_T)
+
+        score_linear_term = self.score_transform[0]
+        (self.opt_linear_term, self.opt_affine_term) = self.opt_transform
+
+        # decomposition
+        # linear map applied to the target: score_linear_term . Sigma_D_T . Sigma_T^{-1}
+        target_linear_term = (score_linear_term.dot(Sigma_D_T)).dot(Sigma_T_inv)
+
+        # observed target and null statistic
+        target_observed = self.observed_score_state[:nactive]
+        null_statistic = (score_linear_term.dot(self.observed_score_state))-(target_linear_term.dot(target_observed))
+
+        (self.target_linear_term, self.target_observed, self.null_statistic) \
+            = (target_linear_term, target_observed, null_statistic)
+        self.nactive = nactive
+
+        #defining the grid on which marginal conditional densities will be evaluated
+        grid_length = 120
+        self.grid = np.linspace(-4, 8, num=grid_length)
+        # the loop below stores, per coordinate, the grid index closest to the observed value
+
+        print("observed values", target_observed)
+        self.ind_obs = np.zeros(nactive, int)
+        self.norm = np.zeros(nactive)
+        self.h_approx = np.zeros((nactive, self.grid.shape[0]))
+
+        for j in range(nactive):
+            obs = target_observed[j]
+            self.norm[j] = Sigma_T[j,j]
+            if obs < self.grid[0]:
+                self.ind_obs[j] = 0
+            elif obs > np.max(self.grid):
+                self.ind_obs[j] = grid_length-1
+            else:
+                self.ind_obs[j] = np.argmin(np.abs(self.grid-obs))
+
+            self.h_approx[j, :] = self.approx_conditional_prob(j)
+
+
+    def approx_conditional_prob(self, j):
+        """Return minus the minimized objective for coordinate j at every grid point."""
+        h_hat = []
+        for grid_point in self.grid:
+            approx = approximate_conditional_prob_E(grid_point, self)
+            # minimize2 returns (minimizer, value); only the value is kept
+            _, value = approx.minimize2(j, nstep=50)
+            h_hat.append(-value)
+        return np.array(h_hat)
+
+
+    def area_normalized_density(self, j, mean):
+        """Return the discrete CDF on `self.grid` of the j-th approximate density.
+
+        The log-weight at grid point g is
+        -(g - mean)^2 / (2 * self.norm[j]) + self.h_approx[j, i]; the weights are
+        exponentiated, normalized to sum to one and accumulated.
+        """
+        log_density = self.h_approx[j, :] - np.true_divide((self.grid - mean) ** 2,
+                                                           2 * self.norm[j])
+        # subtracting the maximum leaves the normalized weights unchanged but
+        # keeps np.exp from overflowing (or all weights from underflowing to 0)
+        approx_nonnormalized = np.exp(log_density - np.max(log_density))
+
+        return np.cumsum(approx_nonnormalized / approx_nonnormalized.sum())
+
+    def approximate_ci(self, j):
+        """Scan a grid of candidate means and return an interval for coordinate j.
+
+        Returns (lower, upper): the extreme candidate means whose CDF value at the
+        observed grid index lies in [0.05, 0.95], or (0, 0) if there are none.
+        """
+        param_grid = np.round(np.linspace(-5, 10, num=151), decimals=1)
+        observed_index = self.ind_obs[j]
+
+        area = np.zeros(param_grid.shape[0])
+        for k, mean in enumerate(param_grid):
+            area[k] = self.area_normalized_density(j, mean)[observed_index]
+
+        region = param_grid[(area >= 0.05) & (area <= 0.95)]
+        if region.size > 0:
+            return np.nanmin(region), np.nanmax(region)
+        return 0, 0
+
+def test_approximate_ci_E(n=200, p=10, s=5, snr=5, rho=0.1,
+                          lam_frac=1.,
+                          loss='gaussian'):
+    """Simulate one dataset, run the randomized selection and compute approximate intervals.
+
+    Returns (active_set, intervals, true coefficients, nactive) when the selected set
+    contains every index in `true_support` (0, ..., s-1), and 0 otherwise.
+    """
+    from selection.tests.instance import logistic_instance, gaussian_instance
+    from selection.randomized.api import randomization
+
+    if loss == "gaussian":
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+        loss = rr.glm.gaussian(X, y)
+    elif loss == "logistic":
+        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
+        loss = rr.glm.logistic(X, y)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
+    else:
+        # fail fast: without this branch `lam` would be undefined below
+        raise ValueError("loss must be 'gaussian' or 'logistic'")
+
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+    randomization = randomization.laplace((p,), 1.)
+    ci = approximate_conditional_density_E(loss, epsilon, penalty, randomization)
+
+    ci.solve_approx()
+    print("nactive", ci._overall.sum())
+    active_set = np.asarray([i for i in range(p) if ci._overall[i]])
+    true_support = np.asarray([i for i in range(p) if i < s])
+    nactive = ci.nactive
+
+    print("active set, true_support", active_set, true_support)
+
+    truth = beta[ci._overall]
+    print("true coefficients", truth)
+
+    # intervals are only computed when every index in true_support was selected
+    if set(active_set).intersection(set(true_support)) == set(true_support):
+        ci_active_E = np.zeros((nactive, 2))
+        toc = time.time()
+        for j in range(nactive):
+            ci_active_E[j, :] = np.array(ci.approximate_ci(j))
+            print(ci_active_E[j, :])
+        tic = time.time()
+        print('ci time now', tic - toc)
+
+        return active_set, ci_active_E, truth, nactive
+
+    return 0
+
+
+def compute_coverage(p=10):
+    """Run `test_approximate_ci_E(p=p)` 100 times and tally coverage of the true coefficients.
+    Returns (per-coordinate coverage proportion, selection counts, number of ValueErrors).
+    """
+    niter = 100
+    coverage = np.zeros(p)
+    nsel = np.zeros(p)
+    nerr = 0
+    for iteration in range(niter):
+        print("\n")
+        print("iteration", iteration)
+        try:
+            test_ci = test_approximate_ci_E(p=p)
+            if test_ci != 0:
+                ci_active = test_ci[1]
+                print("ci", ci_active)
+                active_set = test_ci[0]
+                true_val = test_ci[2]
+                nactive = test_ci[3]
+                toc = time.time()
+                for l in range(nactive):
+                    nsel[active_set[l]] += 1
+                    print(true_val[l])
+                    if (ci_active[l, 0] <= true_val[l]) and (true_val[l] <= ci_active[l, 1]):
+                        coverage[active_set[l]] += 1
+                tic = time.time()
+                print('ci time', tic - toc)
+            print(coverage[~np.isnan(coverage)])
+            print(nsel[~np.isnan(nsel)])
+            print(
+            'coverage so far', np.true_divide(np.sum(coverage[~np.isnan(coverage)]), np.sum(nsel[~np.isnan(nsel)])))
+        except ValueError:
+            nerr += 1
+            print('ignore iteration raising ValueError')
+
+    coverage_prop = np.true_divide(coverage, nsel)
+    coverage_prop[coverage_prop == np.inf] = 0
+    coverage_prop = np.nan_to_num(coverage_prop)
+    return coverage_prop, nsel, nerr
+
+
+if __name__ == "__main__": print(compute_coverage())  # run the simulation only as a script, not on import
 
 
 

From 3f56a8c7c9faacde2e7e2109410078bb4453f748 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c765.SUNet>
Date: Wed, 4 Jan 2017 14:25:35 -0800
Subject: [PATCH 008/617] updated M.estimator code

---
 selection/randomized/M_estimator.py | 99 ++++++++++++++---------------
 1 file changed, 49 insertions(+), 50 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index e07ccfa9f..170f9306d 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -4,9 +4,9 @@
 from .query import query
 from .randomization import split
 
-class M_estimator(query):
 
-    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
+class M_estimator(query):
+    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
         """
         Fits the logistic regression to a candidate active set, without penalty.
         Calls the method bootstrap_covariance() to bootstrap the covariance matrix.
@@ -41,7 +41,7 @@ def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':
 
     # Methods needed for subclassing a query
 
-    def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
+    def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
 
         self.randomize()
 
@@ -77,7 +77,8 @@ def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
 
         for i, g in enumerate(groups):
             group = penalty.groups == g
-            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) > 1.e-6 * penalty.weights[g]) and (penalty.weights[g] > 0)
+            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) > 1.e-6 * penalty.weights[g]) and (
+            penalty.weights[g] > 0)
             unpenalized_groups[i] = (penalty.weights[g] == 0)
             if active_groups[i]:
                 active[group] = True
@@ -97,15 +98,15 @@ def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
         self._active_groups = np.array(active_groups, np.bool)
         self._unpenalized_groups = np.array(unpenalized_groups, np.bool)
 
-        self.selection_variable = {'groups':self._active_groups,
-                                   'variables':self._overall,
-                                   'directions':self._active_directions}
+        self.selection_variable = {'groups': self._active_groups,
+                                   'variables': self._overall,
+                                   'directions': self._active_directions}
 
         # initial state for opt variables
 
         initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
                             self.randomized_loss.quadratic.objective(self.initial_soln, 'grad'))
-                          # the quadratic of a smooth_atom is not included in computing the smooth_objective
+        # the quadratic of a smooth_atom is not included in computing the smooth_objective
 
         initial_subgrad = initial_subgrad[self._inactive]
         initial_unpenalized = self.initial_soln[self._unpenalized]
@@ -158,7 +159,7 @@ def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
 
         # form linear part
 
-        self.num_opt_var = p = loss.shape[0] # shorthand for p
+        self.num_opt_var = p = loss.shape[0]  # shorthand for p
 
         # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
         # E for active
@@ -171,42 +172,45 @@ def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
         # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
 
         Mest_slice = slice(0, overall.sum())
-        _Mest_hessian = _hessian[:,overall]
-        _score_linear_term[:,Mest_slice] = -_Mest_hessian / _sqrt_scaling
+        _Mest_hessian = _hessian[:, overall]
+        _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
 
         # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
 
         null_idx = range(overall.sum(), p)
         inactive_idx = np.nonzero(inactive)[0]
         for _i, _n in zip(inactive_idx, null_idx):
-            _score_linear_term[_i,_n] = -_sqrt_scaling
+            _score_linear_term[_i, _n] = -_sqrt_scaling
 
         # c_E piece
 
         scaling_slice = slice(0, active_groups.sum())
-        if len(active_directions)==0:
-            _opt_hessian=0
+        if len(active_directions) == 0:
+            _opt_hessian = 0
         else:
             _opt_hessian = (_hessian + epsilon * np.identity(p)).dot(active_directions)
-        _opt_linear_term[:,scaling_slice] = _opt_hessian / _sqrt_scaling
+        _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling
 
         self.observed_opt_state[scaling_slice] *= _sqrt_scaling
 
         # beta_U piece
 
         unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
-        unpenalized_directions = np.identity(p)[:,unpenalized]
+        unpenalized_directions = np.identity(p)[:, unpenalized]
         if unpenalized.sum():
-            _opt_linear_term[:,unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
+            _opt_linear_term[:, unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(
+                unpenalized_directions) / _sqrt_scaling
 
         self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling
 
         # subgrad piece
 
-        subgrad_idx = range(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
-        subgrad_slice = slice(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
+        subgrad_idx = range(active_groups.sum() + unpenalized.sum(),
+                            active_groups.sum() + inactive.sum() + unpenalized.sum())
+        subgrad_slice = slice(active_groups.sum() + unpenalized.sum(),
+                              active_groups.sum() + inactive.sum() + unpenalized.sum())
         for _i, _s in zip(inactive_idx, subgrad_idx):
-            _opt_linear_term[_i,_s] = _sqrt_scaling
+            _opt_linear_term[_i, _s] = _sqrt_scaling
 
         self.observed_opt_state[subgrad_slice] /= _sqrt_scaling
 
@@ -218,7 +222,7 @@ def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
         for i, g in enumerate(groups):
             if active_groups[i]:
                 group = penalty.groups == g
-                _opt_affine_term[group] = active_directions[:,idx][group] * penalty.weights[g]
+                _opt_affine_term[group] = active_directions[:, idx][group] * penalty.weights[g]
                 idx += 1
 
         # two transforms that encode score and optimization
@@ -227,11 +231,7 @@ def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
         # later, we will modify `score_transform`
         # in `linear_decomposition`
 
-        _opt_linear_term = np.concatenate((_opt_linear_term[self._overall,:], _opt_linear_term[~self._overall,:]), 0)
-        _opt_affine_term = np.concatenate((_opt_affine_term[self._overall], _opt_affine_term[~self._overall]),0)
         self.opt_transform = (_opt_linear_term, _opt_affine_term)
-
-        _score_linear_term = np.concatenate((_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
 
         # now store everything needed for the projections
@@ -243,7 +243,8 @@ def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
         # weights are scaled here because the linear terms scales them by scaling
 
         new_groups = penalty.groups[inactive]
-        new_weights = dict([(g, penalty.weights[g] / _sqrt_scaling) for g in penalty.weights.keys() if g in np.unique(new_groups)])
+        new_weights = dict(
+            [(g, penalty.weights[g] / _sqrt_scaling) for g in penalty.weights.keys() if g in np.unique(new_groups)])
 
         # we form a dual group lasso object
         # to do the projection
@@ -253,7 +254,7 @@ def Msolve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
 
         self._setup = True
 
-    def setup_sampler(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
+    def setup_sampler(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
         pass
 
     def projection(self, opt_state):
@@ -265,19 +266,18 @@ def projection(self, opt_state):
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-
         if ('subgradient' not in self.selection_variable and
-            'scaling' not in self.selection_variable): # have not conditioned on any thing else
-            new_state = opt_state.copy() # not really necessary to copy
+                    'scaling' not in self.selection_variable):  # have not conditioned on any thing else
+            new_state = opt_state.copy()  # not really necessary to copy
             new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
             new_state[self.subgrad_slice] = self.group_lasso_dual.bound_prox(opt_state[self.subgrad_slice])
         elif ('subgradient' not in self.selection_variable and
-              'scaling' in self.selection_variable): # conditioned on the initial scalings
-                                                     # only the subgradient in opt_state
+                      'scaling' in self.selection_variable):  # conditioned on the initial scalings
+            # only the subgradient in opt_state
             new_state = self.group_lasso_dual.bound_prox(opt_state)
         elif ('subgradient' in self.selection_variable and
-              'scaling' not in self.selection_variable): # conditioned on the subgradient
-                                                         # only the scaling in opt_state
+                      'scaling' not in self.selection_variable):  # conditioned on the subgradient
+            # only the scaling in opt_state
             new_state = np.maximum(opt_state, 0)
         else:
             new_state = opt_state
@@ -294,8 +294,8 @@ def condition_on_subgradient(self):
 
         opt_linear, opt_offset = self.opt_transform
 
-        new_offset = opt_linear[:,self.subgrad_slice].dot(self.observed_opt_state[self.subgrad_slice]) + opt_offset
-        new_linear = opt_linear[:,self.scaling_slice]
+        new_offset = opt_linear[:, self.subgrad_slice].dot(self.observed_opt_state[self.subgrad_slice]) + opt_offset
+        new_linear = opt_linear[:, self.scaling_slice]
 
         self.opt_transform = (new_linear, new_offset)
 
@@ -319,8 +319,8 @@ def condition_on_scalings(self):
 
         opt_linear, opt_offset = self.opt_transform
 
-        new_offset = opt_linear[:,self.scaling_slice].dot(self.observed_opt_state[self.scaling_slice]) + opt_offset
-        new_linear = opt_linear[:,self.subgrad_slice]
+        new_offset = opt_linear[:, self.scaling_slice].dot(self.observed_opt_state[self.scaling_slice]) + opt_offset
+        new_linear = opt_linear[:, self.subgrad_slice]
 
         self.opt_transform = (new_linear, new_offset)
 
@@ -335,25 +335,24 @@ def condition_on_scalings(self):
         self.num_opt_var = new_linear.shape[1]
 
 
-
-def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
-
+def restricted_Mest(Mest_loss, active, solve_args={'min_its': 50, 'tol': 1.e-10}):
     X, Y = Mest_loss.data
 
     if Mest_loss._is_transform:
-        raise NotImplementedError('to fit restricted model, X must be an ndarray or scipy.sparse; general transforms not implemented')
-    X_restricted = X[:,active]
+        raise NotImplementedError(
+            'to fit restricted model, X must be an ndarray or scipy.sparse; general transforms not implemented')
+    X_restricted = X[:, active]
     loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
     beta_E = loss_restricted.solve(**solve_args)
 
     return beta_E
 
-class M_estimator_split(M_estimator):
 
-    def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10}):
+class M_estimator_split(M_estimator):
+    def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its': 50, 'tol': 1.e-10}):
         total_size = loss.saturated_loss.shape[0]
         self.randomization = split(loss.shape, subsample_size, total_size)
-        M_estimator.__init__(self,loss, epsilon, penalty, self.randomization, solve_args=solve_args)
+        M_estimator.__init__(self, loss, epsilon, penalty, self.randomization, solve_args=solve_args)
 
         total_size = loss.saturated_loss.shape[0]
         if subsample_size > total_size:
@@ -370,9 +369,9 @@ def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B
         # now we need to estimate covariance of
         # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)
 
-        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0] # shorthand
+        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0]  # shorthand
 
-        from .glm import pairs_bootstrap_score # need to correct these imports!!!
+        from .glm import pairs_bootstrap_score  # need to correct these imports!!!
 
         bootstrap_score = pairs_bootstrap_score(self.loss,
                                                 self._overall,
@@ -396,8 +395,8 @@ def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B
 
         def subsample_diff(m, n, indices):
             subsample = np.random.choice(indices, size=m, replace=False)
-            full_score = bootstrap_score(indices) # a sum of n terms
-            randomized_score = bootstrap_score_split(subsample) # a sum of m terms
+            full_score = bootstrap_score(indices)  # a sum of n terms
+            randomized_score = bootstrap_score_split(subsample)  # a sum of m terms
             return full_score - randomized_score * inv_frac
 
         first_moment = np.zeros(p)

From 8d399abd8de2601a65978683f3707dab7714ca4f Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22ca02.SUNet>
Date: Tue, 10 Jan 2017 13:37:13 -0800
Subject: [PATCH 009/617] restructured the files and made changes in approx_ci
 folder

---
 selection/approx_ci/__init__.py              |   0
 selection/approx_ci/api.py                   |   0
 selection/approx_ci/ci_via_approx_density.py | 346 +++++++++++++++++++
 selection/approx_ci/estimator_approx.py      | 114 ++++++
 selection/randomized/M_estimator.py          | 103 +++---
 selection/randomized/glm.py                  |   2 +-
 selection/randomized/threshold_score.py      |  15 +-
 7 files changed, 522 insertions(+), 58 deletions(-)
 create mode 100644 selection/approx_ci/__init__.py
 create mode 100644 selection/approx_ci/api.py
 create mode 100644 selection/approx_ci/ci_via_approx_density.py
 create mode 100644 selection/approx_ci/estimator_approx.py

diff --git a/selection/approx_ci/__init__.py b/selection/approx_ci/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/approx_ci/api.py b/selection/approx_ci/api.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
new file mode 100644
index 000000000..37130df51
--- /dev/null
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -0,0 +1,346 @@
+import numpy as np
+import regreg.api as rr
+from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
+from scipy.stats import norm
+
+def myround(a, decimals=1):
+    """Round array `a` to one decimal, then move odd tenths up to the next even tenth (`decimals` is unused)."""
+    a_x = np.round(np.round(a, decimals=1) * 10.)  # re-round: 0.7 * 10. is 7.000000000000001, not 7.
+    rem = np.remainder(a_x, 2) == 1
+    a_x[rem] = a_x[rem] + 1.
+    return a_x/10.
+
+
+class neg_log_cube_probability_laplace(rr.smooth_atom):
+    def __init__(self,
+                 q, #equals p - E in our case
+                 lagrange,
+                 randomization_scale = 1., #used below as the scale b of the Laplace randomization
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+        """Store the Laplace scale `b`, the interval half-widths `lagrange` and the dimension `q`."""
+        self.b = randomization_scale
+        self.lagrange = lagrange
+        self.q = q
+
+        rr.smooth_atom.__init__(self,
+                                (self.q,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=None,
+                                coef=coef)
+
+    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
+        """Return -sum(log(F(arg_u) - F(arg_l))), F the standard Laplace CDF, and/or its gradient, as set by `mode`."""
+        arg = self.apply_offset(arg)
+
+        arg_u = (arg + self.lagrange)/self.b
+        arg_l = (arg - self.lagrange)/self.b
+        scaled_lagrange = (2* self.lagrange)/self.b
+
+        ind_arg_1 = np.zeros(self.q, bool)
+        ind_arg_1[(arg_u <0.)] = 1
+        ind_arg_2 = np.zeros(self.q, bool)
+        ind_arg_2[(arg_l >0.)] = 1
+        ind_arg_3 = np.logical_and(~ind_arg_1, ~ind_arg_2)
+        cube_prob = np.zeros(self.q)
+        cube_prob[ind_arg_1] = np.exp(arg_u[ind_arg_1])/2. - np.exp(arg_l[ind_arg_1])/2.
+        cube_prob[ind_arg_2] = -np.exp(-arg_u[ind_arg_2])/2. + np.exp(-arg_l[ind_arg_2])/2.
+        cube_prob[ind_arg_3] = 1- np.exp(-arg_u[ind_arg_3])/2. - np.exp(arg_l[ind_arg_3])/2.
+        neg_log_cube_prob = -np.log(cube_prob).sum()
+
+        log_cube_grad = np.zeros(self.q)
+        log_cube_grad[ind_arg_1] = 1./self.b
+        # for arg_l > 0 the probability is exp(-arg / b) * sinh(lagrange / b), so its log has slope -1 / b
+        log_cube_grad[ind_arg_2] = -1./self.b
+        num_cube_grad = np.true_divide(np.exp(-scaled_lagrange[ind_arg_3]), 2 * self.b) - \
+                        np.true_divide(np.exp((2* arg_l[ind_arg_3])), 2 * self.b)
+        den_cube_grad = np.exp(arg_l[ind_arg_3]) - np.exp(-scaled_lagrange[ind_arg_3])/2. - \
+                        np.exp(2* arg_l[ind_arg_3])/2.
+        log_cube_grad[ind_arg_3] = np.true_divide(num_cube_grad,den_cube_grad)
+        neg_log_cube_grad = -log_cube_grad
+
+        if mode == 'func':
+            return self.scale(neg_log_cube_prob)
+        elif mode == 'grad':
+            return self.scale(neg_log_cube_grad)
+        elif mode == 'both':
+            return self.scale(neg_log_cube_prob), self.scale(neg_log_cube_grad)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+
+class neg_log_cube_probability(rr.smooth_atom):
+    def __init__(self,
+                 q, #equals p - E in our case
+                 lagrange,
+                 randomization_scale = 1., #equals the randomization variance in our case
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+        """Store `q`, `lagrange` and `randomization_scale`. NOTE(review): the scale divides `arg` like a standard deviation."""
+        self.randomization_scale = randomization_scale
+        self.lagrange = lagrange
+        self.q = q
+
+        rr.smooth_atom.__init__(self,
+                                (self.q,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=None,
+                                coef=coef)
+
+    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
+        """Return -sum(log(norm.cdf(arg_u) - norm.cdf(arg_l))) and/or its gradient in `arg`, as selected by `mode`."""
+        arg = self.apply_offset(arg)
+        # standardized end points of the interval [arg - lagrange, arg + lagrange]
+        arg_u = (arg + self.lagrange)/self.randomization_scale
+        arg_l = (arg - self.lagrange)/self.randomization_scale
+        prod_arg = np.exp(-(2. * self.lagrange * arg)/(self.randomization_scale**2))
+        neg_prod_arg = np.exp((2. * self.lagrange * arg)/(self.randomization_scale**2))
+        cube_prob = norm.cdf(arg_u) - norm.cdf(arg_l)
+        log_cube_prob = -np.log(cube_prob).sum()  # despite the name, this is the negative log probability
+        threshold = 10 ** -10  # coordinates with cube_prob at or below this use the approximate gradients below
+        indicator = np.zeros(self.q, bool)
+        indicator[(cube_prob > threshold)] = 1
+        positive_arg = np.zeros(self.q, bool)
+        positive_arg[(arg>0)] = 1
+        pos_index = np.logical_and(positive_arg, ~indicator)
+        neg_index = np.logical_and(~positive_arg, ~indicator)
+        log_cube_grad = np.zeros(self.q)
+        log_cube_grad[indicator] = (np.true_divide(-norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
+                                        cube_prob[indicator]))/self.randomization_scale
+        # arg > 0, tiny cube_prob: same ratio with each normal tail replaced by pdf(x) / x; prod_arg = pdf(arg_u) / pdf(arg_l)
+        log_cube_grad[pos_index] = ((-1. + prod_arg[pos_index])/
+                                     ((prod_arg[pos_index]/arg_u[pos_index])-
+                                      (1./arg_l[pos_index])))/self.randomization_scale
+        # arg <= 0, tiny cube_prob: separate closed-form approximation -- NOTE(review): derivation not shown, confirm
+        log_cube_grad[neg_index] = ((arg_u[neg_index] -(arg_l[neg_index]*neg_prod_arg[neg_index]))
+                                    /self.randomization_scale)/(1.- neg_prod_arg[neg_index])
+
+
+        if mode == 'func':
+            return self.scale(log_cube_prob)
+        elif mode == 'grad':
+            return self.scale(log_cube_grad)
+        elif mode == 'both':
+            return self.scale(log_cube_prob), self.scale(log_cube_grad)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+
+class approximate_conditional_prob(rr.smooth_atom):
+    """Smooth objective in the `map.nactive` optimization variables, built from `map` with the target fixed at `t`."""
+    def __init__(self,
+                 t, #point at which density is to computed
+                 map,
+                 coef = 1.,
+                 offset= None,
+                 quadratic= None):
+        """Keep `t` and `map`, require `map.randomization.CGF_conjugate` and start from `map.feasible_point`."""
+        self.t = t
+        self.map = map
+        self.q = map.p - map.nactive
+        self.inactive_conjugate = self.active_conjugate = map.randomization.CGF_conjugate
+
+        if self.active_conjugate is None:
+            raise ValueError(
+                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates')
+
+        self.inactive_lagrange = self.map.inactive_lagrange
+
+        rr.smooth_atom.__init__(self,
+                                (map.nactive,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=self.map.feasible_point,
+                                coef=coef)
+
+        self.coefs[:] = map.feasible_point
+
+        self.nonnegative_barrier = nonnegative_softmax_scaled(self.map.nactive)
+
+
+    def sel_prob_smooth_objective(self, param, mode='both', check_feasibility=False):
+        """Value and/or gradient (per `mode`) of active conjugate + cube loss + nonnegative barrier at `param`."""
+        param = self.apply_offset(param)
+
+        # contribution of the target value `t` along `map.A`, split into active and inactive coordinates
+        data = np.squeeze(self.t *  self.map.A)
+
+        offset_active = self.map.offset_active + data[:self.map.nactive]
+        offset_inactive = self.map.offset_inactive + data[self.map.nactive:]
+
+        active_conj_loss = rr.affine_smooth(self.active_conjugate,
+                                            rr.affine_transform(self.map.B_active, offset_active))
+
+        if self.map.randomizer == 'laplace':
+            cube_obj = neg_log_cube_probability_laplace(self.q, self.inactive_lagrange, randomization_scale = 1.)
+        elif self.map.randomizer == 'gaussian':
+            cube_obj = neg_log_cube_probability(self.q, self.inactive_lagrange, randomization_scale = 1.)
+        else:
+            raise ValueError("randomizer must be 'laplace' or 'gaussian'")
+
+        cube_loss = rr.affine_smooth(cube_obj, rr.affine_transform(self.map.B_inactive, offset_inactive))
+
+        total_loss = rr.smooth_sum([active_conj_loss,
+                                    cube_loss,
+                                    self.nonnegative_barrier])
+
+        if mode == 'func':
+            f = total_loss.smooth_objective(param, 'func')
+            return self.scale(f)
+        elif mode == 'grad':
+            g = total_loss.smooth_objective(param, 'grad')
+            return self.scale(g)
+        elif mode == 'both':
+            f, g = total_loss.smooth_objective(param, 'both')
+            return self.scale(f), self.scale(g)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+    def minimize2(self, step=1, nstep=30, tol=1.e-6):
+        """Backtracking gradient descent from `self.coefs` kept strictly positive; returns (point, objective value)."""
+        current = self.coefs
+        current_value = np.inf
+
+        objective = lambda u: self.sel_prob_smooth_objective(u, 'func')
+        grad = lambda u: self.sel_prob_smooth_objective(u, 'grad')
+
+        for itercount in range(nstep):
+            newton_step = grad(current)  # the plain gradient, despite the name
+
+            # make sure proposal is feasible
+
+            count = 0
+            while True:
+                count += 1
+                proposal = current - step * newton_step
+                if np.all(proposal > 0):
+                    break
+                step *= 0.5
+                if count >= 40:
+                    raise ValueError('not finding a feasible point')
+
+            # make sure proposal is a descent
+
+            # NOTE: unlike the feasibility loop above, this backtracking loop has no iteration cap
+            while True:
+                proposal = current - step * newton_step
+                proposed_value = objective(proposal)
+                if proposed_value <= current_value:
+                    break
+                step *= 0.5
+
+            # stop if relative decrease is small
+
+            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+                current = proposal
+                current_value = proposed_value
+                break
+
+            current = proposal
+            current_value = proposed_value
+
+            # `step` carries over between iterations; let it grow again on every fourth iteration
+            if itercount % 4 == 0:
+                step *= 2
+
+        value = objective(current)
+
+        return current, value
+
+class approximate_conditional_density(rr.smooth_atom):
+    """Grid approximation to the conditional density of each target coordinate exposed by `sel_alg`."""
+    def __init__(self, sel_alg,
+                       coef=1.,
+                       offset=None,
+                       quadratic=None,
+                       nstep=10):
+        """Keep `sel_alg` and copy its observed target and target covariance; `nstep` is accepted but not used."""
+        self.sel_alg = sel_alg
+
+        rr.smooth_atom.__init__(self,
+                                (1,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                coef=coef)
+
+        self.target_observed = self.sel_alg.target_observed
+        self.nactive = self.target_observed.shape[0]
+        self.target_cov = self.sel_alg.target_cov
+
+    def solve_approx(self):
+        """Fill `self.h_approx[j]` on the grid for every j and record the grid index nearest each observed target."""
+        #defining the grid on which marginal conditional densities will be evaluated
+        grid_length = 201
+        self.grid = np.linspace(-5, 15, num=grid_length)
+        #self.grid = np.linspace(-5*np.amax(np.absolute(target_observed)), 5*np.amax(np.absolute(target_observed)), num=grid_length)
+        # NOTE: the grid is hard-coded to [-5, 15]; observed values outside it are clamped to an end point below
+
+        print("observed values", self.target_observed)
+        self.ind_obs = np.zeros(self.nactive, int)
+        self.norm = np.zeros(self.nactive)
+        self.h_approx = np.zeros((self.nactive, self.grid.shape[0]))
+
+        for j in range(self.nactive):
+            obs = self.target_observed[j]
+            self.norm[j] = self.target_cov[j,j]
+            if obs < self.grid[0]:
+                self.ind_obs[j] = 0
+            elif obs > np.max(self.grid):
+                self.ind_obs[j] = grid_length-1
+            else:
+                self.ind_obs[j] = np.argmin(np.abs(self.grid-obs))
+            self.h_approx[j, :] = self.approx_conditional_prob(j)
+
+
+    def approx_conditional_prob(self, j):
+        """Return, for each grid point, minus the minimized `approximate_conditional_prob` objective for coordinate j."""
+        h_hat = []
+        self.sel_alg.setup_map(j)
+
+        for i in range(self.grid.shape[0]):
+
+            approx = approximate_conditional_prob(self.grid[i], self.sel_alg)
+            # `j` must not be passed positionally: minimize2's first argument is the initial step size
+            h_hat.append(-approx.minimize2(nstep=50)[1])
+
+        return np.array(h_hat)
+
+    def area_normalized_density(self, j, mean):
+        """Cumulative sum over the grid of the normalized weights exp(-(grid - mean)**2 / (2 * norm[j]) + h_approx[j])."""
+        normalizer = 0.
+        approx_nonnormalized = []
+
+        for i in range(self.grid.shape[0]):
+            approx_density = np.exp(-np.true_divide((self.grid[i] - mean) ** 2, 2 * self.norm[j])
+                                    + (self.h_approx[j,:])[i])
+            normalizer += approx_density
+            approx_nonnormalized.append(approx_density)
+
+        return np.cumsum(np.array(approx_nonnormalized / normalizer))
+
+    def approximate_ci(self, j):
+        """Return (min, max) of the grid means whose CDF at the observed index lies in [0.05, 0.95], or (0, 0) if none."""
+        grid_length = 201
+        #param_grid = np.linspace(-5*np.amax(np.absolute(self.target_observed)), 5*np.amax(np.absolute(self.target_observed)), num=grid_length)
+        param_grid = np.linspace(-5, 15, num=grid_length)
+        area = np.zeros(param_grid.shape[0])
+
+        for k in range(param_grid.shape[0]):
+            area_vec = self.area_normalized_density(j, param_grid[k])
+            area[k] = area_vec[self.ind_obs[j]]
+
+        region = param_grid[(area >= 0.05) & (area <= 0.95)]
+        if region.size > 0:
+            return np.nanmin(region), np.nanmax(region)
+        else:
+            return 0, 0
+
+    def approximate_pvalue(self, j, param):
+        """Return 2 * min(area, 1 - area), where area is the approximate CDF at the observed index for mean `param`."""
+        area_vec = self.area_normalized_density(j, param)
+        area = area_vec[self.ind_obs[j]]
+
+        return 2*min(area, 1-area)
\ No newline at end of file
diff --git a/selection/approx_ci/estimator_approx.py b/selection/approx_ci/estimator_approx.py
new file mode 100644
index 000000000..5c63e8147
--- /dev/null
+++ b/selection/approx_ci/estimator_approx.py
@@ -0,0 +1,114 @@
+import numpy as np
+from selection.randomized.M_estimator import M_estimator
+from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
+
+from selection.randomized.threshold_score import threshold_score
+
+class M_estimator_approx(M_estimator):
+    """`M_estimator` that also stores the quantities read by the approximate conditional density code."""
+    def __init__(self, loss, epsilon, penalty, randomization, randomizer):
+        M_estimator.__init__(self, loss, epsilon, penalty, randomization)
+        self.randomizer = randomizer
+
+    def solve_approx(self):
+        """Run `solve`, put active rows first in both transforms and bootstrap the covariances used by `setup_map`."""
+        self.solve()
+        # reorder rows so that the active (`_overall`) coordinates come first
+        (_opt_linear_term, _opt_affine_term) = self.opt_transform
+        self._opt_linear_term = np.concatenate(
+            (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
+        self._opt_affine_term = np.concatenate((_opt_affine_term[self._overall], _opt_affine_term[~self._overall]), 0)
+        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
+
+        (_score_linear_term, _) = self.score_transform
+        self._score_linear_term = np.concatenate(
+            (_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
+        self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
+        self.feasible_point = np.abs(self.initial_soln[self._overall])
+        # one weight per feature, in feature order (dict order is not guaranteed and `iteritems` is Python 2 only)
+        lagrange = np.array([self.penalty.weights[g] for g in self.penalty.groups])
+        self.inactive_lagrange = lagrange[~self._overall]
+
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self._overall,
+                                              beta_full=self._beta_full,
+                                              inactive=~self._overall)[0]
+
+        score_cov = bootstrap_cov(lambda: np.random.choice(n, size=(n,), replace=True), bootstrap_score)
+        nactive = self._overall.sum()
+        self.score_target_cov = score_cov[:, :nactive]
+        self.target_cov = score_cov[:nactive, :nactive]
+        self.target_observed = self.observed_score_state[:nactive]
+        self.nactive = nactive
+
+        self.B_active = self._opt_linear_term[:nactive, :nactive]
+        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
+
+
+    def setup_map(self, j):
+        """Set `A` (score direction for target j) and the offsets with the observed j-th target contribution removed."""
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
+        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
+
+        self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
+        self.offset_inactive = self.null_statistic[self.nactive:]
+
+
+class threshold_score_approx(threshold_score):
+    """`threshold_score` that also stores the quantities read by the approximate conditional density code."""
+    def __init__(self, loss,
+                 threshold,
+                 randomization,
+                 active_bool,
+                 inactive_bool,
+                 randomizer):
+        """Forward everything except `randomizer` to `threshold_score.__init__` and keep `randomizer` on the instance."""
+        threshold_score.__init__(self, loss, threshold, randomization, active_bool, inactive_bool)
+        self.randomizer = randomizer
+
+    def solve_approx(self):
+        self.solve()
+        self.setup_sampler()
+        self.feasible_point = self.observed_opt_state[self.boundary]
+        (_opt_linear_term, _opt_offset) = self.opt_transform
+        self._opt_linear_term = np.concatenate((_opt_linear_term[self.boundary, :], _opt_linear_term[self.interior, :]),
+                                               0)
+        self._opt_affine_term = np.concatenate((_opt_offset[self.boundary], _opt_offset[self.interior]), 0)
+        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
+        # same reordering for the score transform: `boundary` rows first, then `interior` rows
+        (_score_linear_term, _) = self.score_transform
+        self._score_linear_term = np.concatenate(
+            (_score_linear_term[self.boundary, :], _score_linear_term[self.interior, :]), 0)
+        self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
+        self._overall = self.boundary
+        self.inactive_lagrange = self.threshold[0] * np.ones(np.sum(~self.boundary))  # first threshold entry used for all
+        # bootstrap the covariance of the score; its leading `nactive` block is taken as the target covariance
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self._overall,
+                                              beta_full=self._beta_full,
+                                              inactive=~self._overall)[0]
+
+        score_cov = bootstrap_cov(lambda: np.random.choice(n, size=(n,), replace=True), bootstrap_score)
+        nactive = self._overall.sum()
+        self.score_target_cov = score_cov[:, :nactive]
+        self.target_cov = score_cov[:nactive, :nactive]
+        self.target_observed = self.observed_score_state[:nactive]
+        self.nactive = nactive
+
+        self.B_active = self._opt_linear_term[:nactive, :nactive]
+        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
+
+
+    def setup_map(self, j):
+        """Set `A` (score direction for target j) and the offsets with the observed j-th target contribution removed."""
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
+        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
+
+        self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
+        self.offset_inactive = self.null_statistic[self.nactive:]
diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 170f9306d..8fbedb9d2 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -4,9 +4,9 @@
 from .query import query
 from .randomization import split
 
-
 class M_estimator(query):
-    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
+
+    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
         """
         Fits the logistic regression to a candidate active set, without penalty.
         Calls the method bootstrap_covariance() to bootstrap the covariance matrix.
@@ -41,7 +41,7 @@ def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':
 
     # Methods needed for subclassing a query
 
-    def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
+    def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
 
         self.randomize()
 
@@ -77,8 +77,7 @@ def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
 
         for i, g in enumerate(groups):
             group = penalty.groups == g
-            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) > 1.e-6 * penalty.weights[g]) and (
-            penalty.weights[g] > 0)
+            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) > 1.e-6 * penalty.weights[g]) and (penalty.weights[g] > 0)
             unpenalized_groups[i] = (penalty.weights[g] == 0)
             if active_groups[i]:
                 active[group] = True
@@ -98,15 +97,15 @@ def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
         self._active_groups = np.array(active_groups, np.bool)
         self._unpenalized_groups = np.array(unpenalized_groups, np.bool)
 
-        self.selection_variable = {'groups': self._active_groups,
-                                   'variables': self._overall,
-                                   'directions': self._active_directions}
+        self.selection_variable = {'groups':self._active_groups,
+                                   'variables':self._overall,
+                                   'directions':self._active_directions}
 
         # initial state for opt variables
 
         initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
                             self.randomized_loss.quadratic.objective(self.initial_soln, 'grad'))
-        # the quadratic of a smooth_atom is not included in computing the smooth_objective
+                          # the quadratic of a smooth_atom is not included in computing the smooth_objective
 
         initial_subgrad = initial_subgrad[self._inactive]
         initial_unpenalized = self.initial_soln[self._unpenalized]
@@ -159,7 +158,7 @@ def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
 
         # form linear part
 
-        self.num_opt_var = p = loss.shape[0]  # shorthand for p
+        self.num_opt_var = p = loss.shape[0] # shorthand for p
 
         # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
         # E for active
@@ -172,45 +171,42 @@ def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
         # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
 
         Mest_slice = slice(0, overall.sum())
-        _Mest_hessian = _hessian[:, overall]
-        _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
+        _Mest_hessian = _hessian[:,overall]
+        _score_linear_term[:,Mest_slice] = -_Mest_hessian / _sqrt_scaling
 
         # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
 
         null_idx = range(overall.sum(), p)
         inactive_idx = np.nonzero(inactive)[0]
         for _i, _n in zip(inactive_idx, null_idx):
-            _score_linear_term[_i, _n] = -_sqrt_scaling
+            _score_linear_term[_i,_n] = -_sqrt_scaling
 
         # c_E piece
 
         scaling_slice = slice(0, active_groups.sum())
-        if len(active_directions) == 0:
-            _opt_hessian = 0
+        if len(active_directions)==0:
+            _opt_hessian=0
         else:
             _opt_hessian = (_hessian + epsilon * np.identity(p)).dot(active_directions)
-        _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling
+        _opt_linear_term[:,scaling_slice] = _opt_hessian / _sqrt_scaling
 
         self.observed_opt_state[scaling_slice] *= _sqrt_scaling
 
         # beta_U piece
 
         unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
-        unpenalized_directions = np.identity(p)[:, unpenalized]
+        unpenalized_directions = np.identity(p)[:,unpenalized]
         if unpenalized.sum():
-            _opt_linear_term[:, unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(
-                unpenalized_directions) / _sqrt_scaling
+            _opt_linear_term[:,unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
 
         self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling
 
         # subgrad piece
 
-        subgrad_idx = range(active_groups.sum() + unpenalized.sum(),
-                            active_groups.sum() + inactive.sum() + unpenalized.sum())
-        subgrad_slice = slice(active_groups.sum() + unpenalized.sum(),
-                              active_groups.sum() + inactive.sum() + unpenalized.sum())
+        subgrad_idx = range(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
+        subgrad_slice = slice(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
         for _i, _s in zip(inactive_idx, subgrad_idx):
-            _opt_linear_term[_i, _s] = _sqrt_scaling
+            _opt_linear_term[_i,_s] = _sqrt_scaling
 
         self.observed_opt_state[subgrad_slice] /= _sqrt_scaling
 
@@ -222,18 +218,18 @@ def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
         for i, g in enumerate(groups):
             if active_groups[i]:
                 group = penalty.groups == g
-                _opt_affine_term[group] = active_directions[:, idx][group] * penalty.weights[g]
+                _opt_affine_term[group] = active_directions[:,idx][group] * penalty.weights[g]
                 idx += 1
 
         # two transforms that encode score and optimization
         # variable roles
 
-        # later, we will modify `score_transform`
-        # in `linear_decomposition`
-
         self.opt_transform = (_opt_linear_term, _opt_affine_term)
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
 
+        # later, we will modify `score_transform`
+        # in `linear_decomposition`
+
         # now store everything needed for the projections
         # the projection acts only on the optimization
         # variables
@@ -243,8 +239,7 @@ def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
         # weights are scaled here because the linear terms scales them by scaling
 
         new_groups = penalty.groups[inactive]
-        new_weights = dict(
-            [(g, penalty.weights[g] / _sqrt_scaling) for g in penalty.weights.keys() if g in np.unique(new_groups)])
+        new_weights = dict([(g, penalty.weights[g] / _sqrt_scaling) for g in penalty.weights.keys() if g in np.unique(new_groups)])
 
         # we form a dual group lasso object
         # to do the projection
@@ -254,7 +249,7 @@ def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
 
         self._setup = True
 
-    def setup_sampler(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):
+    def setup_sampler(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
         pass
 
     def projection(self, opt_state):
@@ -266,18 +261,19 @@ def projection(self, opt_state):
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
+
         if ('subgradient' not in self.selection_variable and
-                    'scaling' not in self.selection_variable):  # have not conditioned on any thing else
-            new_state = opt_state.copy()  # not really necessary to copy
+            'scaling' not in self.selection_variable): # have not conditioned on any thing else
+            new_state = opt_state.copy() # not really necessary to copy
             new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
             new_state[self.subgrad_slice] = self.group_lasso_dual.bound_prox(opt_state[self.subgrad_slice])
         elif ('subgradient' not in self.selection_variable and
-                      'scaling' in self.selection_variable):  # conditioned on the initial scalings
-            # only the subgradient in opt_state
+              'scaling' in self.selection_variable): # conditioned on the initial scalings
+                                                     # only the subgradient in opt_state
             new_state = self.group_lasso_dual.bound_prox(opt_state)
         elif ('subgradient' in self.selection_variable and
-                      'scaling' not in self.selection_variable):  # conditioned on the subgradient
-            # only the scaling in opt_state
+              'scaling' not in self.selection_variable): # conditioned on the subgradient
+                                                         # only the scaling in opt_state
             new_state = np.maximum(opt_state, 0)
         else:
             new_state = opt_state
@@ -294,8 +290,8 @@ def condition_on_subgradient(self):
 
         opt_linear, opt_offset = self.opt_transform
 
-        new_offset = opt_linear[:, self.subgrad_slice].dot(self.observed_opt_state[self.subgrad_slice]) + opt_offset
-        new_linear = opt_linear[:, self.scaling_slice]
+        new_offset = opt_linear[:,self.subgrad_slice].dot(self.observed_opt_state[self.subgrad_slice]) + opt_offset
+        new_linear = opt_linear[:,self.scaling_slice]
 
         self.opt_transform = (new_linear, new_offset)
 
@@ -319,8 +315,8 @@ def condition_on_scalings(self):
 
         opt_linear, opt_offset = self.opt_transform
 
-        new_offset = opt_linear[:, self.scaling_slice].dot(self.observed_opt_state[self.scaling_slice]) + opt_offset
-        new_linear = opt_linear[:, self.subgrad_slice]
+        new_offset = opt_linear[:,self.scaling_slice].dot(self.observed_opt_state[self.scaling_slice]) + opt_offset
+        new_linear = opt_linear[:,self.subgrad_slice]
 
         self.opt_transform = (new_linear, new_offset)
 
@@ -335,24 +331,25 @@ def condition_on_scalings(self):
         self.num_opt_var = new_linear.shape[1]
 
 
-def restricted_Mest(Mest_loss, active, solve_args={'min_its': 50, 'tol': 1.e-10}):
+
+def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
+
     X, Y = Mest_loss.data
 
     if Mest_loss._is_transform:
-        raise NotImplementedError(
-            'to fit restricted model, X must be an ndarray or scipy.sparse; general transforms not implemented')
-    X_restricted = X[:, active]
+        raise NotImplementedError('to fit restricted model, X must be an ndarray or scipy.sparse; general transforms not implemented')
+    X_restricted = X[:,active]
     loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
     beta_E = loss_restricted.solve(**solve_args)
 
     return beta_E
 
-
 class M_estimator_split(M_estimator):
-    def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its': 50, 'tol': 1.e-10}):
+
+    def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10}):
         total_size = loss.saturated_loss.shape[0]
         self.randomization = split(loss.shape, subsample_size, total_size)
-        M_estimator.__init__(self, loss, epsilon, penalty, self.randomization, solve_args=solve_args)
+        M_estimator.__init__(self,loss, epsilon, penalty, self.randomization, solve_args=solve_args)
 
         total_size = loss.saturated_loss.shape[0]
         if subsample_size > total_size:
@@ -369,9 +366,9 @@ def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B
         # now we need to estimate covariance of
         # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)
 
-        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0]  # shorthand
+        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0] # shorthand
 
-        from .glm import pairs_bootstrap_score  # need to correct these imports!!!
+        from .glm import pairs_bootstrap_score # need to correct these imports!!!
 
         bootstrap_score = pairs_bootstrap_score(self.loss,
                                                 self._overall,
@@ -395,8 +392,8 @@ def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B
 
         def subsample_diff(m, n, indices):
             subsample = np.random.choice(indices, size=m, replace=False)
-            full_score = bootstrap_score(indices)  # a sum of n terms
-            randomized_score = bootstrap_score_split(subsample)  # a sum of m terms
+            full_score = bootstrap_score(indices) # a sum of n terms
+            randomized_score = bootstrap_score_split(subsample) # a sum of m terms
             return full_score - randomized_score * inv_frac
 
         first_moment = np.zeros(p)
@@ -415,4 +412,4 @@ def subsample_diff(m, n, indices):
         cov = second_moment - np.multiply.outer(first_moment,
                                                 first_moment)
 
-        self.randomization.set_covariance(cov)
\ No newline at end of file
+        self.randomization.set_covariance(cov)
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index a445d1bb5..4e57b7fd3 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -536,4 +536,4 @@ def standard_ci_sm(X, y, active, leftout_indices, alpha=0.1):
     logit = sm.Logit(y2, X2)
     result = logit.fit(disp=0)
     LU = result.conf_int(alpha=alpha)
-    return LU.T
+    return LU.T
\ No newline at end of file
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index 4d0a9a4ca..6732f06ae 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -61,9 +61,10 @@ def solve(self):
 
         beta_full = np.zeros(self.loss.shape)
         beta_full[active] = beta_active
+        self._beta_full = beta_full
 
         inactive_score = self.loss.smooth_objective(beta_full, 'grad')[inactive]
-        randomized_score = self.loss.smooth_objective(beta_full, 'grad')[inactive]
+        randomized_score = self.loss.smooth_objective(beta_full, 'grad')[inactive]+randomization.sample()
 
         # find the current active group, i.e.
         # subset of inactive that pass the threshold
@@ -74,8 +75,9 @@ def solve(self):
         self.boundary_signs = np.sign(randomized_score)[self.boundary]
         self.interior = ~self.boundary
 
-        self.observed_overshoot = self.boundary_signs * (inactive_score[self.boundary] - threshold[self.boundary])
-        self.observed_below_thresh = inactive_score[self.interior]
+        #self.observed_overshoot = self.boundary_signs * (inactive_score[self.boundary] - threshold[self.boundary])
+        self.observed_overshoot = np.abs(randomized_score[self.boundary]-np.multiply(self.boundary_signs, self.threshold[self.boundary]))
+        self.observed_below_thresh = randomized_score[self.interior]
         self.observed_score_state = inactive_score
 
         self.selection_variable = {'boundary_set': self.boundary,
@@ -83,14 +85,16 @@ def solve(self):
 
         self._solved = True
 
-        self.num_opt_var = self.boundary.shape[0]
+        #self.num_opt_var = self.boundary.shape[0]
 
     def setup_sampler(self):
 
         # must set observed_opt_state, opt_transform and score_transform
 
         p = self.boundary.shape[0]  # shorthand
+        self.num_opt_var = p
         self.observed_opt_state = np.zeros(p)
+        #self.feasible_point = self.observed_opt_state[self.boundary] = self.observed_overshoot
         self.observed_opt_state[self.boundary] = self.observed_overshoot
         self.observed_opt_state[self.interior] = self.observed_below_thresh
 
@@ -107,6 +111,9 @@ def setup_sampler(self):
 
         self._setup = True
 
+        ## permuted
+
+
     def projection(self, opt_state):
         """
         Full projection for Langevin.

From 849ffe0a112fdb6fedd296e9c668d98af9328808 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22ca02.SUNet>
Date: Tue, 10 Jan 2017 13:43:53 -0800
Subject: [PATCH 010/617] added test folder

---
 selection/approx_ci/tests/__init__.py         |   0
 selection/approx_ci/tests/api.py              |   0
 selection/approx_ci/tests/plot_intervals.py   |   0
 selection/approx_ci/tests/test_glm.py         | 119 +++++++++++++++++
 .../approx_ci/tests/test_threshold_score.py   | 120 ++++++++++++++++++
 5 files changed, 239 insertions(+)
 create mode 100644 selection/approx_ci/tests/__init__.py
 create mode 100644 selection/approx_ci/tests/api.py
 create mode 100644 selection/approx_ci/tests/plot_intervals.py
 create mode 100644 selection/approx_ci/tests/test_glm.py
 create mode 100644 selection/approx_ci/tests/test_threshold_score.py

diff --git a/selection/approx_ci/tests/__init__.py b/selection/approx_ci/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/approx_ci/tests/api.py b/selection/approx_ci/tests/api.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/approx_ci/tests/plot_intervals.py b/selection/approx_ci/tests/plot_intervals.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
new file mode 100644
index 000000000..8e3b0f6f4
--- /dev/null
+++ b/selection/approx_ci/tests/test_glm.py
@@ -0,0 +1,119 @@
+from __future__ import print_function
+import numpy as np
+import time
+import regreg.api as rr
+import selection.tests.reports as reports
+from selection.tests.instance import logistic_instance, gaussian_instance
+from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
+from selection.approx_ci.estimator_approx import M_estimator_approx
+
+from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+from selection.randomized.query import naive_confidence_intervals
+from selection.randomized.query import naive_pvalues
+
+
+@register_report(['cover', 'ci_length', 'truth', 'naive_cover', 'naive_pvalues'])
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+@wait_for_return_value()
+def test_approximate_ci(n=200,
+                        p=10,
+                        s=3,
+                        snr=5,
+                        rho=0.1,
+                        lam_frac = 1.,
+                        loss='gaussian',
+                        randomizer='gaussian'):
+
+    from selection.api import randomization
+
+    if loss == "gaussian":
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+        loss = rr.glm.gaussian(X, y)
+    elif loss == "logistic":
+        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
+        loss = rr.glm.logistic(X, y)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
+
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+    if randomizer=='gaussian':
+        randomization = randomization.isotropic_gaussian((p,), scale=1.)
+    elif randomizer=='laplace':
+        randomization = randomization.laplace((p,), scale=1.)
+
+    M_est = M_estimator_approx(loss, epsilon, penalty, randomization, randomizer)
+    M_est.solve_approx()
+    ci = approximate_conditional_density(M_est)
+    ci.solve_approx()
+
+    active = M_est._overall
+    active_set = np.asarray([i for i in range(p) if active[i]])
+
+    true_support = np.asarray([i for i in range(p) if i < s])
+
+    nactive = np.sum(active)
+
+    print("active set, true_support", active_set, true_support)
+
+    true_vec = beta[active]
+
+    print("true coefficients", true_vec)
+
+    if (set(active_set).intersection(set(true_support)) == set(true_support))== True:
+
+        ci_active = np.zeros((nactive, 2))
+        covered = np.zeros(nactive, np.bool)
+        ci_length = np.zeros(nactive)
+        pivots = np.zeros(nactive)
+
+        class target_class(object):
+            def __init__(self, target_cov):
+                self.target_cov = target_cov
+                self.shape = target_cov.shape
+        target = target_class(M_est.target_cov)
+
+        ci_naive = naive_confidence_intervals(target, M_est.target_observed)
+        naive_pvals = naive_pvalues(target, M_est.target_observed, true_vec)
+        naive_covered = np.zeros(nactive)
+        toc = time.time()
+
+        for j in range(nactive):
+            ci_active[j, :] = np.array(ci.approximate_ci(j))
+            if (ci_active[j, 0] <= true_vec[j]) and (ci_active[j,1] >= true_vec[j]):
+                covered[j] = 1
+            ci_length[j] = ci_active[j,1] - ci_active[j,0]
+            print(ci_active[j, :])
+            pivots[j] = ci.approximate_pvalue(j, true_vec[j])
+
+            # naive ci
+            if (ci_naive[j,0]<=true_vec[j]) and (ci_naive[j,1]>=true_vec[j]):
+                naive_covered[j]+=1
+
+        tic = time.time()
+        print('ci time now', tic - toc)
+
+        return covered, ci_length, pivots, naive_covered, naive_pvals
+    #else:
+    #    return 0
+
+def report(niter=50, **kwargs):
+
+    kwargs = {'s': 0, 'n': 200, 'p': 30, 'snr': 7, 'loss': 'gaussian', 'randomizer':'gaussian'}
+    split_report = reports.reports['test_approximate_ci']
+    screened_results = reports.collect_multiple_runs(split_report['test'],
+                                                     split_report['columns'],
+                                                     niter,
+                                                     reports.summarize_all,
+                                                     **kwargs)
+
+    fig = reports.pivot_plot_plus_naive(screened_results)
+    fig.savefig('approx_pivots_glm.pdf')
+
+
+if __name__=='__main__':
+    report()
\ No newline at end of file
diff --git a/selection/approx_ci/tests/test_threshold_score.py b/selection/approx_ci/tests/test_threshold_score.py
new file mode 100644
index 000000000..263c72a1e
--- /dev/null
+++ b/selection/approx_ci/tests/test_threshold_score.py
@@ -0,0 +1,120 @@
+from __future__ import print_function
+import numpy as np
+import time
+import regreg.api as rr
+import selection.tests.reports as reports
+from selection.tests.instance import logistic_instance, gaussian_instance
+from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
+from selection.approx_ci.estimator_approx import threshold_score_approx
+
+from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+from selection.randomized.query import naive_confidence_intervals
+from selection.randomized.query import naive_pvalues
+
+
+@register_report(['cover', 'ci_length', 'truth', 'naive_cover', 'naive_pvalues'])
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+@wait_for_return_value()
+def test_approximate_ci(n=200,
+                        p=50,
+                        s=0,
+                        snr=5,
+                        threshold = 3.,
+                        rho=0.1,
+                        lam_frac = 1.,
+                        loss='gaussian',
+                        randomizer='gaussian'):
+
+    from selection.api import randomization
+
+    if loss == "gaussian":
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
+        loss = rr.glm.gaussian(X, y)
+    elif loss == "logistic":
+        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
+        loss = rr.glm.logistic(X, y)
+
+    if randomizer=='gaussian':
+        randomization = randomization.isotropic_gaussian((p,), scale=1.)
+    elif randomizer=='laplace':
+        randomization = randomization.laplace((p,), scale=1.)
+
+    active_bool = np.zeros(p, np.bool)
+    #active_bool[range(3)] = 1
+    inactive_bool = ~active_bool
+
+    TS = threshold_score_approx(loss,
+                                threshold,
+                                randomization,
+                                active_bool,
+                                inactive_bool,
+                                randomizer)
+
+    TS.solve_approx()
+    active = TS._overall
+    print("nactive", active.sum())
+
+    ci = approximate_conditional_density(TS)
+    ci.solve_approx()
+
+    active_set = np.asarray([i for i in range(p) if active[i]])
+    true_support = np.asarray([i for i in range(p) if i < s])
+    nactive = np.sum(active)
+    print("active set, true_support", active_set, true_support)
+    true_vec = beta[active]
+    print("true coefficients", true_vec)
+
+    if (set(active_set).intersection(set(true_support)) == set(true_support))== True:
+
+        ci_active = np.zeros((nactive, 2))
+        covered = np.zeros(nactive, np.bool)
+        ci_length = np.zeros(nactive)
+        pivots = np.zeros(nactive)
+
+        class target_class(object):
+            def __init__(self, target_cov):
+                self.target_cov = target_cov
+                self.shape = target_cov.shape
+
+        target = target_class(TS.target_cov)
+        ci_naive = naive_confidence_intervals(target, TS.target_observed)
+        naive_pvals = naive_pvalues(target, TS.target_observed, true_vec)
+        naive_covered = np.zeros(nactive)
+        toc = time.time()
+
+        for j in range(nactive):
+            ci_active[j, :] = np.array(ci.approximate_ci(j))
+            if (ci_active[j, 0] <= true_vec[j]) and (ci_active[j,1] >= true_vec[j]):
+                covered[j] = 1
+            ci_length[j] = ci_active[j,1] - ci_active[j,0]
+            print(ci_active[j, :])
+            pivots[j] = ci.approximate_pvalue(j, true_vec[j])
+
+            # naive ci
+            if (ci_naive[j,0]<=true_vec[j]) and (ci_naive[j,1]>=true_vec[j]):
+                naive_covered[j]+=1
+
+        tic = time.time()
+        print('ci time now', tic - toc)
+
+        return covered, ci_length, pivots, naive_covered, naive_pvals
+    #else:
+    #    return 0
+
+def report(niter=200, **kwargs):
+
+    kwargs = {'s': 0, 'n': 200, 'p': 20, 'snr': 7, 'loss': 'gaussian', 'randomizer': 'gaussian'}
+    split_report = reports.reports['test_approximate_ci']
+    screened_results = reports.collect_multiple_runs(split_report['test'],
+                                                     split_report['columns'],
+                                                     niter,
+                                                     reports.summarize_all,
+                                                     **kwargs)
+
+    fig = reports.pivot_plot_plus_naive(screened_results)
+    fig.savefig('approx_pivots_threshold.pdf')
+
+
+if __name__=='__main__':
+    report()
\ No newline at end of file

From 50864ff628c318c1308f14f200ad26c60f1ab5b4 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22ca02.SUNet>
Date: Tue, 10 Jan 2017 14:48:33 -0800
Subject: [PATCH 011/617] added solver for approximate MLE

---
 selection/approx_ci/ci_via_approx_density.py | 68 +++++++++++++++++++-
 1 file changed, 65 insertions(+), 3 deletions(-)

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index 37130df51..04e0d33b5 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -1,3 +1,4 @@
+from math import log
 import numpy as np
 import regreg.api as rr
 from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
@@ -267,6 +268,8 @@ def __init__(self, sel_alg,
                                 quadratic=quadratic,
                                 coef=coef)
 
+        self.coefs[:] = 0.
+
         self.target_observed = self.sel_alg.target_observed
         self.nactive = self.target_observed.shape[0]
         self.target_cov = self.sel_alg.target_cov
@@ -311,15 +314,74 @@ def approx_conditional_prob(self, j):
     def area_normalized_density(self, j, mean):
 
         normalizer = 0.
+        grad_normalizer = 0.
         approx_nonnormalized = []
 
         for i in range(self.grid.shape[0]):
             approx_density = np.exp(-np.true_divide((self.grid[i] - mean) ** 2, 2 * self.norm[j])
                                     + (self.h_approx[j,:])[i])
             normalizer += approx_density
+            grad_normalizer +=  (-mean/self.norm[j] + self.grid[i]/self.norm[j])* approx_density
             approx_nonnormalized.append(approx_density)
 
-        return np.cumsum(np.array(approx_nonnormalized / normalizer))
+        return np.cumsum(np.array(approx_nonnormalized / normalizer)), normalizer, grad_normalizer
+
+    def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
+
+        param = self.apply_offset(param)
+
+        f =  (param**2)/(2*self.norm[j]) - (self.target_observed[j]*param)/self.norm[j] + \
+             log(self.area_normalized_density(j,param)[1])
+
+        g =  param/self.norm[j] - self.target_observed[j]/self.norm[j] + \
+             self.area_normalized_density[2]/self.area_normalized_density(j,param)[1]
+
+        if mode == 'func':
+            return self.scale(f)
+        elif mode == 'grad':
+            return self.scale(g)
+        elif mode == 'both':
+            return self.scale(f), self.scale(g)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+    def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
+
+        current = self.coefs[:]
+        current_value = np.inf
+
+        objective = lambda u: self.smooth_objective_MLE(u, j, 'func')
+        grad = lambda u: self.smooth_objective_MLE(u, j, 'grad')
+
+        for itercount in range(nstep):
+
+            newton_step = grad(current) * self.norm[j]
+
+            # make sure proposal is a descent
+            count = 0
+            while True:
+                proposal = current - step * newton_step
+                proposed_value = objective(proposal)
+
+                if proposed_value <= current_value:
+                    break
+                step *= 0.5
+
+            # stop if relative decrease is small
+
+            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+                current = proposal
+                current_value = proposed_value
+                break
+
+            current = proposal
+            current_value = proposed_value
+
+            if itercount % 4 == 0:
+                step *= 2
+
+        value = objective(current)
+        return current, value
 
     def approximate_ci(self, j):
 
@@ -329,7 +391,7 @@ def approximate_ci(self, j):
         area = np.zeros(param_grid.shape[0])
 
         for k in range(param_grid.shape[0]):
-            area_vec = self.area_normalized_density(j, param_grid[k])
+            area_vec = self.area_normalized_density(j, param_grid[k])[0]
             area[k] = area_vec[self.ind_obs[j]]
 
         region = param_grid[(area >= 0.05) & (area <= 0.95)]
@@ -340,7 +402,7 @@ def approximate_ci(self, j):
 
     def approximate_pvalue(self, j, param):
 
-        area_vec = self.area_normalized_density(j, param)
+        area_vec = self.area_normalized_density(j, param)[0]
         area = area_vec[self.ind_obs[j]]
 
         return 2*min(area, 1-area)
\ No newline at end of file

From e0daca541ff468e0ab5ce82eb455d6106dc3c5ed Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22ca02.SUNet>
Date: Tue, 10 Jan 2017 15:40:00 -0800
Subject: [PATCH 012/617] mle solver working

---
 selection/approx_ci/ci_via_approx_density.py |  12 +-
 selection/approx_ci/tests/test_glm.py        |   8 +-
 selection/approx_ci/tests/test_mle_approx.py |  69 +++
 selection/tests/flags.py                     |  10 +
 selection/tests/reports.py                   | 473 +++++++++++++++++++
 5 files changed, 563 insertions(+), 9 deletions(-)
 create mode 100644 selection/approx_ci/tests/test_mle_approx.py
 create mode 100644 selection/tests/flags.py
 create mode 100644 selection/tests/reports.py

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index 04e0d33b5..17155c524 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -330,11 +330,13 @@ def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
 
         param = self.apply_offset(param)
 
-        f =  (param**2)/(2*self.norm[j]) - (self.target_observed[j]*param)/self.norm[j] + \
-             log(self.area_normalized_density(j,param)[1])
+        approx_normalizer = self.area_normalized_density(j,param)
 
-        g =  param/self.norm[j] - self.target_observed[j]/self.norm[j] + \
-             self.area_normalized_density[2]/self.area_normalized_density(j,param)[1]
+        f = (param**2)/(2*self.norm[j]) - (self.target_observed[j]*param)/self.norm[j] + \
+            log(approx_normalizer[1])
+
+        g = param/self.norm[j] - self.target_observed[j]/self.norm[j] + \
+            approx_normalizer[2]/approx_normalizer[1]
 
         if mode == 'func':
             return self.scale(f)
@@ -347,7 +349,7 @@ def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
 
     def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
 
-        current = self.coefs[:]
+        current = self.target_observed[j]
         current_value = np.inf
 
         objective = lambda u: self.smooth_objective_MLE(u, j, 'func')
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index 8e3b0f6f4..8a007bd7b 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -16,7 +16,7 @@
 @register_report(['cover', 'ci_length', 'truth', 'naive_cover', 'naive_pvalues'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @wait_for_return_value()
-def test_approximate_ci(n=200,
+def test_approximate_ci(n=100,
                         p=10,
                         s=3,
                         snr=5,
@@ -67,6 +67,7 @@ def test_approximate_ci(n=200,
     if (set(active_set).intersection(set(true_support)) == set(true_support))== True:
 
         ci_active = np.zeros((nactive, 2))
+        #mle_active = np.zeros(nactive)
         covered = np.zeros(nactive, np.bool)
         ci_length = np.zeros(nactive)
         pivots = np.zeros(nactive)
@@ -84,6 +85,7 @@ def __init__(self, target_cov):
 
         for j in range(nactive):
             ci_active[j, :] = np.array(ci.approximate_ci(j))
+            #mle_active[j] = ci.approx_MLE_solver(j, nstep= 100)[0]
             if (ci_active[j, 0] <= true_vec[j]) and (ci_active[j,1] >= true_vec[j]):
                 covered[j] = 1
             ci_length[j] = ci_active[j,1] - ci_active[j,0]
@@ -96,10 +98,8 @@ def __init__(self, target_cov):
 
         tic = time.time()
         print('ci time now', tic - toc)
-
         return covered, ci_length, pivots, naive_covered, naive_pvals
-    #else:
-    #    return 0
+
 
 def report(niter=50, **kwargs):
 
diff --git a/selection/approx_ci/tests/test_mle_approx.py b/selection/approx_ci/tests/test_mle_approx.py
new file mode 100644
index 000000000..104f8d070
--- /dev/null
+++ b/selection/approx_ci/tests/test_mle_approx.py
@@ -0,0 +1,69 @@
+from __future__ import print_function
+import numpy as np
+import time
+import regreg.api as rr
+
+from selection.tests.instance import logistic_instance, gaussian_instance
+from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
+from selection.approx_ci.estimator_approx import M_estimator_approx
+
+def test_approximate_mle(n=100,
+                         p=10,
+                         s=3,
+                         snr=5,
+                         rho=0.1,
+                         lam_frac = 1.,
+                         loss='gaussian',
+                         randomizer='gaussian'):
+
+    from selection.api import randomization
+
+    if loss == "gaussian":
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+        loss = rr.glm.gaussian(X, y)
+    elif loss == "logistic":
+        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
+        loss = rr.glm.logistic(X, y)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
+
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+    if randomizer == 'gaussian':
+        randomization = randomization.isotropic_gaussian((p,), scale=1.)
+    elif randomizer == 'laplace':
+        randomization = randomization.laplace((p,), scale=1.)
+
+    M_est = M_estimator_approx(loss, epsilon, penalty, randomization, randomizer)
+    M_est.solve_approx()
+
+    inf = approximate_conditional_density(M_est)
+    inf.solve_approx()
+
+    active = M_est._overall
+    active_set = np.asarray([i for i in range(p) if active[i]])
+
+    true_support = np.asarray([i for i in range(p) if i < s])
+
+    nactive = np.sum(active)
+
+    print("active set, true_support", active_set, true_support)
+
+    true_vec = beta[active]
+
+    print("true coefficients", true_vec)
+
+    if (set(active_set).intersection(set(true_support)) == set(true_support)) == True:
+
+        mle_active = np.zeros(nactive)
+
+        for j in range(nactive):
+            mle_active[j] = inf.approx_MLE_solver(j, nstep=100)[0]
+
+        print("mle for target", mle_active)
+
+test_approximate_mle()
+
diff --git a/selection/tests/flags.py b/selection/tests/flags.py
new file mode 100644
index 000000000..0cbc0cb6f
--- /dev/null
+++ b/selection/tests/flags.py
@@ -0,0 +1,10 @@
+import os
+
+SMALL_SAMPLES = False
+SET_SEED = False
+
+if "USE_SMALL_SAMPLES" in os.environ:
+    SMALL_SAMPLES = True
+
+if "USE_TEST_SEED" in os.environ:
+    SET_SEED = True
\ No newline at end of file
diff --git a/selection/tests/reports.py b/selection/tests/reports.py
new file mode 100644
index 000000000..a3f727fd1
--- /dev/null
+++ b/selection/tests/reports.py
@@ -0,0 +1,473 @@
+"""
+special column names:
+mle -- pivot at unpenalized MLE
+truth -- pivot at true parameter
+pvalue -- tests of H0 for each variable
+count -- how many runs (including last one) until success
+active -- was variable truly active
+naive_pvalue --
+cover --
+naive_cover --
+"""
+from __future__ import division
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.stats import probplot, uniform
+import statsmodels.api as sm
+
+def collect_multiple_runs(test_fn, columns, nrun, summary_fn, *args, **kwargs):
+    """
+    Assumes a wait_for_return_value test...
+    """
+    dfs = []
+    for i in range(nrun):
+        print(i)
+        count, result = test_fn(*args, **kwargs)
+        #print(result)
+        #print(len(np.atleast_1d(result[0])))
+        if hasattr(result, "__len__"):
+            df_i = pd.DataFrame(index=np.arange(len(np.atleast_1d(result[0]))),
+                                columns=columns + ['count', 'run'])
+        else:
+            df_i = pd.DataFrame(index=np.arange(1),
+                                columns=columns + ['count', 'run'])
+
+        df_i = pd.DataFrame(index=np.arange(len(np.atleast_1d(result[0]))),
+                            columns=columns + ['count', 'run'])
+
+        df_i.loc[:,'count'] = count
+        df_i.loc[:,'run'] = i
+
+        for col, v in zip(columns, result):
+            df_i.loc[:,col] = np.atleast_1d(v)
+
+        df_i['func'] = [str(test_fn)] * len(df_i)
+        dfs.append(df_i)
+        if summary_fn is not None:
+            summary_fn(pd.concat(dfs))
+    return pd.concat(dfs)
+
+def pvalue_plot(multiple_results, screening=False, fig=None, colors=['r','g']):
+    """
+    Extract pvalues and group by
+    null and alternative.
+    """
+
+    P0 = multiple_results['pvalue'][~multiple_results['active']]
+    P0 = P0[~pd.isnull(P0)]
+    PA = multiple_results['pvalue'][multiple_results['active']]
+    PA = PA[~pd.isnull(PA)]
+
+    if fig is None:
+        fig = plt.figure()
+    ax = fig.gca()
+
+    fig.suptitle('Null and alternative p-values')
+
+    grid = np.linspace(0, 1, 51)
+
+    if len(P0) > 0:
+        ecdf0 = sm.distributions.ECDF(P0)
+        F0 = ecdf0(grid)
+        ax.plot(grid, F0, '--o', c=colors[0], lw=2, label=r'$H_0$')
+    if len(PA) > 0:
+        ecdfA = sm.distributions.ECDF(PA)
+        FA = ecdfA(grid)
+        ax.plot(grid, FA, '--o', c=colors[1], lw=2, label=r'$H_A$')
+
+    ax.plot([0, 1], [0, 1], 'k-', lw=2)
+    ax.legend(loc='lower right')
+
+    if screening:
+        screen = 1. / np.mean(multiple_results.loc[multiple_results.index == 0,'count'])
+        ax.set_title('Screening: %0.2f' % screen)
+    return fig
+
+def naive_pvalue_plot(multiple_results, screening=False, fig=None, colors=['r', 'g']):
+    """
+    Extract naive pvalues and group by
+    null and alternative.
+    """
+
+    P0 = multiple_results['naive_pvalue'][~multiple_results['active']]
+    P0 = P0[~pd.isnull(P0)]
+    PA = multiple_results['naive_pvalue'][multiple_results['active']]
+    PA = PA[~pd.isnull(PA)]
+
+    if fig is None:
+        fig = plt.figure()
+    ax = fig.gca()
+
+    fig.suptitle('Null and alternative p-values')
+
+    grid = np.linspace(0, 1, 51)
+
+    if len(P0) > 0:
+        ecdf0 = sm.distributions.ECDF(P0)
+        F0 = ecdf0(grid)
+        ax.plot(grid, F0, '--o', c=colors[0], lw=2, label=r'$H_0$ naive')
+    if len(PA) > 0:
+        ecdfA = sm.distributions.ECDF(PA)
+        FA = ecdfA(grid)
+        ax.plot(grid, FA, '--o', c=colors[1], lw=2, label=r'$H_A$ naive')
+
+    ax.plot([0, 1], [0, 1], 'k-', lw=2)
+    ax.legend(loc='lower right')
+
+    if screening:
+        screen = 1. / np.mean(multiple_results.loc[multiple_results.index == 0,'count'])
+        ax.set_title('Screening: %0.2f' % screen)
+
+    return fig
+
+def split_pvalue_plot(multiple_results, screening=False, fig=None):
+    """
+    Compare pvalues where we have a split_pvalue
+    """
+
+    have_split = ~pd.isnull(multiple_results['split_pvalue'])
+    multiple_results = multiple_results.loc[have_split]
+
+    P0_s = multiple_results['split_pvalue'][~multiple_results['active']]
+    PA_s = multiple_results['split_pvalue'][multiple_results['active']]
+
+    # presumes we also have a pvalue
+    P0 = multiple_results['pvalue'][~multiple_results['active']]
+    PA = multiple_results['pvalue'][multiple_results['active']]
+
+    if fig is None:
+        fig = plt.figure()
+    ax = fig.gca()
+
+    fig.suptitle('Null and alternative p-values')
+
+    grid = np.linspace(0, 1, 51)
+
+    if len(P0) > 0:
+        ecdf0 = sm.distributions.ECDF(P0)
+        F0 = ecdf0(grid)
+        ax.plot(grid, F0, '--o', c='r', lw=2, label=r'$H_0$')
+    if len(PA) > 0:
+        ecdfA = sm.distributions.ECDF(PA)
+        FA = ecdfA(grid)
+        ax.plot(grid, FA, '--o', c='g', lw=2, label=r'$H_A$')
+
+    if len(P0_s) > 0:
+        ecdf0 = sm.distributions.ECDF(P0_s)
+        F0 = ecdf0(grid)
+        ax.plot(grid, F0, '-+', c='r', lw=2, label=r'$H_0$ split')
+    if len(PA) > 0:
+        ecdfA = sm.distributions.ECDF(PA_s)
+        FA = ecdfA(grid)
+        ax.plot(grid, FA, '-+', c='g', lw=2, label=r'$H_A$ split')
+
+    ax.plot([0, 1], [0, 1], 'k-', lw=2)
+    ax.legend(loc='lower right')
+
+    if screening:
+        screen = 1. / np.mean(multiple_results.loc[multiple_results.index == 0,'count'])
+        ax.set_title('Screening: %0.2f' % screen)
+
+def pivot_plot_simple(multiple_results, coverage=True, color='b', label=None, fig=None):
+    """
+    Plot the ECDF of the pivots on one panel of a two-panel figure.
+
+    Without `fig`, a 1x2 figure is created and its first panel is used
+    ("CLT Pivots"); otherwise the second panel of `fig` is used ("Bootstrap
+    Pivots"). Values come from 'pivot', else 'truth'. Returns the figure.
+    """
+    if fig is not None:
+        _, panel = fig.axes
+        panel.set_title("Bootstrap Pivots")
+    else:
+        fig, _ = plt.subplots(nrows=1, ncols=2)
+        panel, _ = fig.axes
+        panel.set_title("CLT Pivots")
+
+    if 'pivot' in multiple_results.columns:
+        ecdf = sm.distributions.ECDF(multiple_results['pivot'])
+    elif 'truth' in multiple_results.columns:
+        ecdf = sm.distributions.ECDF(multiple_results['truth'])
+
+    grid = np.linspace(0, 1)
+    panel.plot(grid, ecdf(grid), '-o', c=color, lw=2, label=label)
+    panel.plot([0, 1], [0, 1], 'k-', lw=2)
+    panel.set_xlim([0, 1])
+    panel.set_ylim([0, 1])
+    return fig
+
+
+def pivot_plot_2in1(multiple_results, coverage=True, color='b', label=None, fig=None):
+    """
+    Plot the ECDF of one pivot/p-value column on the current axes of `fig`.
+    """
+    # NOTE: a second `def pivot_plot_2in1` directly below rebinds this name
+    if fig is None:
+        fig = plt.figure()
+    ax = fig.gca()
+
+    fig.suptitle('Plugin CLT and bootstrap pivots')
+    # the first column present wins: 'pivot', then 'truth', then 'pvalue'
+    if 'pivot' in multiple_results.columns:
+        ecdf = sm.distributions.ECDF(multiple_results['pivot'])
+    elif 'truth' in multiple_results.columns:
+        ecdf = sm.distributions.ECDF(multiple_results['truth'])
+    elif 'pvalue' in multiple_results.columns:
+        ecdf = sm.distributions.ECDF(multiple_results['pvalue'])
+
+    G = np.linspace(0, 1)
+    F_pivot = ecdf(G)
+    #print(color)
+    ax.plot(G, F_pivot, '-o', c=color, lw=2, label=label)
+    ax.plot([0, 1], [0, 1], 'k-', lw=2)
+    ax.set_xlim([0, 1])
+    ax.set_ylim([0, 1])
+    ax.legend(loc='lower right')
+
+    return fig
+
+
+def pivot_plot_2in1(multiple_results, coverage=True, color='b', label=None, fig=None):
+    """
+    Overlay one ECDF curve on the 'Plugin CLT and bootstrap pivots' axes.
+
+    The values plotted are taken from the first of the columns 'pivot',
+    'truth', 'pvalue' that is present. A new figure is created when `fig`
+    is None; the figure that was drawn on is returned.
+    """
+    if fig is None:
+        fig = plt.figure()
+    axes = fig.gca()
+    fig.suptitle('Plugin CLT and bootstrap pivots')
+
+    available = multiple_results.columns
+    if 'pivot' in available:
+        ecdf = sm.distributions.ECDF(multiple_results['pivot'])
+    elif 'truth' in available:
+        ecdf = sm.distributions.ECDF(multiple_results['truth'])
+    elif 'pvalue' in available:
+        ecdf = sm.distributions.ECDF(multiple_results['pvalue'])
+
+    grid = np.linspace(0, 1)
+    axes.plot(grid, ecdf(grid), '-o', c=color, lw=2, label=label)
+    axes.plot([0, 1], [0, 1], 'k-', lw=2)
+    axes.set_xlim([0, 1])
+    axes.set_ylim([0, 1])
+    axes.legend(loc='lower right')
+    return fig
+
+
+def pivot_plot_plus_naive(multiple_results, coverage=True, color='b', label=None, fig=None):
+    """
+    Plot the ECDF of the selective pivots and, when present, the naive ones.
+
+    Selective values come from the first available of 'pivot', 'truth',
+    'pvalue'; the naive curve needs a 'naive_pvalues' column. The curve
+    labels are fixed, so `label` is not used. Returns the figure.
+    """
+    if fig is None:
+        fig = plt.figure()
+    ax = fig.gca()
+    fig.suptitle('Selective and naive pivots')
+
+    if 'pivot' in multiple_results.columns:
+        ecdf = sm.distributions.ECDF(multiple_results['pivot'])
+    elif 'truth' in multiple_results.columns:
+        ecdf = sm.distributions.ECDF(multiple_results['truth'])
+    elif 'pvalue' in multiple_results.columns:
+        ecdf = sm.distributions.ECDF(multiple_results['pvalue'])
+
+    G = np.linspace(0, 1)
+    F_pivot = ecdf(G)
+    ax.plot(G, F_pivot, '-o', c=color, lw=2, label="Selective pivots")
+    ax.plot([0, 1], [0, 1], 'k-', lw=2)
+
+    # the naive curve is optional; all uses of `ecdf_naive` stay inside the guard
+    if 'naive_pvalues' in multiple_results.columns:
+        ecdf_naive = sm.distributions.ECDF(multiple_results['naive_pvalues'])
+        F_naive = ecdf_naive(G)
+        ax.plot(G, F_naive, '-o', c='r', lw=2, label="Naive pivots")
+
+    ax.set_xlim([0, 1])
+    ax.set_ylim([0, 1])
+    ax.legend(loc='lower right')
+    return fig
+
+
+
+
+def pivot_plot(multiple_results, coverage=True, color='b', label=None, fig=None):
+    """
+    Plot ECDFs of the 'mle' and 'truth' pivot columns on the two panels of `fig`.
+    A 1x2 figure is created when `fig` is None. With `coverage`, the figure
+    title reports mean 'cover' (and 'naive_cover' if present). Returns `fig`.
+    """
+    if fig is None:
+        fig, _ = plt.subplots(nrows=1, ncols=2)
+    plot_pvalues_mle, plot_pvalues_truth = fig.axes
+
+    ecdf_mle = sm.distributions.ECDF(multiple_results['mle'])
+    G = np.linspace(0, 1)
+    F_MLE = ecdf_mle(G)
+    plot_pvalues_mle.plot(G, F_MLE, '-o', c=color, lw=2, label=label)
+    plot_pvalues_mle.plot([0, 1], [0, 1], 'k-', lw=2)
+    plot_pvalues_mle.set_title("Pivots at the unpenalized MLE")
+    plot_pvalues_mle.set_xlim([0, 1])
+    plot_pvalues_mle.set_ylim([0, 1])
+    plot_pvalues_mle.legend(loc='lower right')
+
+    ecdf_truth = sm.distributions.ECDF(multiple_results['truth'])
+    F_true = ecdf_truth(G)
+    plot_pvalues_truth.plot(G, F_true, '-o', c=color, lw=2, label=label)
+    plot_pvalues_truth.plot([0, 1], [0, 1], 'k-', lw=2)
+    plot_pvalues_truth.set_title("Pivots at the truth (by tilting)")
+    plot_pvalues_truth.set_xlim([0, 1])
+    plot_pvalues_truth.set_ylim([0, 1])
+    plot_pvalues_truth.legend(loc='lower right')
+
+    if coverage:
+        if 'naive_cover' in multiple_results.columns:
+            fig.suptitle('Coverage: %0.2f, Naive: %0.2f' % (np.mean(multiple_results['cover']),
+                                                            np.mean(multiple_results['naive_cover'])))
+        else:
+            fig.suptitle('Coverage: %0.2f' % np.mean(multiple_results['cover']))
+
+    return fig
+
+def boot_clt_plot(multiple_results, coverage=True, label=None, fig=None, active=True, inactive=True):
+    """
+    Plot the ECDFs of the 'pivots_clt' and 'pivots_boot' columns on one set of axes.
+    """
+    # row mask from the two flags; presumably 'active' is a boolean column -- TODO confirm
+    test = np.zeros_like(multiple_results['active'])
+    if active:
+        test += multiple_results['active']
+    if inactive:
+        test += ~multiple_results['active']
+    multiple_results = multiple_results[test]
+    print(test.sum(), test.shape)
+
+    if fig is None:
+        fig = plt.figure()
+    ax = fig.gca()
+
+    ecdf_clt = sm.distributions.ECDF(multiple_results['pivots_clt'])
+    G = np.linspace(0, 1)
+    F_MLE = ecdf_clt(G)
+    ax.plot(G, F_MLE, '-o', c='b', lw=2, label='CLT')
+    ax.plot([0, 1], [0, 1], 'k-', lw=2)
+    ax.set_xlim([0, 1])
+    ax.set_ylim([0, 1])
+
+    ecdf_boot = sm.distributions.ECDF(multiple_results['pivots_boot'])
+    F_true = ecdf_boot(G)
+    ax.plot(G, F_true, '-o', c='g', lw=2, label='Bootstrap')
+    ax.plot([0, 1], [0, 1], 'k-', lw=2)
+    ax.set_xlim([0, 1])
+    ax.set_ylim([0, 1])
+    ax.legend(loc='lower right')
+    # the diagonal and the axis limits above repeat the calls made for the CLT curve
+
+    if coverage:
+        if 'covered_split' in multiple_results.columns:
+            fig.suptitle('CLT Coverage: %0.2f, Boot: %0.2f, Naive: %0.2f, Split: %0.2f' % (np.mean(multiple_results['covered_clt']),
+                            np.mean(multiple_results['covered_boot']), np.mean(multiple_results['covered_naive']),
+                                                                      np.mean(multiple_results['covered_split'])))
+        else:
+            # no 'covered_split' column: the title reports CLT, bootstrap and naive only
+            fig.suptitle('CLT Coverage: %0.2f, Boot: %0.2f, Naive: %0.2f' % (np.mean(multiple_results['covered_clt']),
+                                                                             np.mean(multiple_results['covered_boot']),
+                                                                             np.mean(multiple_results['covered_naive'])))
+    return fig
+
+def compute_pivots(multiple_results):
+    if 'truth' not in multiple_results.columns:
+        return {}  # nothing to summarize without the 'truth' pivots
+    truth = multiple_results['truth']
+    return {'pivot (mean, SD, type I):': (np.mean(truth), np.std(truth), np.mean(truth < 0.05))}
+
+def boot_clt_pivots(multiple_results):
+    """
+    Collect (mean, SD, share below 0.05) for each pivot column that is present.
+    """
+    # (column read, key in the summary, label inside the nested dict)
+    spec = (('pivots_clt', 'pivots_clt', 'CLT pivots (mean, SD, type I):'),
+            ('pivots_boot', 'pivots_boot', 'Bootstrap pivots (mean, SD, type I):'),
+            ('pivot', 'pivots', 'pivots (mean, SD, type I):'),
+            ('naive_pvalues', 'naive_pvalues', 'pivots (mean, SD, type I):'))
+    pivot_summary = {}
+    for column, key, label in spec:
+        if column in multiple_results.columns:
+            values = multiple_results[column]
+            pivot_summary[key] = {label: (np.mean(values), np.std(values), np.mean(values < 0.05))}
+
+    return pivot_summary
+
+def compute_coverage(multiple_results):
+    """
+    Mean of the 'naive_cover' and 'cover' columns, for whichever are present.
+    """
+    labels = (('naive_cover', 'naive coverage'), ('cover', 'selective coverage'))
+    return {name: np.mean(multiple_results[column])
+            for column, name in labels if column in multiple_results.columns}
+
+def boot_clt_coverage(multiple_results):
+    """
+    Average each `covered_*` column that is present, keyed by a readable label.
+    """
+    labels = (('covered_naive', 'naive coverage'),
+              ('covered_boot', 'boot coverage'),
+              ('covered_clt', 'clt coverage'),
+              ('covered_split', 'split coverage'))
+    columns = multiple_results.columns
+    return {label: np.mean(multiple_results[name])
+            for name, label in labels if name in columns}
+
+
+def compute_lengths(multiple_results):
+    """
+    Mean of every `ci_length*` column that is present.
+
+    Keys of the returned dict are the column names themselves.
+    """
+    candidates = ('ci_length_clt', 'ci_length_boot', 'ci_length_split',
+                  'ci_length_naive', 'ci_length')
+    result = {}
+    for name in candidates:
+        if name in multiple_results.columns:
+            result[name] = np.mean(multiple_results[name])
+    return result
+
+def compute_length_frac(multiple_results):
+    """
+    Median ratio of the split length to the CLT and bootstrap lengths.
+    A ratio is reported only when both of its `ci_length_*` columns exist;
+    rows whose denominator is NaN are dropped before dividing.
+    """
+    result = {}
+    columns = multiple_results.columns
+    for name in ('clt', 'boot'):
+        if 'ci_length_split' in columns and 'ci_length_' + name in columns:
+            length = multiple_results['ci_length_' + name]
+            keep = ~np.isnan(length)
+            split = multiple_results['ci_length_split'][keep]
+            result['split/' + name] = np.median(np.divide(split, length[keep]))
+    return result
+
+def compute_screening(multiple_results):  # inverse of the mean 'count' over rows whose index label is 0
+    return {'screening:': 1. / np.mean(multiple_results['count'][multiple_results.index == 0])}
+
+def summarize_all(multiple_results):
+    """Merge every summary into one dict and print it, one `key value` pair per line."""
+    result = {}
+    summaries = (boot_clt_pivots, compute_pivots, boot_clt_coverage,
+                 compute_coverage, compute_screening, compute_lengths,
+                 compute_length_frac)
+    for summary in summaries:
+        result.update(summary(multiple_results))
+
+    for key, value in result.items():
+        print(key, value)
+
+reports = {}
\ No newline at end of file

From d2d4660d7a78a6a6e43638a8c29b27a2ad164fbe Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51skuj.SUNet>
Date: Wed, 11 Jan 2017 19:06:49 -0800
Subject: [PATCH 013/617] updated greedy_step file

---
 selection/approx_ci/estimator_approx.py | 71 +++++++++++++++++++++++++
 selection/randomized/greedy_step.py     | 46 ++++++++++------
 2 files changed, 101 insertions(+), 16 deletions(-)

diff --git a/selection/approx_ci/estimator_approx.py b/selection/approx_ci/estimator_approx.py
index 5c63e8147..5d1624af4 100644
--- a/selection/approx_ci/estimator_approx.py
+++ b/selection/approx_ci/estimator_approx.py
@@ -3,6 +3,7 @@
 from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
 
 from selection.randomized.threshold_score import threshold_score
+from selection.randomized.greedy_step import greedy_score_step
 
 class M_estimator_approx(M_estimator):
 
@@ -112,3 +113,73 @@ def setup_map(self, j):
 
         self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
         self.offset_inactive = self.null_statistic[self.nactive:]
+
+class greedy_score_step_approx(greedy_score_step):
+    """Greedy score step that also caches the linear maps and bootstrap covariances read by setup_map."""
+    def __init__(self, loss,
+                 penalty,
+                 active_groups,
+                 inactive_groups,
+                 randomization,
+                 randomizer):
+        # `randomization` is handed to the parent; `randomizer` is only stored on the instance
+        greedy_score_step.__init__(self, loss,
+                                 penalty,
+                                 active_groups,
+                                 inactive_groups,
+                                 randomization)
+        self.randomizer = randomizer
+
+
+    def solve_approx(self):
+        # solve and set up the sampler first; everything below reads attributes they set
+        self.solve()
+        self.setup_sampler()
+        p = self.inactive.sum()
+        self.feasible_point = self.observed_scaling
+        self._overall = np.zeros(p, dtype=bool)
+        # NOTE(review): _overall has inactive.sum() entries -- confirm the 'variables' mask has that length
+        self._overall[self.selection_variable['variables']] = 1
+
+        self.observed_opt_state = np.hstack([self.observed_scaling, self.observed_subgradients])
+        # columns are [maximizing_subgrad | losing_padding_map]; selected rows are moved to the top
+        _opt_linear_term = np.concatenate((np.atleast_2d(self.maximizing_subgrad).T, self.losing_padding_map), 1)
+        self._opt_linear_term = np.concatenate((_opt_linear_term[self._overall,:], _opt_linear_term[~self._overall,:]), 0)
+
+        self.opt_transform = (self._opt_linear_term, np.zeros(p))
+
+        (self._score_linear_term, _) = self.score_transform
+
+        self.inactive_lagrange = self.observed_scaling * self.penalty.weights[0] * np.ones(p-1)
+        # `p` is rebound here: from the number of inactive coordinates to the column count of X
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self.active,
+                                              inactive=~self.active)[0]
+
+        bootstrap_target, target_observed = pairs_bootstrap_glm(self.loss,
+                                                             self._overall,
+                                                             beta_full=None,
+                                                             inactive=None)
+        # bootstrap draw: n row indices sampled with replacement
+        sampler = lambda : np.random.choice(n, size=(n,), replace=True)
+        self.target_cov, target_score_cov = bootstrap_cov(sampler, bootstrap_target, cross_terms=(bootstrap_score,))
+        self.score_target_cov = np.atleast_2d(target_score_cov).T
+        self.target_observed = target_observed
+
+        nactive = self._overall.sum()
+        self.nactive = nactive
+        # first nactive columns of the reordered map, split into selected rows and the rest
+        self.B_active = self._opt_linear_term[:nactive, :nactive]
+        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
+
+
+    def setup_map(self, j):
+        # A scales with target_observed[j]; null_statistic is the mapped observed score minus that part
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
+        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
+
+        self.offset_active = self.null_statistic[:self.nactive]
+        self.offset_inactive = self.null_statistic[self.nactive:]
\ No newline at end of file
diff --git a/selection/randomized/greedy_step.py b/selection/randomized/greedy_step.py
index 9a974520d..ca2c924e2 100644
--- a/selection/randomized/greedy_step.py
+++ b/selection/randomized/greedy_step.py
@@ -1,16 +1,25 @@
 import numpy as np
 import regreg.api as rr
 
-from .M_estimator import M_estimator, restricted_Mest
-
-class greedy_score_step(M_estimator):
-
-    def __init__(self, loss, penalty, active_groups, inactive_groups, randomization, solve_args={'min_its':50, 'tol':1.e-10},
+from .query import query
+from .M_estimator import restricted_Mest
+
+class greedy_score_step(query):
+
+    def __init__(self,
+                 loss,
+                 penalty,
+                 active_groups,
+                 inactive_groups,
+                 randomization,
+                 solve_args={'min_its':50, 'tol':1.e-10},
                  beta_active=None):
         """
         penalty is a group_lasso object that assigns weights to groups
         """
 
+        query.__init__(self, randomization)
+
         (self.loss,
          self.penalty,
          self.active_groups,
@@ -24,7 +33,7 @@ def __init__(self, loss, penalty, active_groups, inactive_groups, randomization,
                               randomization,
                               solve_args,
                               beta_active)
-         
+
         self.active = np.zeros(self.loss.shape, np.bool)
         for i, g in enumerate(np.unique(self.penalty.groups)):
             if self.active_groups[i]:
@@ -58,10 +67,10 @@ def solve(self):
 
         if beta_active is None:
             beta_active = self.beta_active = restricted_Mest(self.loss, active, solve_args=solve_args)
-            
+
         beta_full = np.zeros(loss.shape)
         beta_full[active] = beta_active
-            
+
         # score at unpenalized M-estimator
 
         self.observed_score_state = - self.loss.smooth_objective(beta_full, 'grad')[inactive]
@@ -75,12 +84,12 @@ def solve(self):
         # assuming a.s. unique maximizing group here
 
         maximizing_group = np.unique(self.group_lasso_dual.groups)[np.argmax(terms)]
-        maximizing_subgrad = self.observed_score_state[self.group_lasso_dual.groups == maximizing_group]
+        maximizing_subgrad = randomized_score[self.group_lasso_dual.groups == maximizing_group]
         maximizing_subgrad /= np.linalg.norm(maximizing_subgrad) # this is now a unit vector
         maximizing_subgrad *= self.group_lasso_dual.weights[maximizing_group] # now a vector of length given by weight of maximizing group
         self.maximizing_subgrad = np.zeros(inactive.sum())
         self.maximizing_subgrad[self.group_lasso_dual.groups == maximizing_group] = maximizing_subgrad
-        self.observed_scaling = np.max(terms) / self.group_lasso_dual.weights[maximizing_group]
+        self.observed_scaling = np.max(terms) #/ self.group_lasso_dual.weights[maximizing_group]
 
         # which groups did not win
 
@@ -92,8 +101,8 @@ def solve(self):
         # (inactive_subgradients, scaling) are in this epigraph:
         losing_weights = dict([(g, self.group_lasso_dual.weights[g]) for g in self.group_lasso_dual.weights.keys() if g in losing_groups])
         self.group_lasso_dual_epigraph = rr.group_lasso_dual_epigraph(self.group_lasso_dual.groups[losing_set], weights=losing_weights)
-        
-        self.observed_subgradients = -randomized_score[losing_set]
+
+        self.observed_subgradients = randomized_score[losing_set]
         self.losing_padding_map = np.identity(losing_set.shape[0])[:,losing_set]
 
         # which variables are added to the model
@@ -101,9 +110,12 @@ def solve(self):
         winning_variables = self.group_lasso_dual.groups == maximizing_group
         padding_map = np.identity(self.active.shape[0])[:,self.inactive]
         self.maximizing_variables = padding_map.dot(winning_variables) > 0
-        
-        self.selection_variable = {'maximizing_group':maximizing_group, 
-                                   'maximizing_direction':self.maximizing_subgrad}
+
+        self.selection_variable = {'maximizing_group':maximizing_group,
+                                   'maximizing_direction':self.maximizing_subgrad,
+                                   'variables':self.maximizing_variables}
+
+        # need to implement Jacobian
 
     def setup_sampler(self):
 
@@ -120,10 +132,12 @@ def setup_sampler(self):
         self.opt_transform = (_opt_linear_term, np.zeros(_opt_linear_term.shape[0]))
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
 
+        self._solved = True
+        self._setup = True
+
     def projection(self, opt_state):
         """
         Full projection for Langevin.
-
         The state here will be only the state of the optimization variables.
         """
         return self.group_lasso_dual_epigraph.cone_prox(opt_state)

From 7c9fad0f930cb1a47bc18bd17ccf35f7f71c5dcc Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51skuj.SUNet>
Date: Wed, 11 Jan 2017 20:19:11 -0800
Subject: [PATCH 014/617] made changes for fs

---
 selection/approx_ci/ci_approx_greedy_step.py | 183 +++++++++++++++++++
 1 file changed, 183 insertions(+)
 create mode 100644 selection/approx_ci/ci_approx_greedy_step.py

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
new file mode 100644
index 000000000..4d9372869
--- /dev/null
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -0,0 +1,183 @@
+import numpy as np
+import regreg.api as rr
+from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
+from scipy.stats import norm
+
+
+class neg_log_cube_probability_fs(rr.smooth_atom):
+    def __init__(self,
+                 q, # length of the atom's argument; equals p - E in our case
+                 mu,
+                 randomization_scale = 1., # NOTE(review): used below as a divisor of the cdf arguments (an SD, not a variance)
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+
+        self.randomization_scale = randomization_scale
+        self.q = q
+        self.mu = mu
+        # the atom is registered with argument shape (q,)
+        rr.smooth_atom.__init__(self,
+                                (self.q,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=None,
+                                coef=coef)
+
+    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
+
+        arg = self.apply_offset(arg)
+
+        arg_u = (arg + self.mu)/self.randomization_scale
+        arg_l = (-arg + self.mu)/self.randomization_scale
+        prod_arg = np.exp(-(2. * self.mu * arg)/(self.randomization_scale**2))
+        neg_prod_arg = np.exp((2. * self.mu * arg)/(self.randomization_scale**2))
+        cube_prob = norm.cdf(arg_u) - norm.cdf(arg_l)
+        log_cube_prob = -np.log(cube_prob).sum()
+        threshold = 10 ** -10
+        indicator = np.zeros(self.q, bool)
+        indicator[(cube_prob > threshold)] = 1
+        positive_arg = np.zeros(self.q, bool)
+        positive_arg[(self.mu>0)] = 1
+        pos_index = np.logical_and(positive_arg, ~indicator)
+        neg_index = np.logical_and(~positive_arg, ~indicator)
+        log_cube_grad = np.zeros(self.q)
+        log_cube_grad[indicator] = (np.true_divide(norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
+                                        cube_prob[indicator]))/self.randomization_scale
+
+        log_cube_grad[pos_index] = ((1. + prod_arg[pos_index])/
+                                     ((prod_arg[pos_index]/arg_u[pos_index])+
+                                      (1./arg_l[pos_index])))/(self.randomization_scale **2)
+
+        log_cube_grad[neg_index] = ((arg_u[neg_index] -(arg_l[neg_index]*neg_prod_arg[neg_index]))
+                                    /(self.randomization_scale**2))/(1.- neg_prod_arg[neg_index])
+
+
+        if mode == 'func':
+            return self.scale(log_cube_prob)
+        elif mode == 'grad':
+            return self.scale(log_cube_grad)
+        elif mode == 'both':
+            return self.scale(log_cube_prob), self.scale(log_cube_grad)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+
+class approximate_conditional_prob(rr.smooth_atom):
+
+    def __init__(self,
+                 t, #point at which density is to computed
+                 map,
+                 coef = 1.,
+                 offset= None,
+                 quadratic= None):
+        # `map` supplies p, nactive, randomization, feasible_point and the affine pieces read below
+        self.t = t
+        self.map = map
+        self.q = map.p - map.nactive
+        self.inactive_conjugate = self.active_conjugate = map.randomization.CGF_conjugate
+
+        if self.active_conjugate is None:
+            raise ValueError(
+                'randomization must know its CGF_conjugate -- currently only isotropic_gaussian and laplace are implemented and are assumed to be randomization with IID coordinates')
+
+        #self.inactive_lagrange = self.map.inactive_lagrange
+
+        rr.smooth_atom.__init__(self,
+                                (map.nactive,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=self.map.feasible_point,
+                                coef=coef)
+        # start the optimization variable at the map's feasible point
+        self.coefs[:] = map.feasible_point
+
+        self.nonnegative_barrier = nonnegative_softmax_scaled(self.map.nactive)
+
+
+    def sel_prob_smooth_objective(self, param, mode='both', check_feasibility=False):
+        # objective = conjugate term on the active block + cube term + nonnegativity barrier
+        param = self.apply_offset(param)
+
+        data = np.squeeze(self.t *  self.map.A)
+        # both offsets are shifted by t * A, split at nactive
+        offset_active = self.map.offset_active + data[:self.map.nactive]
+        offset_inactive = self.map.offset_inactive + data[self.map.nactive:]
+
+        active_conj_loss = rr.affine_smooth(self.active_conjugate,
+                                            rr.affine_transform(self.map.B_active, offset_active))
+
+        #if self.map.randomizer == 'laplace':
+        #    cube_obj = neg_log_cube_probability_laplace(self.q, self.inactive_lagrange, randomization_scale = 1.)
+        #elif self.map.randomizer == 'gaussian':
+        cube_loss = neg_log_cube_probability_fs(self.q, offset_inactive, randomization_scale = 1.)
+
+        total_loss = rr.smooth_sum([active_conj_loss,
+                                    cube_loss,
+                                    self.nonnegative_barrier])
+
+        if mode == 'func':
+            f = total_loss.smooth_objective(param, 'func')
+            return self.scale(f)
+        elif mode == 'grad':
+            g = total_loss.smooth_objective(param, 'grad')
+            return self.scale(g)
+        elif mode == 'both':
+            f, g = total_loss.smooth_objective(param, 'both')
+            return self.scale(f), self.scale(g)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+    def minimize2(self, step=1, nstep=30, tol=1.e-6):
+        """Gradient steps with step halving; returns (final point, objective value there)."""
+        current = self.coefs
+        current_value = np.inf
+
+        objective = lambda u: self.sel_prob_smooth_objective(u, 'func')
+        grad = lambda u: self.sel_prob_smooth_objective(u, 'grad')
+
+        for itercount in range(nstep):
+            newton_step = grad(current)
+
+            # make sure proposal is feasible
+
+            count = 0
+            while True:
+                count += 1
+                proposal = current - step * newton_step
+                #print("current proposal and grad", proposal, newton_step)
+                if np.all(proposal > 0):
+                    break
+                step *= 0.5
+                if count >= 40:
+                    #print(proposal)
+                    raise ValueError('not finding a feasible point')
+
+            # make sure proposal is a descent
+            # NOTE(review): `count` is reset but never checked below, so this loop has no iteration cap
+            count = 0
+            while True:
+                proposal = current - step * newton_step
+                proposed_value = objective(proposal)
+                #print(current_value, proposed_value, 'minimize')
+                if proposed_value <= current_value:
+                    break
+                step *= 0.5
+
+            # stop if relative decrease is small
+
+            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+                current = proposal
+                current_value = proposed_value
+                break
+
+            current = proposal
+            current_value = proposed_value
+
+            if itercount % 4 == 0:
+                step *= 2
+
+        # print('iter', itercount)
+        value = objective(current)
+
+        return current, value
\ No newline at end of file

From faac18dd10658ec91393e46931f5ad0659ce977f Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c627.SUNet>
Date: Thu, 12 Jan 2017 09:50:20 -0800
Subject: [PATCH 015/617] corrected sign in gradient

---
 selection/approx_ci/ci_approx_greedy_step.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index 4d9372869..bcc685f88 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -32,8 +32,10 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         arg_l = (-arg + self.mu)/self.randomization_scale
         prod_arg = np.exp(-(2. * self.mu * arg)/(self.randomization_scale**2))
         neg_prod_arg = np.exp((2. * self.mu * arg)/(self.randomization_scale**2))
+
         cube_prob = norm.cdf(arg_u) - norm.cdf(arg_l)
         log_cube_prob = -np.log(cube_prob).sum()
+
         threshold = 10 ** -10
         indicator = np.zeros(self.q, bool)
         indicator[(cube_prob > threshold)] = 1
@@ -41,8 +43,9 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         positive_arg[(self.mu>0)] = 1
         pos_index = np.logical_and(positive_arg, ~indicator)
         neg_index = np.logical_and(~positive_arg, ~indicator)
+
         log_cube_grad = np.zeros(self.q)
-        log_cube_grad[indicator] = (np.true_divide(norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
+        log_cube_grad[indicator] = -(np.true_divide(norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
                                         cube_prob[indicator]))/self.randomization_scale
 
         log_cube_grad[pos_index] = ((1. + prod_arg[pos_index])/
@@ -50,7 +53,7 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
                                       (1./arg_l[pos_index])))/(self.randomization_scale **2)
 
         log_cube_grad[neg_index] = ((arg_u[neg_index] -(arg_l[neg_index]*neg_prod_arg[neg_index]))
-                                    /(self.randomization_scale**2))/(1.- neg_prod_arg[neg_index])
+                                    /(self.randomization_scale**2))/(1.+ neg_prod_arg[neg_index])
 
 
         if mode == 'func':

From c16abde414883285ccb2a0914431badd10b34134 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c627.SUNet>
Date: Thu, 12 Jan 2017 10:40:14 -0800
Subject: [PATCH 016/617] changes in fs

---
 selection/approx_ci/ci_approx_greedy_step.py  | 189 ++++++++++++++++--
 selection/approx_ci/tests/test_greedy_step.py |  86 ++++++++
 2 files changed, 260 insertions(+), 15 deletions(-)
 create mode 100644 selection/approx_ci/tests/test_greedy_step.py

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index bcc685f88..c3627001a 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -1,3 +1,4 @@
+from math import log
 import numpy as np
 import regreg.api as rr
 from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
@@ -28,10 +29,10 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
 
         arg = self.apply_offset(arg)
 
-        arg_u = (arg + self.mu)/self.randomization_scale
-        arg_l = (-arg + self.mu)/self.randomization_scale
-        prod_arg = np.exp(-(2. * self.mu * arg)/(self.randomization_scale**2))
-        neg_prod_arg = np.exp((2. * self.mu * arg)/(self.randomization_scale**2))
+        arg_u = ((arg *np.ones(self.q)) + self.mu) / self.randomization_scale
+        arg_l = (-(arg *np.ones(self.q)) + self.mu) / self.randomization_scale
+        prod_arg = np.exp(-(2. * self.mu * (arg *np.ones(self.q))) / (self.randomization_scale ** 2))
+        neg_prod_arg = np.exp((2. * self.mu * (arg *np.ones(self.q))) / (self.randomization_scale ** 2))
 
         cube_prob = norm.cdf(arg_u) - norm.cdf(arg_l)
         log_cube_prob = -np.log(cube_prob).sum()
@@ -40,21 +41,22 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         indicator = np.zeros(self.q, bool)
         indicator[(cube_prob > threshold)] = 1
         positive_arg = np.zeros(self.q, bool)
-        positive_arg[(self.mu>0)] = 1
+        positive_arg[(self.mu > 0)] = 1
         pos_index = np.logical_and(positive_arg, ~indicator)
         neg_index = np.logical_and(~positive_arg, ~indicator)
 
-        log_cube_grad = np.zeros(self.q)
-        log_cube_grad[indicator] = -(np.true_divide(norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
-                                        cube_prob[indicator]))/self.randomization_scale
+        log_cube_grad_vec = np.zeros(self.q)
+        log_cube_grad_vec[indicator] = -(np.true_divide(norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
+                                                    cube_prob[indicator])) / self.randomization_scale
 
-        log_cube_grad[pos_index] = ((1. + prod_arg[pos_index])/
-                                     ((prod_arg[pos_index]/arg_u[pos_index])+
-                                      (1./arg_l[pos_index])))/(self.randomization_scale **2)
+        log_cube_grad_vec[pos_index] = ((1. + prod_arg[pos_index]) /
+                                    ((prod_arg[pos_index] / arg_u[pos_index]) +
+                                     (1. / arg_l[pos_index]))) / (self.randomization_scale ** 2)
 
-        log_cube_grad[neg_index] = ((arg_u[neg_index] -(arg_l[neg_index]*neg_prod_arg[neg_index]))
-                                    /(self.randomization_scale**2))/(1.+ neg_prod_arg[neg_index])
+        log_cube_grad_vec[neg_index] = ((arg_u[neg_index] - (arg_l[neg_index] * neg_prod_arg[neg_index]))
+                                    / (self.randomization_scale ** 2)) / (1. + neg_prod_arg[neg_index])
 
+        log_cube_grad = log_cube_grad_vec.sum()
 
         if mode == 'func':
             return self.scale(log_cube_prob)
@@ -66,7 +68,7 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
             raise ValueError("mode incorrectly specified")
 
 
-class approximate_conditional_prob(rr.smooth_atom):
+class approximate_conditional_prob_fs(rr.smooth_atom):
 
     def __init__(self,
                  t, #point at which density is to computed
@@ -183,4 +185,161 @@ def minimize2(self, step=1, nstep=30, tol=1.e-6):
         # print('iter', itercount)
         value = objective(current)
 
-        return current, value
\ No newline at end of file
+        return current, value
+
+class approximate_conditional_density(rr.smooth_atom):
+
+    def __init__(self, sel_alg,
+                       coef=1.,
+                       offset=None,
+                       quadratic=None,
+                       nstep=10):
+        """Cache the observed target and its covariance from `sel_alg` (`nstep` is not used here)."""
+        self.sel_alg = sel_alg
+        # registered with regreg as a smooth atom of shape (1,)
+        rr.smooth_atom.__init__(self,
+                                (1,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                coef=coef)
+
+        self.coefs[:] = 0.
+        # local references to the selection algorithm's observed target and its covariance
+        self.target_observed = self.sel_alg.target_observed
+        self.nactive = self.target_observed.shape[0]
+        self.target_cov = self.sel_alg.target_cov
+
+    def solve_approx(self):
+        """Set up the grid and fill `ind_obs`, `norm` and `h_approx` for every target."""
+        # grid on which marginal conditional densities will be evaluated
+        grid_length = 201
+        self.grid = np.linspace(-5, 15, num=grid_length)
+        #self.grid = np.linspace(-5*np.amax(np.absolute(target_observed)), 5*np.amax(np.absolute(target_observed)), num=grid_length)
+        #s_obs = np.round(self.target_observed, decimals =1)
+
+        print("observed values", self.target_observed)
+        self.ind_obs = np.zeros(self.nactive, int)
+        self.norm = np.zeros(self.nactive)
+        self.h_approx = np.zeros((self.nactive, grid_length))
+
+        for j, obs in enumerate(self.target_observed):
+            self.norm[j] = self.target_cov[j, j]
+            if obs < self.grid[0]:
+                nearest = 0
+            elif obs > np.max(self.grid):
+                nearest = grid_length - 1
+            else:
+                nearest = np.argmin(np.abs(self.grid - obs))
+            self.ind_obs[j] = nearest
+            self.h_approx[j, :] = self.approx_conditional_prob(j)
+
+
+    def approx_conditional_prob(self, j):
+        """Return, per grid point, minus the objective value at the final iterate of minimize2."""
+        self.sel_alg.setup_map(j)
+        h_hat = []
+        for i in range(self.grid.shape[0]):
+            approx = approximate_conditional_prob_fs(self.grid[i], self.sel_alg)
+            # minimize2(step, nstep, tol) takes no target index: passing j positionally made it
+            # the step size (0 for j == 0, so the iterate never moved); pass nstep by keyword only
+            current, value = approx.minimize2(nstep=50)
+            h_hat.append(-value)
+        return np.array(h_hat)
+
+    def area_normalized_density(self, j, mean):
+        """Return (cumulative normalized density over the grid, normalizer, its derivative in `mean`)."""
+        normalizer = 0.
+        grad_normalizer = 0.
+        approx_nonnormalized = []
+        # term i is exp(-(grid[i] - mean)^2 / (2 norm[j]) + h_approx[j, i])
+        for i in range(self.grid.shape[0]):
+            approx_density = np.exp(-np.true_divide((self.grid[i] - mean) ** 2, 2 * self.norm[j])
+                                    + (self.h_approx[j,:])[i])
+            normalizer += approx_density
+            grad_normalizer +=  (-mean/self.norm[j] + self.grid[i]/self.norm[j])* approx_density
+            approx_nonnormalized.append(approx_density)
+        # NOTE(review): list / normalizer works only because normalizer is a numpy value here
+        return np.cumsum(np.array(approx_nonnormalized / normalizer)), normalizer, grad_normalizer
+
+    def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
+        """Value `f` and/or derivative `g` in `param` of the approximate objective for target j."""
+        param = self.apply_offset(param)
+        # tuple: (cumulative density, normalizer, derivative of the normalizer in the mean)
+        approx_normalizer = self.area_normalized_density(j,param)
+
+        f = (param**2)/(2*self.norm[j]) - (self.target_observed[j]*param)/self.norm[j] + \
+            log(approx_normalizer[1])
+        # g is the derivative of f with respect to param
+        g = param/self.norm[j] - self.target_observed[j]/self.norm[j] + \
+            approx_normalizer[2]/approx_normalizer[1]
+
+        if mode == 'func':
+            return self.scale(f)
+        elif mode == 'grad':
+            return self.scale(g)
+        elif mode == 'both':
+            return self.scale(f), self.scale(g)
+        else:
+            raise ValueError("mode incorrectly specified")
+
+    def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
+        """Descent iteration on `smooth_objective_MLE` for target j; returns (final iterate, value)."""
+        current = self.target_observed[j]
+        current_value = np.inf
+
+        objective = lambda u: self.smooth_objective_MLE(u, j, 'func')
+        grad = lambda u: self.smooth_objective_MLE(u, j, 'grad')
+
+        for itercount in range(nstep):
+            # direction: derivative of the objective rescaled by norm[j]
+            newton_step = grad(current) * self.norm[j]
+
+            # make sure proposal is a descent
+            count = 0
+            while True:
+                proposal = current - step * newton_step
+                proposed_value = objective(proposal)
+                # NOTE(review): `count` is never updated, so nothing bounds this halving loop
+                if proposed_value <= current_value:
+                    break
+                step *= 0.5
+
+            # stop if relative decrease is small
+
+            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+                current = proposal
+                current_value = proposed_value
+                break
+
+            current = proposal
+            current_value = proposed_value
+            # the (possibly halved) step carries over and is doubled on every 4th iteration
+            if itercount % 4 == 0:
+                step *= 2
+
+        value = objective(current)
+        return current, value
+
+    def approximate_ci(self, j):
+        """Return (min, max) of the candidate means whose cumulative density lies in [0.05, 0.95].
+
+        Candidate means are 201 equally spaced points in [-5, 15]; for each one the cumulative
+        density from `area_normalized_density` is read at index `ind_obs[j]`.  Returns (0, 0)
+        when no candidate qualifies.
+        """
+        candidates = np.linspace(-5, 15, num=201)
+        obs_index = self.ind_obs[j]
+        area = np.array([self.area_normalized_density(j, m)[0][obs_index]
+                         for m in candidates])
+
+        accepted = candidates[(area >= 0.05) & (area <= 0.95)]
+        if accepted.size == 0:
+            return 0, 0
+        return np.nanmin(accepted), np.nanmax(accepted)
+
+    def approximate_pvalue(self, j, param):
+        """Two-sided p-value: twice the smaller tail of the cumulative density at `ind_obs[j]`."""
+        cumulative = self.area_normalized_density(j, param)[0]
+        lower_tail = cumulative[self.ind_obs[j]]
+        upper_tail = 1 - lower_tail
+        return 2*min(lower_tail, upper_tail)
diff --git a/selection/approx_ci/tests/test_greedy_step.py b/selection/approx_ci/tests/test_greedy_step.py
new file mode 100644
index 000000000..9d50d3446
--- /dev/null
+++ b/selection/approx_ci/tests/test_greedy_step.py
@@ -0,0 +1,86 @@
+from __future__ import print_function
+import numpy as np
+import time
+import regreg.api as rr
+from selection.tests.instance import logistic_instance, gaussian_instance
+from selection.approx_ci.ci_approx_greedy_step import neg_log_cube_probability_fs, approximate_conditional_prob_fs, \
+    approximate_conditional_density
+from selection.approx_ci.estimator_approx import greedy_score_step_approx
+
+def test_approximate_ci(n=100,
+                        p=10,
+                        s=0,
+                        snr=5,
+                        rho=0.1,
+                        lam_frac = 1.,
+                        loss='gaussian',
+                        randomizer='gaussian'):
+    """Smoke test: run one randomized greedy step and print approximate intervals for the selected set."""
+    from selection.api import randomization
+
+    if loss == "gaussian":
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
+        loss = rr.glm.gaussian(X, y)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    elif loss == "logistic":
+        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
+        loss = rr.glm.logistic(X, y)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
+
+    if randomizer == 'gaussian':
+        randomization = randomization.isotropic_gaussian((p,), scale=1.)
+    elif randomizer == 'laplace':
+        randomization = randomization.laplace((p,), scale=1.)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+    # active_bool = np.zeros(p, np.bool)
+    # active_bool[range(3)] = 1
+    # inactive_bool = ~active_bool
+
+    GS = greedy_score_step_approx(loss,
+                                  penalty,
+                                  np.zeros(p, dtype=bool),
+                                  np.ones(p, dtype=bool),
+                                  randomization,
+                                  randomizer)
+
+    GS.solve_approx()
+    active = GS._overall
+    print("nactive", active.sum())
+
+    ci = approximate_conditional_density(GS)
+    ci.solve_approx()
+
+    active_set = np.asarray([i for i in range(p) if active[i]])
+    true_support = np.asarray([i for i in range(p) if i < s])
+    nactive = np.sum(active)
+    print("active set, true_support", active_set, true_support)
+    true_vec = beta[active]
+    print("true coefficients", true_vec)
+
+    if set(true_support).issubset(set(active_set)):
+        # intervals are only computed when the whole true support was selected
+        ci_active = np.zeros((nactive, 2))
+        covered = np.zeros(nactive, bool)
+        ci_length = np.zeros(nactive)
+        pivots = np.zeros(nactive)
+
+        toc = time.time()
+
+        for j in range(nactive):
+            ci_active[j, :] = np.array(ci.approximate_ci(j))
+            if (ci_active[j, 0] <= true_vec[j]) and (ci_active[j, 1] >= true_vec[j]):
+                covered[j] = 1
+            ci_length[j] = ci_active[j, 1] - ci_active[j, 0]
+            # print(ci_active[j, :])
+            pivots[j] = ci.approximate_pvalue(j, true_vec[j])
+
+        print("confidence intervals", ci_active)
+        tic = time.time()
+        print('ci time now', tic - toc)
+
+
+test_approximate_ci()

From dfea0e847e18fdb2cae09560b9a7a64eb0bad504 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c16e.SUNet>
Date: Wed, 8 Feb 2017 11:27:16 -0800
Subject: [PATCH 017/617] added laplace cube loss for fs

---
 selection/approx_ci/ci_approx_greedy_step.py | 41 ++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index c3627001a..ed6f0c017 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -67,6 +67,47 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         else:
             raise ValueError("mode incorrectly specified")
 
+class neg_log_cube_probability_fs_laplace(rr.smooth_atom):
+
+    def __init__(self,
+                 q, #equals p - E in our case
+                 mu,
+                 randomization_scale = 1., #equals the randomization variance in our case
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+        self.randomization_scale = randomization_scale
+        self.q = q
+        self.mu = mu
+
+        rr.smooth_atom.__init__(self,
+                                (self.q,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=None,
+                                coef=coef)
+
+    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
+
+        arg = self.apply_offset(arg)
+
+        arg_u = ((arg * np.ones(self.q)) + self.mu) / self.randomization_scale
+        arg_l = (-(arg * np.ones(self.q)) + self.mu) / self.randomization_scale
+
+        ind_arg_1 = np.zeros(self.q, bool)
+        ind_arg_1[(arg_u < 0.)] = 1
+        ind_arg_2 = np.zeros(self.q, bool)
+        ind_arg_2[(arg_l > 0.)] = 1
+        ind_arg_3 = np.logical_and(~ind_arg_1, ~ind_arg_2)
+        cube_prob = np.zeros(self.q)
+        cube_prob[ind_arg_1] = np.exp(arg_u[ind_arg_1]) / 2. - np.exp(arg_l[ind_arg_1]) / 2.
+        cube_prob[ind_arg_2] = -np.exp(-arg_u[ind_arg_2]) / 2. + np.exp(-arg_l[ind_arg_2]) / 2.
+        cube_prob[ind_arg_3] = 1 - np.exp(-arg_u[ind_arg_3]) / 2. - np.exp(arg_l[ind_arg_3]) / 2.
+        neg_log_cube_prob = -np.log(cube_prob).sum()
+
+
+
+
 
 class approximate_conditional_prob_fs(rr.smooth_atom):
 

From 1c7e1bcb6161a460352dff9b9672f68b17e544ae Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c16e.SUNet>
Date: Wed, 8 Feb 2017 11:49:22 -0800
Subject: [PATCH 018/617] added gradient of cube loss for laplace

---
 selection/approx_ci/ci_approx_greedy_step.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index ed6f0c017..56ee898d9 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -93,6 +93,7 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
 
         arg_u = ((arg * np.ones(self.q)) + self.mu) / self.randomization_scale
         arg_l = (-(arg * np.ones(self.q)) + self.mu) / self.randomization_scale
+        prod_arg = -(2 * arg * np.ones(self.q)) / self.randomization_scale
 
         ind_arg_1 = np.zeros(self.q, bool)
         ind_arg_1[(arg_u < 0.)] = 1
@@ -103,10 +104,25 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         cube_prob[ind_arg_1] = np.exp(arg_u[ind_arg_1]) / 2. - np.exp(arg_l[ind_arg_1]) / 2.
         cube_prob[ind_arg_2] = -np.exp(-arg_u[ind_arg_2]) / 2. + np.exp(-arg_l[ind_arg_2]) / 2.
         cube_prob[ind_arg_3] = 1 - np.exp(-arg_u[ind_arg_3]) / 2. - np.exp(arg_l[ind_arg_3]) / 2.
-        neg_log_cube_prob = -np.log(cube_prob).sum()
+        log_cube_prob = -np.log(cube_prob).sum()
 
+        log_cube_grad_vec = np.zeros(self.q)
+        log_cube_grad_vec[~ind_arg_3] = np.true_divide(1.+ prod_arg[ind_arg_1],-1. + prod_arg[ind_arg_1])/\
+                                       self.randomization_scale
+        num_vec = 0.5* np.exp(-arg_u[ind_arg_3]) + 0.5* np.exp(arg_l[ind_arg_3])
+        den_vec = -1. + 0.5* np.exp(-arg_u[ind_arg_3]) + 0.5* np.exp(arg_l[ind_arg_3])
+        log_cube_grad_vec[ind_arg_3] = np.true_divide(num_vec, den_vec)/self.randomization_scale
 
+        log_cube_grad = log_cube_grad_vec.sum()
 
+        if mode == 'func':
+            return self.scale(log_cube_prob)
+        elif mode == 'grad':
+            return self.scale(log_cube_grad)
+        elif mode == 'both':
+            return self.scale(log_cube_prob), self.scale(log_cube_grad)
+        else:
+            raise ValueError("mode incorrectly specified")
 
 
 class approximate_conditional_prob_fs(rr.smooth_atom):

From dfd16f4089bcc3c428ffa0e87da784c483477bd5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51skuf.SUNet>
Date: Fri, 10 Feb 2017 01:15:39 -0800
Subject: [PATCH 019/617] plots for ci

---
 selection/approx_ci/ci_via_approx_density.py  |   7 +-
 .../approx_ci/tests/inference_hiv_data.py     | 225 ++++++++++++++++++
 2 files changed, 228 insertions(+), 4 deletions(-)
 create mode 100644 selection/approx_ci/tests/inference_hiv_data.py

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index 17155c524..29eaad4e0 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -277,8 +277,8 @@ def __init__(self, sel_alg,
     def solve_approx(self):
 
         #defining the grid on which marginal conditional densities will be evaluated
-        grid_length = 201
-        self.grid = np.linspace(-5, 15, num=grid_length)
+        grid_length = 1601
+        self.grid = np.linspace(-15,65, num=grid_length)
         #self.grid = np.linspace(-5*np.amax(np.absolute(target_observed)), 5*np.amax(np.absolute(target_observed)), num=grid_length)
         #s_obs = np.round(self.target_observed, decimals =1)
 
@@ -387,9 +387,8 @@ def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
 
     def approximate_ci(self, j):
 
-        grid_length = 201
         #param_grid = np.linspace(-5*np.amax(np.absolute(self.target_observed)), 5*np.amax(np.absolute(self.target_observed)), num=grid_length)
-        param_grid = np.linspace(-5, 15, num=201)
+        param_grid = np.linspace(-15, 65, num=1601)
         area = np.zeros(param_grid.shape[0])
 
         for k in range(param_grid.shape[0]):
diff --git a/selection/approx_ci/tests/inference_hiv_data.py b/selection/approx_ci/tests/inference_hiv_data.py
new file mode 100644
index 000000000..3eb9fd2ca
--- /dev/null
+++ b/selection/approx_ci/tests/inference_hiv_data.py
@@ -0,0 +1,225 @@
+from __future__ import print_function
+import os, numpy as np, pandas, statsmodels.api as sm
+import time
+import regreg.api as rr
+from selection.tests.instance import logistic_instance, gaussian_instance
+from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
+from selection.approx_ci.estimator_approx import M_estimator_approx
+
+from selection.randomized.query import naive_confidence_intervals
+from selection.api import randomization
+import matplotlib.pyplot as plt
+
+
+if not os.path.exists("NRTI_DATA.txt"):
+    NRTI = pandas.read_table("http://hivdb.stanford.edu/pages/published_analysis/genophenoPNAS2006/DATA/NRTI_DATA.txt", na_values="NA")
+else:
+    NRTI = pandas.read_table("NRTI_DATA.txt")
+# one indicator column per (position, single-letter mutation) that occurs in more than 10 rows
+NRTI_specific = []
+NRTI_muts = []
+mixtures = np.zeros(NRTI.shape[0])
+for i in range(1,241):
+    d = NRTI['P%d' % i]
+    for mut in np.unique(d):
+        if mut not in ['-','.'] and len(mut) == 1:
+            test = np.equal(d, mut)
+            if test.sum() > 10:
+                NRTI_specific.append(np.array(np.equal(d, mut)))
+                NRTI_muts.append("P%d%s" % (i,mut))
+
+NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts)
+# builtin float/bool are used as dtypes: the np.float/np.bool aliases were removed in NumPy 1.24
+X_NRTI = np.array(NRTI_specific, float)
+Y = NRTI['3TC'] # shorthand
+keep = ~np.isnan(Y).astype(bool)
+X_NRTI = X_NRTI[np.nonzero(keep)]; Y=Y[keep]
+Y = np.array(np.log(Y), float); Y -= Y.mean()
+X_NRTI -= X_NRTI.mean(0)[None, :]; X_NRTI /= X_NRTI.std(0)[None,:]
+X = X_NRTI # shorthand
+n, p = X.shape
+X /= np.sqrt(n)
+
+ols_fit = sm.OLS(Y, X).fit()
+sigma_3TC = np.linalg.norm(ols_fit.resid) / np.sqrt(n-p-1)
+OLS_3TC = ols_fit.params
+# lam: mean over 2000 Gaussian draws of max |X^T z|, scaled by the OLS residual scale
+lam_frac = 1.
+loss = rr.glm.gaussian(X, Y)
+epsilon = 1. / np.sqrt(n)
+lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_3TC
+print(lam)
+
+W = np.ones(p) * lam
+penalty = rr.group_lasso(np.arange(p),weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+randomization = randomization.isotropic_gaussian((p,), scale=1.)
+
+M_est = M_estimator_approx(loss, epsilon, penalty, randomization, randomizer='gaussian')
+M_est.solve_approx()
+active = M_est._overall
+active_set = np.asarray([i for i in range(p) if active[i]])
+nactive = np.sum(active)
+
+active_set_0 = [NRTI_muts[i] for i in range(p) if active[i]]
+
+ci_active = np.zeros((nactive, 2))
+ci_length = np.zeros(nactive)
+mle_active = np.zeros((nactive,1))
+
+ci = approximate_conditional_density(M_est)
+ci.solve_approx()
+
+class target_class(object):  # bare container with the two attributes set below; passed to naive_confidence_intervals
+    def __init__(self, target_cov):
+        self.target_cov = target_cov
+        self.shape = target_cov.shape  # shape of the covariance matrix itself
+
+
+target = target_class(M_est.target_cov)
+ci_naive = naive_confidence_intervals(target, M_est.target_observed)
+# adjusted interval, its length and the approximate MLE for each selected coefficient
+for j in range(nactive):
+    ci_active[j, :] = np.array(ci.approximate_ci(j))
+    ci_length[j] = ci_active[j,1] - ci_active[j,0]
+    mle_active[j, :] = ci.approx_MLE_solver(j, nstep=100)[0]
+# the unadjusted point estimate is the observed target itself
+unadjusted_mle = np.zeros((nactive,1))
+for j in range(nactive):
+    unadjusted_mle[j, :] = ci.target_observed[j]
+# rows after transposing: point estimate, lower end, upper end (assumes ci_naive is nactive x 2)
+adjusted_intervals = np.hstack([mle_active, ci_active]).T
+unadjusted_intervals = np.hstack([unadjusted_mle, ci_naive]).T
+
+print("adjusted confidence", adjusted_intervals)
+print("naive confidence", unadjusted_intervals)
+# stacked rows: 0-2 unadjusted (estimate, lower, upper), 3-5 adjusted
+intervals = np.vstack([unadjusted_intervals, adjusted_intervals])
+# asymmetric error-bar lengths below/above each point estimate
+un_mean = intervals[0,:]
+un_lower_error = list(un_mean-intervals[1,:])
+un_upper_error = list(intervals[2,:]-un_mean)
+unStd = [un_lower_error, un_upper_error]
+
+ad_mean = intervals[3,:]
+ad_lower_error = list(ad_mean-intervals[4,:])
+ad_upper_error = list(intervals[5,:]- ad_mean)
+adStd = [ad_lower_error, ad_upper_error]
+
+
+N = len(un_mean)               # number of data entries
+ind = np.arange(N)              # the x locations for the groups
+width = 0.35                    # bar width
+# NOTE: width_0 is not referenced anywhere else in this script
+width_0 = 0.10
+
+print('here')
+
+fig, ax = plt.subplots()
+# grouped bars: unadjusted estimates at ind, adjusted at ind + width, both with asymmetric error bars
+rects1 = ax.bar(ind, un_mean,                  # data
+                width,                          # bar width
+                color='darkgrey',        # bar colour
+                yerr=unStd,  # data for error bars
+                error_kw={'ecolor':'dimgrey',    # error-bars colour
+                          'linewidth':2})       # error-bar width
+
+rects2 = ax.bar(ind + width, ad_mean,
+                width,
+                color='thistle',
+                yerr=adStd,
+                error_kw={'ecolor':'darkmagenta',
+                          'linewidth':2})
+
+axes = plt.gca()
+axes.set_ylim([-6, 60])             # y-axis bounds
+# NOTE: .format(active_set) below has no effect, the title string has no {} placeholder
+ax.set_ylabel('Credible')
+ax.set_title('selected variables'.format(active_set))
+ax.set_xticks(ind + 1.2* width)
+
+ax.set_xticklabels(active_set_0, rotation=90)
+
+
+#ax.set_xticklabels(('Coef1', 'Coef2', 'Coef3', 'Coef4', 'Coef5', 'Coef6'))
+
+ax.legend((rects1[0], rects2[0]), ('Unadjusted', 'Adjusted'), loc='upper left')
+
+print('here')
+
+#def autolabel(rects):
+#    for rect in rects:
+#        height = rect.get_height()
+#        ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
+#                '%d' % int(height),
+#                ha='center',            # horizontal alignment
+#                va='bottom'             # vertical alignment
+#                )
+
+#autolabel(rects1)
+#autolabel(rects2)
+
+#plt.show()                              # render the plot
+# NOTE(review): absolute, user-specific output path; adjust before running on another machine
+plt.savefig('/Users/snigdhapanigrahi/Documents/Research/Python_plots/icml_hiv_plots.pdf', bbox_inches='tight')
+
+##################################################
+ind = np.zeros(len(active_set), bool)
+# second figure: same plot with the 'P184V' column dropped (.index raises ValueError if it was not selected)
+index = active_set_0.index('P184V')
+ind[index] = 1
+
+active_set_0.pop(index)
+
+active_set = [i for i in range(p) if active[i]]
+active_set.pop(index)
+
+intervals = intervals[:, ~ind]
+
+
+un_mean = intervals[0,:]
+un_lower_error = list(un_mean-intervals[1,:])
+un_upper_error = list(intervals[2,:]-un_mean)
+unStd = [un_lower_error, un_upper_error]
+ad_mean = intervals[3,:]
+ad_lower_error = list(ad_mean-intervals[4,:])
+ad_upper_error = list(intervals[5,:]- ad_mean)
+adStd = [ad_lower_error, ad_upper_error]
+
+
+N = len(un_mean)               # number of data entries
+ind = np.arange(N)              # the x locations for the groups
+width = 0.35                    # bar width
+
+print('here')
+
+fig, ax = plt.subplots()
+
+rects1 = ax.bar(ind, un_mean,                  # data
+                width,                          # bar width
+                color='darkgrey',        # bar colour
+                yerr=unStd,  # data for error bars
+                error_kw={'ecolor':'dimgrey',    # error-bars colour
+                          'linewidth':2})       # error-bar width
+
+rects2 = ax.bar(ind + width, ad_mean,
+                width,
+                color='thistle',
+                yerr=adStd,
+                error_kw={'ecolor':'darkmagenta',
+                          'linewidth':2})
+
+axes = plt.gca()
+axes.set_ylim([-6, 12])             # y-axis bounds
+
+ax.set_ylabel('Credible')
+ax.set_title('selected variables'.format(active_set))
+ax.set_xticks(ind + 1.2* width)
+
+ax.set_xticklabels(active_set_0, rotation=90)
+
+ax.legend((rects1[0], rects2[0]), ('Unadjusted', 'Adjusted'), loc='upper right')
+
+print('here')
+
+plt.savefig('/Users/snigdhapanigrahi/Documents/Research/Python_plots/icml_hiv_plots_0.pdf', bbox_inches='tight')
\ No newline at end of file

From e5ff73d02f9ef5462e0e5d34492506a927a87df2 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j11.SUNet>
Date: Fri, 10 Feb 2017 10:50:45 -0800
Subject: [PATCH 020/617] made small correction to gradient

---
 selection/approx_ci/ci_approx_greedy_step.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index 56ee898d9..50fa32b11 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -93,7 +93,7 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
 
         arg_u = ((arg * np.ones(self.q)) + self.mu) / self.randomization_scale
         arg_l = (-(arg * np.ones(self.q)) + self.mu) / self.randomization_scale
-        prod_arg = -(2 * arg * np.ones(self.q)) / self.randomization_scale
+        prod_arg = np.exp(-(2 * arg * np.ones(self.q))) / self.randomization_scale
 
         ind_arg_1 = np.zeros(self.q, bool)
         ind_arg_1[(arg_u < 0.)] = 1

From 4c80e97f947da44f50a1433c07c6be7e396a76f2 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j11.SUNet>
Date: Fri, 10 Feb 2017 10:53:59 -0800
Subject: [PATCH 021/617] another correction to indexing in gradient cube loss

---
 selection/approx_ci/ci_approx_greedy_step.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index 50fa32b11..b97e46f40 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -107,7 +107,7 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
         log_cube_prob = -np.log(cube_prob).sum()
 
         log_cube_grad_vec = np.zeros(self.q)
-        log_cube_grad_vec[~ind_arg_3] = np.true_divide(1.+ prod_arg[ind_arg_1],-1. + prod_arg[ind_arg_1])/\
+        log_cube_grad_vec[~ind_arg_3] = np.true_divide(1.+ prod_arg[~ind_arg_3],-1. + prod_arg[~ind_arg_3])/\
                                        self.randomization_scale
         num_vec = 0.5* np.exp(-arg_u[ind_arg_3]) + 0.5* np.exp(arg_l[ind_arg_3])
         den_vec = -1. + 0.5* np.exp(-arg_u[ind_arg_3]) + 0.5* np.exp(arg_l[ind_arg_3])

From deee0fe49b333257da096a57abf367917b4d6314 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jul 2017 12:31:10 -0700
Subject: [PATCH 022/617] RM: removed SLOPE test, moved it to regreg

---
 selection/SLOPE/tests/slope_run_test.py | 120 ------------------------
 1 file changed, 120 deletions(-)
 delete mode 100644 selection/SLOPE/tests/slope_run_test.py

diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
deleted file mode 100644
index 904cc3758..000000000
--- a/selection/SLOPE/tests/slope_run_test.py
+++ /dev/null
@@ -1,120 +0,0 @@
-
-from rpy2.robjects.packages import importr
-from rpy2 import robjects
-
-SLOPE = importr('SLOPE')
-
-import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
-
-import numpy as np
-import sys
-
-from regreg.atoms.slope import slope
-
-import regreg.api as rr
-
-
-def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian"):
-    robjects.r('''
-    slope = function(X, Y, W=NA, normalize, choice_weights, fdr = NA, sigma = 1){
-
-      if(is.na(sigma)){
-      sigma = NULL}
-
-      if(is.na(fdr)){
-      fdr = 0.1 }
-
-      if(normalize=="TRUE"){
-       normalize = TRUE} else{
-       normalize = FALSE}
-
-      if(is.na(W))
-      {
-        if(choice_weights == "gaussian"){
-        lambda = "gaussian"} else{
-        lambda = "bhq"}
-        result = SLOPE(X, Y, fdr = fdr, lambda = lambda, sigma = sigma, normalize = normalize)
-       } else{
-        result = SLOPE(X, Y, fdr = fdr, lambda = W, sigma = sigma, normalize = normalize)
-      }
-
-      return(list(beta = result$beta, E = result$selected, lambda_seq = result$lambda, sigma = result$sigma))
-    }''')
-
-    r_slope = robjects.globalenv['slope']
-
-    n, p = X.shape
-    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-    r_Y = robjects.r.matrix(Y, nrow=n, ncol=1)
-
-    if normalize is True:
-        r_normalize = robjects.StrVector('True')
-    else:
-        r_normalize = robjects.StrVector('False')
-
-    if W is None:
-        r_W = robjects.NA_Logical
-        if choice_weights is "gaussian":
-            r_choice_weights  = robjects.StrVector('gaussian')
-        elif choice_weights is "bhq":
-            r_choice_weights = robjects.StrVector('bhq')
-
-    else:
-        r_W = robjects.r.matrix(W, nrow=p, ncol=1)
-
-    result = r_slope(r_X, r_Y, r_W, r_normalize, r_choice_weights)
-
-    return result[0], result[1], result[2], result[3]
-
-def compare_outputs_prechosen_weights():
-
-    n, p = 500, 50
-
-    X = np.random.standard_normal((n, p))
-    Y = np.random.standard_normal(n)
-    W = np.linspace(3, 3.5, p)[::-1]
-
-    output_R = test_slope_R(X, Y, W)
-    r_beta = output_R[0]
-    print("output of est coefs R", r_beta)
-
-    pen = slope(W, lagrange=1.)
-    loss = rr.squared_error(X, Y)
-    problem = rr.simple_problem(loss, pen)
-    soln = problem.solve()
-    print("output of est coefs python", soln)
-
-    print("difference in solns", soln-r_beta)
-
-#compare_outputs_prechosen_weights()
-
-def compare_outputs_SLOPE_weights():
-
-    n, p = 500, 50
-
-    X = np.random.standard_normal((n, p))
-    #Y = np.random.standard_normal(n)
-    X -= X.mean(0)[None, :]
-    X /= (X.std(0)[None, :] * np.sqrt(n))
-    beta = np.zeros(p)
-    beta[:5] = 5.
-
-    Y = X.dot(beta) + np.random.standard_normal(n)
-
-    output_R = test_slope_R(X, Y, W = None, normalize = True, choice_weights = "bhq")
-    r_beta = output_R[0]
-    r_lambda_seq = output_R[2]
-    print("output of est coefs R", r_beta)
-
-    W = r_lambda_seq
-    pen = slope(W, lagrange=1.)
-
-    loss = rr.squared_error(X, Y)
-    problem = rr.simple_problem(loss, pen)
-    soln = problem.solve()
-    print("output of est coefs python", soln)
-
-    print("difference in solns", soln-r_beta)
-
-compare_outputs_SLOPE_weights()

From d203e9bfabcc81e1e5c5d28bec013e37c885620b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jul 2017 12:36:12 -0700
Subject: [PATCH 023/617] removing duplicated regreg code

---
 selection/SLOPE/slope.py | 221 +--------------------------------------
 1 file changed, 4 insertions(+), 217 deletions(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index 393a0eec7..3d9185511 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -1,219 +1,16 @@
 """
-Implementation of the SLOPE proximal operator of
-https://statweb.stanford.edu/~candes/papers/SLOPE.pdf
+Projection onto selected subgradients of SLOPE
 """
-from copy import copy
 import numpy as np
-import regreg.api as rr
-from scipy import sparse
 
 have_isotonic = False
 try:
     from sklearn.isotonic import IsotonicRegression
-
     have_isotonic = True
 except ImportError:
     raise ValueError('unable to import isotonic regression from sklearn')
 
-
-from regreg.atoms.seminorms import seminorm
-
-from regreg.atoms import _work_out_conjugate
-from regreg.objdoctemplates import objective_doc_templater
-from regreg.doctemplates import (doc_template_user, doc_template_provider)
-
-
-@objective_doc_templater()
-class slope(seminorm):
-    """
-    The SLOPE penalty
-    """
-
-    objective_template = r"""\sum_j \lambda_j |(var)s_{(j)}|"""
-
-    def __init__(self, weights, lagrange=None, bound=None,
-                 offset=None,
-                 quadratic=None,
-                 initial=None):
-
-        weights = np.array(weights, np.float)
-        if not np.allclose(-weights, np.sort(-weights)):
-            raise ValueError('weights should be non-increasing')
-        if not np.all(weights > 0):
-            raise ValueError('weights must be positive')
-
-        self.weights = weights
-        self._dummy = np.arange(self.weights.shape[0])
-
-        seminorm.__init__(self, self.weights.shape,
-                          lagrange=lagrange,
-                          bound=bound,
-                          quadratic=quadratic,
-                          initial=initial,
-                          offset=offset)
-
-    def seminorm(self, x, lagrange=None, check_feasibility=False):
-        lagrange = seminorm.seminorm(self, x,
-                                     check_feasibility=check_feasibility,
-                                     lagrange=lagrange)
-        xsort = np.sort(np.fabs(x))[::-1]
-        return lagrange * np.fabs(xsort * self.weights).sum()
-
-    @doc_template_user
-    def constraint(self, x, bound=None):
-        bound = seminorm.constraint(self, x, bound=bound)
-        inbox = self.seminorm(x, lagrange=1,
-                              check_feasibility=True) <= bound * (1 + self.tol)
-        if inbox:
-            return 0
-        else:
-            return np.inf
-
-    @doc_template_user
-    def lagrange_prox(self, x, lipschitz=1, lagrange=None):
-        lagrange = seminorm.lagrange_prox(self, x, lipschitz, lagrange)
-        return _basic_proximal_map(x, self.weights * lagrange / lipschitz)
-
-    @doc_template_user
-    def bound_prox(self, x, bound=None):
-        raise NotImplementedError
-
-    def __copy__(self):
-        return self.__class__(self.weights.copy(),
-                              quadratic=self.quadratic,
-                              initial=self.coefs,
-                              bound=copy(self.bound),
-                              lagrange=copy(self.lagrange),
-                              offset=copy(self.offset))
-
-    def __repr__(self):
-        if self.lagrange is not None:
-            if not self.quadratic.iszero:
-                return "%s(%s, lagrange=%f, offset=%s)" % \
-                       (self.__class__.__name__,
-                        str(self.weights),
-                        self.lagrange,
-                        str(self.offset))
-            else:
-                return "%s(%s, lagrange=%f, offset=%s, quadratic=%s)" % \
-                       (self.__class__.__name__,
-                        str(self.weights),
-                        self.lagrange,
-                        str(self.offset),
-                        self.quadratic)
-        else:
-            if not self.quadratic.iszero:
-                return "%s(%s, bound=%f, offset=%s)" % \
-                       (self.__class__.__name__,
-                        str(self.weights),
-                        self.bound,
-                        str(self.offset))
-            else:
-                return "%s(%s, bound=%f, offset=%s, quadratic=%s)" % \
-                       (self.__class__.__name__,
-                        str(self.weights),
-                        self.bound,
-                        str(self.offset),
-                        self.quadratic)
-
-    def get_conjugate(self):
-        if self.quadratic.coef == 0:
-
-            offset, outq = _work_out_conjugate(self.offset, self.quadratic)
-
-            if self.bound is None:
-                cls = conjugate_slope_pairs[self.__class__]
-                atom = cls(self.weights,
-                           bound=self.lagrange,
-                           lagrange=None,
-                           offset=offset,
-                           quadratic=outq)
-            else:
-                cls = conjugate_slope_pairs[self.__class__]
-                atom = cls(self.weights,
-                           lagrange=self.bound,
-                           bound=None,
-                           offset=offset,
-                           quadratic=outq)
-        else:
-            atom = smooth_conjugate(self)
-
-        self._conjugate = atom
-        self._conjugate._conjugate = self
-        return self._conjugate
-
-    conjugate = property(get_conjugate)
-
-
-@objective_doc_templater()
-class slope_conjugate(slope):
-    r"""
-    The dual of the slope penalty:math:`\ell_{\infty}` norm
-    """
-
-    objective_template = r"""P^*(%(var)s)"""
-
-    @doc_template_user
-    def seminorm(self, x, lagrange=None, check_feasibility=False):
-        lagrange = seminorm.seminorm(self, x,
-                                     check_feasibility=check_feasibility,
-                                     lagrange=lagrange)
-        xsort = np.sort(np.fabs(x))[::-1]
-        return lagrange * np.fabs(xsort / self.weights).max()
-
-    @doc_template_user
-    def constraint(self, x, bound=None):
-        bound = seminorm.constraint(self, x, bound=bound)
-        inbox = self.seminorm(x, lagrange=1,
-                              check_feasibility=True) <= bound * (1 + self.tol)
-        if inbox:
-            return 0
-        else:
-            return np.inf
-
-    @doc_template_user
-    def lagrange_prox(self, x, lipschitz=1, lagrange=None):
-        raise NotImplementedError
-
-    @doc_template_user
-    def bound_prox(self, x, bound=None):
-        bound = seminorm.bound_prox(self, x, bound)
-
-        # the proximal map is evaluated
-        # by working out the SLOPE proximal
-        # map and computing the residual
-
-        # might be better to just find the correct cython function instead
-        # of always constructing IsotonicRegression
-
-        _slope_prox = _basic_proximal_map(x, self.weights * bound)
-        return x - _slope_prox
-
-
-def _basic_proximal_map(center, weights):
-    """
-    Proximal algorithm described (2.3) of SLOPE
-    though sklearn isotonic has ordering reversed.
-    """
-
-    # the proximal map sorts the absolute values,
-    # runs isotonic regression with an offset
-    # reassigns the signs
-
-    # might be better to just find the correct cython function instead
-    # of always constructing IsotonicRegression
-
-    ir = IsotonicRegression()
-
-    _dummy = np.arange(center.shape[0])
-    _arg = np.argsort(np.fabs(center))
-    shifted_center = np.fabs(center)[_arg] - weights[::-1]
-    _prox_val = np.clip(ir.fit_transform(_dummy, shifted_center), 0, np.inf)
-    _return_val = np.zeros_like(_prox_val)
-    _return_val[_arg] = _prox_val
-    _return_val *= np.sign(center)
-    return _return_val
-
+from regreg.atoms.slope import _basic_proximal_map
 
 def _projection_onto_selected_subgradients(prox_arg,
                                            weights,
@@ -281,20 +78,10 @@ def _projection_onto_selected_subgradients(prox_arg,
             indices = np.array([j + cur_idx for j in range(len(cluster))])
             cluster_weights = weights[indices]
 
-            pen = slope(cluster_weights, lagrange=1.)
-            loss = rr.squared_error(np.identity(len(cluster)), prox_subarg)
-            slope_problem = rr.simple_problem(loss, pen)
-            result[indices] = prox_subarg - slope_problem.solve()
+            slope_prox = _basic_proximal_map(prox_subarg, cluster_weights)
+            result[indices] = prox_subarg - slope_prox
 
         cur_idx += len(cluster)
 
     return result
 
-"""
-For a cluster of size bigger than 1, we solve
-"""
-
-conjugate_slope_pairs = {}
-for n1, n2 in [(slope, slope_conjugate)]:
-    conjugate_slope_pairs[n1] = n2
-    conjugate_slope_pairs[n2] = n1

From e7c3d8b4d82da0774d9d36aaf1641d2eb2a6c156 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jul 2017 12:36:56 -0700
Subject: [PATCH 024/617] removing unnecessary imports

---
 selection/SLOPE/tests/projection_subgrad_test.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/selection/SLOPE/tests/projection_subgrad_test.py b/selection/SLOPE/tests/projection_subgrad_test.py
index 0d873511e..0f056e8ec 100644
--- a/selection/SLOPE/tests/projection_subgrad_test.py
+++ b/selection/SLOPE/tests/projection_subgrad_test.py
@@ -1,8 +1,4 @@
 import numpy as np
-import sys
-
-from regreg.atoms.slope import slope
-import regreg.api as rr
 
 from selection.SLOPE.slope import _projection_onto_selected_subgradients
 
@@ -22,4 +18,4 @@ def test_projection():
 
     print("projection", proj)
 
-test_projection()
\ No newline at end of file
+test_projection()

From 29dc482a9e0b3c1e60afda8bffce13d6b28bbc9e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jul 2017 15:05:38 -0700
Subject: [PATCH 025/617] started a randomized LASSO convenience class

---
 selection/randomized/api.py           |   1 -
 selection/randomized/convenience.py   | 698 ++++++++++++++++++++++++++
 selection/randomized/tests/test_cv.py |   7 +-
 3 files changed, 704 insertions(+), 2 deletions(-)
 create mode 100644 selection/randomized/convenience.py

diff --git a/selection/randomized/api.py b/selection/randomized/api.py
index ef64091fa..abdff4233 100644
--- a/selection/randomized/api.py
+++ b/selection/randomized/api.py
@@ -11,4 +11,3 @@
                   target as glm_target)
 
 from .randomization import randomization
-
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
new file mode 100644
index 000000000..debdd6f85
--- /dev/null
+++ b/selection/randomized/convenience.py
@@ -0,0 +1,698 @@
+"""
+Classes encapsulating some common workflows in randomized setting
+"""
+
+from copy import copy
+
+import numpy as np
+import regreg.api as rr
+
+from .glm import target as glm_target, glm_group_lasso
+from .randomization import randomization
+from .query import multiple_queries
+
+class lasso(object):
+
+    r"""
+    A class for the LASSO for post-selection inference.
+    The problem solved is
+
+    .. math::
+
+        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
+            \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
+
+    where $\lambda$ is `lam`, $\omega$ is a randomization generated below
+    and the last term is a small ridge penalty.
+
+    """
+
+
+    def __init__(self, 
+                 loglike, 
+                 feature_weights,
+                 ridge_term,
+                 randomization_scale,
+                 randomization='gaussian',
+                 covariance_estimator=None):
+        r"""
+
+        Create a new post-selection object for the LASSO problem
+
+        Parameters
+        ----------
+
+        loglike : `regreg.smooth.glm.glm`
+            A (negative) log-likelihood as implemented in `regreg`.
+
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is broadcast to all features.
+
+        ridge_term : float
+            How big a ridge term to add?
+
+        randomization_scale : float
+            Scale for IID components of randomization.
+
+        randomization : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        covariance_estimator : callable (optional)
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+
+        self.loglike = loglike
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(loglike.shape) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+
+        self.covariance_estimator = covariance_estimator
+
+        if randomization == 'laplace':
+            self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
+        elif randomization == 'gaussian':
+            self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
+        elif randomization == 'logistic':
+            self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
+
+        self.ridge_term = ridge_term
+
+    def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, marginalize_subgrad=True,
+            views=[]):
+        """
+        Fit the randomized lasso using `regreg`.
+
+        Parameters
+        ----------
+
+        solve_args : keyword args
+             Passed to `regreg.problems.simple_problem.solve`.
+
+        marginalize_subgrad : bool 
+             If True, marginalize over inactive coordinates of the subgradient.
+
+        views : list
+             Other views of the data, e.g. cross-validation.
+
+        Returns
+        -------
+
+        sign_beta : np.float
+             Support and non-zero signs of randomized lasso solution.
+             
+        """
+
+        self.penalty = rr.group_lasso(np.arange(p),
+                                      weights=dict(zip(np.arange(p), self.feature_weights)), lagrange=1.)
+        self._view = glm_group_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
+
+        views = copy(views); views.append(self._view)
+        self._queries = multiple_queries(views)
+        self._queries.solve()
+
+        if marginalize_subgrad == True:
+            self.view.decompose_subgradient(conditioning_groups=np.zeros(p, np.bool),
+                                            marginalizing_groups=np.ones(p, np.bool))
+        
+        self.signs = np.sign(self._view.initial_soln)
+        return self.signs
+
+    def summary(self, selected_features, 
+                null_values=None,
+                ndraw=10000, 
+                burnin=2000,
+                bootstrap=False):
+        """
+        Produce p-values and confidence intervals for targets
+        of model including selected features
+
+        Parameters
+        ----------
+
+        selected_features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+
+        """
+        if not hasattr(self, "_queries"):
+            raise ValueError('run `fit` method before producing summary.')
+        target_sampler, target_observed = glm_target(glm_loss,
+                                                     selected_features,
+                                                     self._queries,
+                                                     bootstrap=bootstrap)
+
+        full_sample = target_sampler.sample(ndraw=ndraw,
+                                            burnin=burnin,
+                                            keep_opt=True)
+        LU = target_sampler.confidence_intervals_translate(target_observed,
+                                                           sample=full_sample,
+                                                           level=0.9)
+        pvalues = target_sampler.coefficient_pvalues_translate(target_observed,
+                                                               parameter=np.zeros_like(true_vec),
+                                                               sample=full_sample)
+        return LU, pvalues
+
+    @staticmethod
+    def gaussian(X, 
+                 Y, 
+                 feature_weights, 
+                 sigma=1., 
+                 covariance_estimator=None,
+                 quadratic=None,
+                 ridge_term=None,
+                 randomization_scale=None,
+                 randomization='gaussian'):
+        r"""
+        Squared-error LASSO with feature weights.
+
+        Objective function (before randomization) is 
+        $$
+        \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\lambda$ is `feature_weights`. The ridge term
+        is determined by the Hessian and `np.std(Y)` by default,
+        as is the randomization scale.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        sigma : float (optional)
+            Noise standard deviation. Set to 1 if `covariance_estimator` is not None.
+            This scales the loglikelihood by `sigma**(-2)`.
+
+        covariance_estimator : callable (optional)
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            How big a ridge term to add?
+
+        randomization_scale : float
+            Scale for IID components of randomization.
+
+        randomization : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.lasso.lasso`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of some of the
+        rows and columns of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        if covariance_estimator is not None:
+            sigma = 1.
+        loglike = glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
+        n, p = X.shape
+
+        mean_diag = np.mean((X**2).sum(0))
+        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        return lasso(loglike, np.asarray(feature_weights) / sigma**2,
+                     ridge_term, randomizer_scale, randomization=randomization)
+
+    @staticmethod
+    def logistic(X, 
+                 successes, 
+                 feature_weights, 
+                 trials=None, 
+                 covariance_estimator=None,
+                 quadratic=None):
+        r"""
+        Logistic LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell$ is the negative of the logistic 
+        log-likelihood (half the logistic deviance)
+        and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        successes : ndarray
+            Shape (n,) -- response vector. An integer number of successes.
+            For data that is proportions, multiply the proportions
+            by the number of trials first.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        trials : ndarray (optional)
+            Number of trials per response, defaults to
+            ones the same shape as `successes`.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            How big a ridge term to add?
+
+        randomization_scale : float
+            Scale for IID components of randomization.
+
+        randomization : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.lasso.lasso`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        loglike = glm.logistic(X, successes, trials=trials, quadratic=quadratic)
+
+        mean_diag = np.mean((X**2).sum(0))
+        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        return lasso(loglike, feature_weights, ridge_term, 
+                     randomizer_scale,
+                     covariance_estimator=covariance_estimator)
+
+    @staticmethod
+    def coxph(X, 
+              times, 
+              status, 
+              feature_weights, 
+              covariance_estimator=None,
+              quadratic=None):
+        r"""
+        Cox proportional hazards LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell^{\text{Cox}}$ is the 
+        negative of the log of the Cox partial
+        likelihood and $\lambda$ is `feature_weights`.
+
+        Uses Efron's tie breaking method.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        times : ndarray
+            Shape (n,) -- the survival times.
+
+        status : ndarray
+            Shape (n,) -- the censoring status.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            How big a ridge term to add?
+
+        randomization_scale : float
+            Scale for IID components of randomization.
+
+        randomization : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.lasso.lasso`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        loglike = coxph_obj(X, times, status, quadratic=quadratic)
+
+        # scale for randomization seems kind of meaningless here...
+
+        mean_diag = np.mean((X**2).sum(0))
+        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        return lasso(loglike, feature_weights, ridge_term,
+                     randomizer_scale, randomization=randomization,
+                     covariance_estimator=covariance_estimator)
+
+    @staticmethod
+    def poisson(X, 
+                counts, 
+                feature_weights, 
+                covariance_estimator=None,
+                quadratic=None):
+        r"""
+        Poisson log-linear LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell^{\text{Poisson}}$ is the negative
+        of the log of the Poisson likelihood (half the deviance)
+        and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        counts : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            How big a ridge term to add?
+
+        randomization_scale : float
+            Scale for IID components of randomization.
+
+        randomization : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.lasso.lasso`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        loglike = glm.poisson(X, counts, quadratic=quadratic)
+
+        # scale for randomization seems kind of meaningless here...
+
+        mean_diag = np.mean((X**2).sum(0))
+        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        return lasso(loglike, feature_weights, ridge_term,
+                     randomizer_scale, randomization=randomization,
+                     covariance_estimator=covariance_estimator)
+
+    @staticmethod
+    def sqrt_lasso(X, 
+                   Y, 
+                   feature_weights, 
+                   quadratic=None,
+                   covariance='parametric',
+                   sigma_estimate='truncated',
+                   solve_args={'min_its':200}):
+        r"""
+        Use sqrt-LASSO to choose variables.
+
+        Objective function is 
+        $$
+        \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\lambda$ is `feature_weights`. After solving the problem
+        treat as if `gaussian` with implied variance and choice of 
+        multiplier. See arxiv.org/abs/1504.08031 for details.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        covariance : str
+            One of 'parametric' or 'sandwich'. Method
+            used to estimate covariance for inference
+            in second stage.
+
+        sigma_estimate : str
+            One of 'truncated' or 'OLS'. Method
+            used to estimate $\sigma$ when using
+            parametric covariance.
+
+        solve_args : dict
+            Arguments passed to solver.
+
+        ridge_term : float
+            How big a ridge term to add?
+
+        randomization_scale : float
+            Scale for IID components of randomization.
+
+        randomization : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.lasso.lasso`
+        
+        Notes
+        -----
+
+        Unlike other variants of LASSO, this
+        solves the problem on construction as the active
+        set is needed to find equivalent gaussian LASSO.
+
+        Assumes parametric model is correct for inference,
+        i.e. does not accept a covariance estimator.
+
+        """
+
+        raise NotImplementedError
+
+        n, p = X.shape
+
+        # scale for randomization seems kind of meaningless here...
+
+        mean_diag = np.mean((X**2).sum(0))
+        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(p) * feature_weights
+        feature_weights = np.asarray(feature_weights)
+
+        # TODO: refits sqrt lasso more than once -- make an override for avoiding refitting?
+
+        soln = solve_sqrt_lasso(X, Y, weights=feature_weights, quadratic=quadratic, solve_args=solve_args)[0]
+
+        # find active set, and estimate of sigma
+
+        active = (soln != 0)
+        nactive = active.sum()
+
+        if nactive:
+
+            subgrad = np.sign(soln[active]) * feature_weights[active]
+            X_E = X[:,active]
+            X_Ei = np.linalg.pinv(X_E)
+            sigma_E = np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)
+            multiplier = np.sqrt((n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))
+
+            # check truncation interval for sigma_E
+
+            # the KKT conditions imply an inequality like
+            # \hat{\sigma}_E \cdot LHS \leq RHS
+
+            penalized = feature_weights[active] != 0
+
+            if penalized.sum():
+                D_E = np.sign(soln[active][penalized]) # diagonal matrix of signs
+                LHS = D_E * np.linalg.solve(X_E.T.dot(X_E), subgrad)[penalized]
+                RHS = D_E * X_Ei.dot(Y)[penalized] 
+
+                ratio = RHS / LHS
+
+                group1 = LHS > 0
+                upper_bound = np.inf
+                if group1.sum():
+                    upper_bound = min(upper_bound, np.min(ratio[group1])) # necessarily these will have RHS > 0
+
+                group2 = (LHS <= 0) * (RHS <= 0) # we can ignore the other possibility since this gives a lower bound of 0
+                lower_bound = 0
+                if group2.sum():
+                    lower_bound = max(lower_bound, np.max(ratio[group2]))
+
+                upper_bound /= multiplier
+                lower_bound /= multiplier
+
+            else:
+                lower_bound = 0
+                upper_bound = np.inf
+
+            _sigma_estimator_args = (sigma_E, 
+                                     n - nactive,
+                                     lower_bound, 
+                                     upper_bound)
+
+            if sigma_estimate == 'truncated':
+                _sigma_hat = estimate_sigma(*_sigma_estimator_args)
+            elif sigma_estimate == 'OLS':
+                _sigma_hat = sigma_E
+            else:
+                raise ValueError('sigma_estimate must be one of ["truncated", "OLS"]')
+        else:
+            _sigma_hat = np.linalg.norm(Y) / np.sqrt(n)
+            multiplier = np.sqrt(n)
+            sigma_E = _sigma_hat
+
+        # XXX how should quadratic be changed?
+        # multiply everything by sigma_E?
+
+        if quadratic is not None:
+            qc = quadratic.collapsed()
+            qc.coef *= np.sqrt(n - nactive) / sigma_E
+            qc.linear_term *= np.sqrt(n - nactive) / sigma_E
+            quadratic = qc
+
+        loglike = glm.gaussian(X, Y, quadratic=quadratic)
+
+        if covariance == 'parametric':
+            cov_est = glm_parametric_estimator(loglike, dispersion=_sigma_hat)
+        elif covariance == 'sandwich':
+            cov_est = glm_sandwich_estimator(loglike, B=2000)
+        else:
+            raise ValueError('covariance must be one of ["parametric", "sandwich"]')
+
+        L = lasso(loglike, feature_weights * multiplier * sigma_E,
+                  covariance_estimator=cov_est,
+                  ignore_inactive_constraints=True)
+
+        # these arguments are reused for data carving
+
+        if nactive:
+            L._sigma_hat = _sigma_hat
+            L._sigma_estimator_args = _sigma_estimator_args
+            L._weight_multiplier = multiplier * sigma_E
+            L._multiplier = multiplier
+            L.lasso_solution = soln
+
+        return L
+
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 217c64b8a..b8d2f5c62 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -54,7 +54,12 @@ def test_cv(n=100, p=50, s=0, signal=3.5, K=5, rho=0.,
 
     epsilon = 1./np.sqrt(n)
     # view 1
-    cv = CV_view(glm_loss, loss_label=loss, lasso_randomization=randomizer, epsilon=epsilon,  scale1=scale1, scale2=scale2)
+    cv = CV_view(glm_loss, 
+                 loss_label=loss, 
+                 lasso_randomization=randomizer, 
+                 epsilon=epsilon, 
+                 scale1=scale1, 
+                 scale2=scale2)
     cv.solve(glmnet=True)
 
     lam = cv.lam_CVR

From 0a454058bad7a754c0f2de24f55fd61911b375b1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jul 2017 15:26:10 -0700
Subject: [PATCH 026/617] added test for randomized lasso -- seems unfeasible

---
 selection/randomized/M_estimator.py           |  2 +-
 selection/randomized/api.py                   |  2 +
 selection/randomized/convenience.py           | 81 ++++++++++---------
 .../randomized/tests/test_randomized_lasso.py | 18 +++++
 selection/tests/instance.py                   | 34 ++++----
 5 files changed, 81 insertions(+), 56 deletions(-)
 create mode 100644 selection/randomized/tests/test_randomized_lasso.py

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 7a292da44..cb841b27b 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -482,7 +482,7 @@ def construct_weights(self, full_state):
             raise ValueError('setup_sampler should be called before using this function')
 
         if self._marginalize_subgradient:
-            p = self.p
+            p = self.penalty.shape[0]
             weights = np.zeros(p)
 
             if self.inactive_marginal_groups.sum()>0:
diff --git a/selection/randomized/api.py b/selection/randomized/api.py
index abdff4233..1eea5850f 100644
--- a/selection/randomized/api.py
+++ b/selection/randomized/api.py
@@ -11,3 +11,5 @@
                   target as glm_target)
 
 from .randomization import randomization
+
+from .convenience import lasso
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index debdd6f85..3fa15ecc0 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -32,8 +32,8 @@ def __init__(self,
                  loglike, 
                  feature_weights,
                  ridge_term,
-                 randomization_scale,
-                 randomization='gaussian',
+                 randomizer_scale,
+                 randomizer='gaussian',
                  covariance_estimator=None):
         r"""
 
@@ -52,10 +52,10 @@ def __init__(self,
         ridge_term : float
             How big a ridge term to add?
 
-        randomization_scale : float
+        randomizer_scale : float
             Scale for IID components of randomization.
 
-        randomization : str
+        randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
 
         covariance_estimator : callable (optional)
@@ -76,21 +76,26 @@ def __init__(self,
         """
 
         self.loglike = loglike
+        self.nfeature = p = self.loglike.shape[0]
+
         if np.asarray(feature_weights).shape == ():
             feature_weights = np.ones(loglike.shape) * feature_weights
         self.feature_weights = np.asarray(feature_weights)
 
         self.covariance_estimator = covariance_estimator
 
-        if randomization == 'laplace':
+        if randomizer == 'laplace':
             self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
-        elif randomization == 'gaussian':
+        elif randomizer == 'gaussian':
             self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
-        elif randomization == 'logistic':
+        elif randomizer == 'logistic':
             self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
 
         self.ridge_term = ridge_term
 
+        self.penalty = rr.group_lasso(np.arange(p),
+                                      weights=dict(zip(np.arange(p), self.feature_weights)), lagrange=1.)
+
     def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, marginalize_subgrad=True,
             views=[]):
         """
@@ -116,16 +121,16 @@ def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, marginalize_subgrad=True,
              
         """
 
-        self.penalty = rr.group_lasso(np.arange(p),
-                                      weights=dict(zip(np.arange(p), self.feature_weights)), lagrange=1.)
+        p = self.nfeature
         self._view = glm_group_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
+        self._view.solve()
 
         views = copy(views); views.append(self._view)
         self._queries = multiple_queries(views)
         self._queries.solve()
 
         if marginalize_subgrad == True:
-            self.view.decompose_subgradient(conditioning_groups=np.zeros(p, np.bool),
+            self._view.decompose_subgradient(conditioning_groups=np.zeros(p, np.bool),
                                             marginalizing_groups=np.ones(p, np.bool))
         
         self.signs = np.sign(self._view.initial_soln)
@@ -150,7 +155,7 @@ def summary(self, selected_features,
         """
         if not hasattr(self, "_queries"):
             raise ValueError('run `fit` method before producing summary.')
-        target_sampler, target_observed = glm_target(glm_loss,
+        target_sampler, target_observed = glm_target(self.loglike,
                                                      selected_features,
                                                      self._queries,
                                                      bootstrap=bootstrap)
@@ -174,19 +179,19 @@ def gaussian(X,
                  covariance_estimator=None,
                  quadratic=None,
                  ridge_term=None,
-                 randomization_scale=None,
-                 randomization='gaussian'):
+                 randomizer_scale=None,
+                 randomizer='gaussian'):
         r"""
         Squared-error LASSO with feature weights.
 
-        Objective function (before randomization) is 
+        Objective function (before randomizer) is 
         $$
         \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
 
         where $\lambda$ is `feature_weights`. The ridge term
         is determined by the Hessian and `np.std(Y)` by default,
-        as is the randomization scale.
+        as is the randomizer scale.
 
         Parameters
         ----------
@@ -219,10 +224,10 @@ def gaussian(X,
         ridge_term : float
             How big a ridge term to add?
 
-        randomization_scale : float
-            Scale for IID components of randomization.
+        randomizer_scale : float
+            Scale for IID components of randomizer.
 
-        randomization : str
+        randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
 
         Returns
@@ -245,7 +250,7 @@ def gaussian(X,
         """
         if covariance_estimator is not None:
             sigma = 1.
-        loglike = glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
+        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
         n, p = X.shape
 
         mean_diag = np.mean((X**2).sum(0))
@@ -253,7 +258,7 @@ def gaussian(X,
         randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
 
         return lasso(loglike, np.asarray(feature_weights) / sigma**2,
-                     ridge_term, randomizer_scale, randomization=randomization)
+                     ridge_term, randomizer_scale, randomizer=randomizer)
 
     @staticmethod
     def logistic(X, 
@@ -307,10 +312,10 @@ def logistic(X,
         ridge_term : float
             How big a ridge term to add?
 
-        randomization_scale : float
-            Scale for IID components of randomization.
+        randomizer_scale : float
+            Scale for IID components of randomizer.
 
-        randomization : str
+        randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
 
         Returns
@@ -330,7 +335,7 @@ def logistic(X,
         the unpenalized estimator.
 
         """
-        loglike = glm.logistic(X, successes, trials=trials, quadratic=quadratic)
+        loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
 
         mean_diag = np.mean((X**2).sum(0))
         ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
@@ -391,10 +396,10 @@ def coxph(X,
         ridge_term : float
             How big a ridge term to add?
 
-        randomization_scale : float
-            Scale for IID components of randomization.
+        randomizer_scale : float
+            Scale for IID components of randomizer.
 
-        randomization : str
+        randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
 
         Returns
@@ -423,7 +428,7 @@ def coxph(X,
         randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
 
         return lasso(loglike, feature_weights, ridge_term,
-                     randomizer_scale, randomization=randomization,
+                     randomizer_scale, randomizer=randomizer,
                      covariance_estimator=covariance_estimator)
 
     @staticmethod
@@ -471,10 +476,10 @@ def poisson(X,
         ridge_term : float
             How big a ridge term to add?
 
-        randomization_scale : float
-            Scale for IID components of randomization.
+        randomizer_scale : float
+            Scale for IID components of randomizer.
 
-        randomization : str
+        randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
 
         Returns
@@ -494,16 +499,16 @@ def poisson(X,
         the unpenalized estimator.
 
         """
-        loglike = glm.poisson(X, counts, quadratic=quadratic)
+        loglike = rr.glm.poisson(X, counts, quadratic=quadratic)
 
-        # scale for randomization seems kind of meaningless here...
+        # scale for randomizer seems kind of meaningless here...
 
         mean_diag = np.mean((X**2).sum(0))
         ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
         randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
 
         return lasso(loglike, feature_weights, ridge_term,
-                     randomizer_scale, randomization=randomization,
+                     randomizer_scale, randomizer=randomizer,
                      covariance_estimator=covariance_estimator)
 
     @staticmethod
@@ -562,10 +567,10 @@ def sqrt_lasso(X,
         ridge_term : float
             How big a ridge term to add?
 
-        randomization_scale : float
-            Scale for IID components of randomization.
+        randomizer_scale : float
+            Scale for IID components of randomizer.
 
-        randomization : str
+        randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
 
         Returns
@@ -672,7 +677,7 @@ def sqrt_lasso(X,
             qc.linear_term *= np.sqrt(n - nactive) / sigma_E
             quadratic = qc
 
-        loglike = glm.gaussian(X, Y, quadratic=quadratic)
+        loglike = rr.glm.gaussian(X, Y, quadratic=quadratic)
 
         if covariance == 'parametric':
             cov_est = glm_parametric_estimator(loglike, dispersion=_sigma_hat)
diff --git a/selection/randomized/tests/test_randomized_lasso.py b/selection/randomized/tests/test_randomized_lasso.py
new file mode 100644
index 000000000..15dff77a2
--- /dev/null
+++ b/selection/randomized/tests/test_randomized_lasso.py
@@ -0,0 +1,18 @@
+from __future__ import print_function
+import numpy as np
+
+from selection.randomized.api import lasso as randomized_lasso
+from selection.tests.instance import gaussian_instance
+
+def test_randomized_lasso(n=100, p=200, s=10, signal=7, rho=0):
+
+    X, Y, beta, active, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho)
+
+    L = randomized_lasso.gaussian(X, Y, 4.5 * sigma * np.ones(p))
+    signs = L.fit()
+
+    print(L.summary(signs != 0))
+
+
+if __name__ == "__main__":
+    test_randomized_lasso()
diff --git a/selection/tests/instance.py b/selection/tests/instance.py
index ed70f04e4..eb291763d 100644
--- a/selection/tests/instance.py
+++ b/selection/tests/instance.py
@@ -3,21 +3,21 @@
 
 from scipy.stats import t as tdist
 
-# def design(n, p, rho, equi_correlated):
-#     if equi_correlated:
-#         X = (np.sqrt(1 - rho) * np.random.standard_normal((n, p)) +
-#              np.sqrt(rho) * np.random.standard_normal(n)[:, None])
-#     else:
-#         def AR1(rho, p):
-#             idx = np.arange(p)
-#             cov = rho ** np.abs(np.subtract.outer(idx, idx))
-#             return cov, np.linalg.cholesky(cov)
-
-#         sigmaX, cholX = AR1(rho=rho, p=p)
-#         X = np.random.standard_normal((n, p)).dot(cholX.T)
-#         # X = np.random.multivariate_normal(mean=np.zeros(p), cov = sigmaX, size = (n,))
-#         # print(X.shape)
-#     return X
+def _equicor_design(n, p, rho, equi_correlated):
+    if equi_correlated:
+        X = (np.sqrt(1 - rho) * np.random.standard_normal((n, p)) +
+             np.sqrt(rho) * np.random.standard_normal(n)[:, None])
+    else:
+        def AR1(rho, p):
+            idx = np.arange(p)
+            cov = rho ** np.abs(np.subtract.outer(idx, idx))
+            return cov, np.linalg.cholesky(cov)
+
+        sigmaX, cholX = AR1(rho=rho, p=p)
+        X = np.random.standard_normal((n, p)).dot(cholX.T)
+        # X = np.random.multivariate_normal(mean=np.zeros(p), cov = sigmaX, size = (n,))
+        # print(X.shape)
+    return X
 
 def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
                       random_signs=False, df=np.inf,
@@ -78,8 +78,8 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
     sigma : float
         Noise level.
     """
-    X=design(n,p, rho, equi_correlated)
 
+    X = _equicor_design(n,p, rho, equi_correlated)
 
     if center:
         X -= X.mean(0)[None, :]
@@ -205,7 +205,7 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
 
     """
 
-    X= design(n,p, rho, equi_correlated)
+    X = _equicor_design(n,p, rho, equi_correlated)
 
     if center:
         X -= X.mean(0)[None,:]

From 964ae931e67937674fcbfc8d91d05eaa6700193f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 24 Jul 2017 15:32:48 -0700
Subject: [PATCH 027/617] BF: undefined variable

---
 selection/randomized/convenience.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 3fa15ecc0..08a018cd1 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -137,7 +137,7 @@ def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, marginalize_subgrad=True,
         return self.signs
 
     def summary(self, selected_features, 
-                null_values=None,
+                null_value=None,
                 ndraw=10000, 
                 burnin=2000,
                 bootstrap=False):
@@ -152,6 +152,18 @@ def summary(self, selected_features,
             Binary encoding of which features to use in final
             model and targets.
 
+        null_value : np.array
+            Hypothesized value for null -- defaults to 0.
+
+        ndraw : int (optional)
+            Defaults to 1000.
+
+        burnin : int (optional)
+            Defaults to 1000.
+
+        bootstrap : bool
+            Use wild bootstrap instead of Gaussian plugin.
+
         """
         if not hasattr(self, "_queries"):
             raise ValueError('run `fit` method before producing summary.')
@@ -166,8 +178,11 @@ def summary(self, selected_features,
         LU = target_sampler.confidence_intervals_translate(target_observed,
                                                            sample=full_sample,
                                                            level=0.9)
+
+        if null_value is None:
+            null_value = np.zeros(self.loglike.shape[0])
         pvalues = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                               parameter=np.zeros_like(true_vec),
+                                                               parameter=null_value,
                                                                sample=full_sample)
         return LU, pvalues
 

From 3b82c0ddad0b68187c57bfd1e1bd552a7befd884 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 2 Aug 2017 16:02:04 -0700
Subject: [PATCH 028/617] option of using tilt or translate, exception raised
 in langevin, using AR in the randomized lasso test

---
 selection/randomized/convenience.py           | 52 +++++++++++++++----
 .../randomized/tests/test_randomized_lasso.py | 12 +++--
 selection/sampling/langevin.py                |  6 ++-
 3 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 08a018cd1..c3fd4004f 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -138,8 +138,11 @@ def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, marginalize_subgrad=True,
 
     def summary(self, selected_features, 
                 null_value=None,
+                level=0.9,
                 ndraw=10000, 
                 burnin=2000,
+                reference_type='translate',
+                compute_intervals=False,
                 bootstrap=False):
         """
         Produce p-values and confidence intervals for targets
@@ -155,36 +158,63 @@ def summary(self, selected_features,
         null_value : np.array
             Hypothesized value for null -- defaults to 0.
 
+        level : float
+            Confidence level.
+
         ndraw : int (optional)
             Defaults to 1000.
 
         burnin : int (optional)
             Defaults to 1000.
 
+        reference_type : str
+            One of ['translate', 'tilt']. 
+
         bootstrap : bool
             Use wild bootstrap instead of Gaussian plugin.
 
         """
         if not hasattr(self, "_queries"):
             raise ValueError('run `fit` method before producing summary.')
+
+        if reference_type not in ['translate', 'tilt']:
+            raise ValueError('reference_type must be one of ["translate", "tilt"]')
+
         target_sampler, target_observed = glm_target(self.loglike,
                                                      selected_features,
                                                      self._queries,
                                                      bootstrap=bootstrap)
 
-        full_sample = target_sampler.sample(ndraw=ndraw,
-                                            burnin=burnin,
-                                            keep_opt=True)
-        LU = target_sampler.confidence_intervals_translate(target_observed,
-                                                           sample=full_sample,
-                                                           level=0.9)
-
         if null_value is None:
             null_value = np.zeros(self.loglike.shape[0])
-        pvalues = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                               parameter=null_value,
-                                                               sample=full_sample)
-        return LU, pvalues
+
+        intervals = None
+        if reference_type == 'translate':
+            full_sample = target_sampler.sample(ndraw=ndraw,
+                                                burnin=burnin,
+                                                keep_opt=True)
+
+            pvalues = target_sampler.coefficient_pvalues_translate(target_observed,
+                                                                   parameter=null_value,
+                                                                   sample=full_sample)
+
+            if compute_intervals:
+                intervals = target_sampler.confidence_intervals_translate(target_observed,
+                                                                          sample=full_sample,
+                                                                          level=level)
+        else:
+            full_sample = target_sampler.sample(ndraw=ndraw,
+                                                burnin=burnin,
+                                                keep_opt=False)
+            pvalues = target_sampler.coefficient_pvalues(target_observed,
+                                                         parameter=null_value,
+                                                         sample=full_sample)
+            if compute_intervals:
+                intervals = target_sampler.confidence_intervals(target_observed,
+                                                                sample=full_sample,
+                                                                level=level)
+            
+        return intervals, pvalues
 
     @staticmethod
     def gaussian(X, 
diff --git a/selection/randomized/tests/test_randomized_lasso.py b/selection/randomized/tests/test_randomized_lasso.py
index 15dff77a2..973d34cf4 100644
--- a/selection/randomized/tests/test_randomized_lasso.py
+++ b/selection/randomized/tests/test_randomized_lasso.py
@@ -2,16 +2,18 @@
 import numpy as np
 
 from selection.randomized.api import lasso as randomized_lasso
-from selection.tests.instance import gaussian_instance
+from selection.tests.instance import gaussian_instance, AR_instance
 
-def test_randomized_lasso(n=100, p=200, s=10, signal=7, rho=0):
+def test_randomized_lasso(n=300, p=500, s=5, signal=7.5, rho=0.2):
 
-    X, Y, beta, active, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho)
+    X, Y, beta, active, sigma = AR_instance(n=n, p=p, s=s, rho=rho, signal=signal)
 
-    L = randomized_lasso.gaussian(X, Y, 4.5 * sigma * np.ones(p))
+    L = randomized_lasso.gaussian(X, Y, 3.5 * sigma * np.ones(p))
     signs = L.fit()
 
-    print(L.summary(signs != 0))
+    print(np.nonzero(signs != 0)[0])
+    print(np.nonzero(beta != 0)[0])
+    print(L.summary(signs != 0, ndraw=10000, burnin=2000, reference_type='tilt', compute_intervals=False))
 
 
 if __name__ == "__main__":
diff --git a/selection/sampling/langevin.py b/selection/sampling/langevin.py
index 05a290d52..67a623b56 100644
--- a/selection/sampling/langevin.py
+++ b/selection/sampling/langevin.py
@@ -29,14 +29,18 @@ def __iter__(self):
         return self
 
     def next(self):
+        nattempt = 0
         while True:
+            
             proj_arg = (self.state
                         + 0.5 * self.stepsize * self.gradient_map(self.state)
                         + self._noise.rvs(self._shape) * self._sqrt_step)
             candidate = self.projection_map(proj_arg)
             if not np.all(np.isfinite(self.gradient_map(candidate))):
-                print(candidate, self._sqrt_step)
+                nattempt += 1
                 self._sqrt_step *= 0.8
+                if nattempt >= 10:
+                    raise ValueError('unable to find feasible step')
             else:
                 self.state[:] = candidate
                 break

From d314b9f517c3d132826a2b40a2743dbaf235a58a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slij.SUNet>
Date: Wed, 2 Aug 2017 16:03:15 -0700
Subject: [PATCH 029/617] changed step size

---
 selection/approx_ci/ci_via_approx_density.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index 29eaad4e0..e2d3d0fd5 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -307,7 +307,7 @@ def approx_conditional_prob(self, j):
         for i in range(self.grid.shape[0]):
 
             approx = approximate_conditional_prob(self.grid[i], self.sel_alg)
-            h_hat.append(-(approx.minimize2(j, nstep=50)[::-1])[0])
+            h_hat.append(-(approx.minimize2(step=1, nstep=50)[::-1])[0])
 
         return np.array(h_hat)
 

From 29ad109f393470f89a1a49ba679515d760c61269 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slij.SUNet>
Date: Wed, 2 Aug 2017 17:15:26 -0700
Subject: [PATCH 030/617] updated grids

---
 selection/approx_ci/ci_via_approx_density.py | 31 ++++++++++++--------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index e2d3d0fd5..601e45983 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -3,6 +3,7 @@
 import regreg.api as rr
 from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
 from scipy.stats import norm
+import sys
 
 def myround(a, decimals=1):
     a_x = np.round(a, decimals=1)* 10.
@@ -277,10 +278,12 @@ def __init__(self, sel_alg,
     def solve_approx(self):
 
         #defining the grid on which marginal conditional densities will be evaluated
-        grid_length = 1601
-        self.grid = np.linspace(-15,65, num=grid_length)
+        grid_length = 301
+
+        #self.grid = np.linspace(-15,65, num=grid_length)
         #self.grid = np.linspace(-5*np.amax(np.absolute(target_observed)), 5*np.amax(np.absolute(target_observed)), num=grid_length)
         #s_obs = np.round(self.target_observed, decimals =1)
+        self.grid = np.zeros((self.nactive, grid_length))
 
         print("observed values", self.target_observed)
         self.ind_obs = np.zeros(self.nactive, int)
@@ -289,13 +292,16 @@ def solve_approx(self):
 
         for j in range(self.nactive):
             obs = self.target_observed[j]
+            self.grid[j, :] = np.linspace(self.target_observed[j] - 15., self.target_observed[j] + 15., num=grid_length)
             self.norm[j] = self.target_cov[j,j]
             if obs < self.grid[0]:
                 self.ind_obs[j] = 0
             elif obs > np.max(self.grid):
                 self.ind_obs[j] = grid_length-1
             else:
-                self.ind_obs[j] = np.argmin(np.abs(self.grid-obs))
+                self.ind_obs[j] = np.argmin(np.abs(self.grid[j,:]-obs))
+
+            sys.stderr.write("number of variable being computed: " + str(j) + "\n")
             self.h_approx[j, :] = self.approx_conditional_prob(j)
 
 
@@ -304,12 +310,14 @@ def approx_conditional_prob(self, j):
 
         self.sel_alg.setup_map(j)
 
-        for i in range(self.grid.shape[0]):
+        for i in xrange(self.grid[j, :].shape[0]):
+            approx = approximate_conditional_prob((self.grid[j, :])[i], self.sel_alg)
+            val = -(approx.minimize2(step=1, nstep=100)[::-1])[0]
 
-            approx = approximate_conditional_prob(self.grid[i], self.sel_alg)
-            h_hat.append(-(approx.minimize2(step=1, nstep=50)[::-1])[0])
-
-        return np.array(h_hat)
+            if val != -float('Inf'):
+                h_hat.append(val)
+            else:
+                h_hat.append(h_hat[i - 1])
 
     def area_normalized_density(self, j, mean):
 
@@ -318,10 +326,10 @@ def area_normalized_density(self, j, mean):
         approx_nonnormalized = []
 
         for i in range(self.grid.shape[0]):
-            approx_density = np.exp(-np.true_divide((self.grid[i] - mean) ** 2, 2 * self.norm[j])
+            approx_density = np.exp(-np.true_divide(((self.grid[j,:])[i] - mean) ** 2, 2 * self.norm[j])
                                     + (self.h_approx[j,:])[i])
             normalizer += approx_density
-            grad_normalizer +=  (-mean/self.norm[j] + self.grid[i]/self.norm[j])* approx_density
+            grad_normalizer += (-mean / self.norm[j] + (self.grid[j, :])[i] / self.norm[j]) * approx_density
             approx_nonnormalized.append(approx_density)
 
         return np.cumsum(np.array(approx_nonnormalized / normalizer)), normalizer, grad_normalizer
@@ -387,8 +395,7 @@ def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
 
     def approximate_ci(self, j):
 
-        #param_grid = np.linspace(-5*np.amax(np.absolute(self.target_observed)), 5*np.amax(np.absolute(self.target_observed)), num=grid_length)
-        param_grid = np.linspace(-15, 65, num=1601)
+        param_grid = np.linspace(-15., 15., num=301)
         area = np.zeros(param_grid.shape[0])
 
         for k in range(param_grid.shape[0]):

From bbb7c625ec893e634bf1f77218c84df00a132d6d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 3 Aug 2017 14:01:00 -0700
Subject: [PATCH 031/617] logic worked out to write separate maxZ test method

---
 selection/algorithms/forward_step.py          | 372 ++++++++++++------
 selection/algorithms/tests/test_compareR.py   |   2 +-
 .../algorithms/tests/test_forward_step.py     |  18 +-
 3 files changed, 265 insertions(+), 127 deletions(-)

diff --git a/selection/algorithms/forward_step.py b/selection/algorithms/forward_step.py
index 1b4e67a51..a1054c9e1 100644
--- a/selection/algorithms/forward_step.py
+++ b/selection/algorithms/forward_step.py
@@ -16,7 +16,7 @@
 
 from ..constraints.affine import (constraints, 
                                   gibbs_test, 
-                                  stack,
+                                  stack as stack_con,
                                   gaussian_hit_and_run)
 from ..distributions.chain import parallel_test, serial_test
 from ..distributions.chisq import quadratic_test
@@ -27,20 +27,65 @@
 class forward_step(object):
 
     """
-    Centers columns of X!
+    Forward stepwise model selection.
+
+   
     """
 
     def __init__(self, X, Y, 
-                 subset=[],
-                 fixed_regressors=[],
+                 subset=None,
+                 fixed_regressors=None,
                  intercept=True,
                  covariance=None):
+
+        """
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        subset : ndarray (optional)
+            Shape (n,) -- boolean indicator of which cases to use.
+            Defaults to np.ones(n, np.bool)
+
+        fixed_regressors: ndarray (optional)
+            Shape (n, *) -- fixed regressors to regress out before
+            computing score.
+
+        intercept : bool
+            Remove intercept -- this effectively includes np.ones(n) to fixed_regressors.
+
+        covariance : ndarray (optional)
+            Covariance matrix of errors. Defaults to np.identity(n).
+
+        Returns
+        -------
+
+        FS : `selection.algorithms.forward_step.forward_step`
+        
+        Notes
+        -----
+
+        """
+
         self.subset = subset
         self.X, self.Y = X, Y
 
+        n, p = self.X.shape
+        if fixed_regressors is not None:
+            fixed_regressors = np.asarray(fixed_regressors).reshape((n,-1))
+
         if intercept:
-            fixed_regressors = fixed_regressors + [np.ones((X.shape[0], 1))]
-        if fixed_regressors != []:
+            if fixed_regressors is not None:
+                fixed_regressors = np.hstack([fixed_regressors, np.ones((n, 1))])
+            else:
+                fixed_regressors = np.ones((n, 1))
+
+        if fixed_regressors is not None:
             self.fixed_regressors = np.hstack(fixed_regressors)
             if self.fixed_regressors.ndim == 1:
                 self.fixed_regressors = self.fixed_regressors.reshape((-1,1))
@@ -55,45 +100,71 @@ def __init__(self, X, Y,
             self.X = self.X - np.dot(self.fixed_regressors, 
                                      np.dot(self.fixed_pinv, self.X))
         else:
-            self.fixed_regressors = []
+            self.fixed_regressors = None
+
+        if self.subset is not None:
 
-        if subset != []:
             self.adjusted_X = self.X.copy()[subset]
             self.subset_X = self.X.copy()[subset]
             self.subset_Y = self.Y.copy()[subset]
             self.subset_selector = np.identity(self.X.shape[0])[subset]
+
         else:
             self.adjusted_X = self.X.copy()
             self.subset_Y = self.Y.copy()
             self.subset_X = self.X.copy()
 
+        # scale columns of X to have length 1
         self.adjusted_X /= np.sqrt((self.adjusted_X**2).sum(0))[None, :]
 
-        self.variables = []
-        self.Z = []
-        self.Zfunc = []
-        self.signs = []
-        self.covariance = covariance
-        self._resid_vector = self.subset_Y.copy() 
+        self.variables = [] # the sequence of selected variables
+        self.Z = []         # the achieved Z scores
+        self.Zfunc = []     # the linear functionals of Y that achieve the Z scores
+        self.signs = []     # the signs of the achieved Z scores
 
-        # setup for iteration
+        self.covariance = covariance               # the covariance of errors
+        self._resid_vector = self.subset_Y.copy()  # the current residual -- already adjusted for fixed regressors
 
-        iter(self)
+        # setup for iteration
 
-    def __iter__(self):
-        n, p = self.X.shape
-        self.identity_cone = []
-        self.inactive = range(p)
-        self.offset = [[np.ones(p) * np.inf, np.ones(p) * np.inf]]
-        return self
+        self.identity_constraints = []    # this will store linear functionals that identify the variables
+        self.inactive = np.ones(p, np.bool)   # current inactive set
+        self.maxZ_offset = [[np.ones(p) * np.inf, np.ones(p) * np.inf]] # stored for computing
+                                                                   # the limits of maxZ selected test
+        self.maxZ_constraints = []
 
-    def next(self, compute_pval=False,
+    def step(self, 
+             compute_maxZ_pval=False,
              use_identity=False,
-             burnin=2000,
              ndraw=8000,
+             burnin=2000,
              sigma_known=True,
              accept_reject_params=(100, 15, 2000)):
         """
+        Parameters
+        ----------
+
+        compute_maxZ_pval : bool
+            Compute a p-value for this step? Requires MCMC sampling.
+
+        use_identity : bool
+            When computing a p-value, also condition on the identity of the winning variable?
+
+        ndraw : int (optional)
+            Defaults to 8000.
+
+        burnin : int (optional)
+            Defaults to 2000.
+
+        sigma_known : bool
+            Is $\sigma$ assumed known?
+
+        accept_reject_params : tuple
+            If not () should be a tuple (num_trial, min_accept, num_draw).
+            In this case, we first try num_trial accept-reject samples,
+            if at least min_accept of them succeed, we just draw num_draw
+            accept_reject samples.
+
         """
         
         adjusted_X, Y = self.adjusted_X, self.subset_Y
@@ -101,131 +172,198 @@ def next(self, compute_pval=False,
         n, p = adjusted_X.shape
 
         # up to now inactive
-        inactive = self.inactive = sorted(set(range(p)).difference(self.variables))
-        scale = np.sqrt(np.sum(adjusted_X**2, 0))
+        inactive = self.inactive
+
+        # compute Z scores
 
-        Zfunc = adjusted_X.T[inactive] 
-        Zstat = np.dot(Zfunc, Y)
-        idx = np.argmax(np.fabs(Zstat))
-        next_var = inactive[idx]
-        next_sign = np.sign(Zstat[idx])
+        scale = self.scale = np.sqrt(np.sum(adjusted_X**2, 0))
+        scale[~inactive] = np.inf # should never be used in any case
+        Zfunc = adjusted_X.T # [inactive] 
+        Zstat = np.dot(Zfunc, Y) / scale # [inactive]
 
-        realized_Z_max = Zstat[idx]
-        self.Z.append(realized_Z_max)
+        winning_var = np.argmax(np.fabs(Zstat))
+        winning_func = adjusted_X[:,winning_var] / scale[winning_var]
+        winning_sign = np.sign(Zstat[winning_var])
 
-        if self.subset != []:
-            self.Zfunc.append(np.dot(Zfunc[idx], self.subset_selector) * next_sign)
+        realized_maxZ = Zstat[winning_var] * winning_sign 
+        self.Z.append(realized_maxZ)
+
+        if self.subset is not None:
+            self.Zfunc.append(np.dot(Zfunc[winning_var], self.subset_selector) * winning_sign / scale[winning_var])
         else:
-            self.Zfunc.append(Zfunc[idx] * next_sign)
+            self.Zfunc.append(Zfunc[winning_var] * winning_sign / scale[winning_var])
 
         # keep track of identity for testing
         # variables other than the last one added
 
-        keep = np.zeros(p, np.bool)
-        keep[inactive] = True
-        keep[next_var] = False
-        identity_linpart = np.vstack([adjusted_X[:,keep].T -
-                                      next_sign * adjusted_X[:,next_var],
-                                      -adjusted_X[:,keep].T -
-                                      next_sign * adjusted_X[:,next_var],
-                                      -next_sign * adjusted_X[:,next_var].reshape((1,-1))])
+        # this adds a constraint to self.identity_constraints
+
+        # losing_vars are variables that are inactive (i.e. not in self.variables)
+        # and did not win in this step
 
-        if self.subset != []:
+        losing_vars = np.zeros(p, np.bool)
+        losing_vars[inactive] = True
+        losing_vars[winning_var] = False
+
+        identity_linpart = np.vstack([ 
+                adjusted_X[:,losing_vars].T / scale[losing_vars,None]-
+                winning_sign * winning_func,
+                -adjusted_X[:,losing_vars].T / scale[losing_vars,None] -
+                winning_sign * winning_func,
+                -winning_sign * winning_func.reshape((1,-1))])
+
+        if self.subset is not None:
             identity_linpart = np.dot(identity_linpart, 
                                       self.subset_selector)
 
         identity_con = constraints(identity_linpart,
                                    np.zeros(identity_linpart.shape[0]))
 
-        self.identity_cone.append(identity_linpart)
-
-        eta = adjusted_X[:,next_var]
-
-        if compute_pval:
-
-            XI = self.subset_X[:,inactive]
-            linear_part = np.vstack([XI.T, -XI.T])
-            offset = np.array(self.offset)
-            offset = offset[:,:,inactive]
-            offset_pos = np.min(offset[:,0], 0)
-            offset_neg = np.min(offset[:,1], 0)
-            offset = np.hstack([offset_pos, offset_neg])
-            con = constraints(linear_part, offset,
-                              covariance=self.covariance)
-
-            #use_identity = False
-            if use_identity:
-                con = stack(con, identity_con)
-                con.covariance = self.covariance
-            if self.variables or (self.fixed_regressors != []):
-                XA = self.subset_X[:,self.variables]
-                # TODO allow other regressors here
-                XA = np.hstack([self.fixed_regressors, XA])
-                sequential_con = con.conditional(XA.T,
-                                                 np.dot(XA.T, Y))
-            else:
-                sequential_con = con
-
-            def maxT(Z, L=adjusted_X[:,inactive], S=scale[inactive]):
-                Tstat = np.fabs(np.dot(Z, L) / S[None,:]).max(1)
-                return Tstat
-
-            B = sequential_con.offset
-            d = offset_pos.shape[0]
-            sequential_con.offset[:d] -= XI.T.dot(sequential_con.mean)
-            sequential_con.offset[d:(2*d)] += XI.T.dot(sequential_con.mean)
-
-            pval = gibbs_test(sequential_con,
-                              Y,
-                              eta,
-                              sigma_known=sigma_known,
-                              white=False,
-                              ndraw=ndraw,
-                              burnin=burnin,
-                              how_often=-1,
-                              UMPU=False,
-                              use_random_directions=False,
-                              tilt=None,
-                              alternative='greater',
-                              test_statistic=maxT,
-                              accept_reject_params=accept_reject_params
-                              )[0]
+        if not identity_con(self.subset_Y):
+            raise ValueError('identity fail!')
+
+        self.identity_constraints.append(identity_linpart)
+
+        # form the maxZ constraint
+
+        XI = self.subset_X[:,self.inactive]
+        linear_part = np.vstack([XI.T, -XI.T])
+        _offset = np.array(self.maxZ_offset)
+        _offset = _offset[:,:,self.inactive]
+        offset_pos = np.min(_offset[:,0], 0) # this corresponds to X_L^TY \leq (Z_max + V) * S_L
+        offset_neg = np.min(_offset[:,1], 0) # this corresponds to -X_L^TY \leq (Z_max - V) * S_L
+        offset = np.hstack([offset_pos, offset_neg])
+        maxZ_con = constraints(linear_part, offset,
+                               covariance=self.covariance)
+
+        if use_identity:
+            maxZ_con = stack_con(maxZ_con, identity_con)
+            con.covariance = self.covariance
+
+        if len(self.variables) > 0 or (self.fixed_regressors != []):
+            XA = self.subset_X[:, self.variables]
+            XA = np.hstack([self.fixed_regressors, XA])
+            # the RHS, i.e. offset is fixed by this conditioning
+            conditional_con = maxZ_con.conditional(XA.T,  
+                                            np.dot(XA.T, Y))
+        else:
+            conditional_con = maxZ_con
+
+        self.maxZ_constraints.append(conditional_con)
+        if compute_maxZ_pval:
+            maxZ_pval = self._maxZ_test(ndraw, burnin,
+                                        sigma_known=sigma_known,
+                                        accept_reject_params=accept_reject_params)
 
         # now update state for next step
 
-        inactive.pop(idx)
-        self.inactive = inactive # unnecessary?
-        self.variables.append(next_var); self.signs.append(next_sign)
+        # update the offsets for maxZ
+
+        # when we condition on the sufficient statistics up to
+        # and including winning_var, the Z_scores are fixed
+        
+        # then, the losing variables at this stage can be expressed as
+        # abs(adjusted_X.T.dot(Y)[:,inactive] / scale[inactive]) < realized_maxZ
+        # where inactive is the updated inactive 
+
+        # the event we have witnessed this step is 
+        # $$\|X^T_L(I-P)Y / \sqrt{diag(X^T_L(I-P)X_L)}\|_{\infty} \leq X^T_W(I-P)Y / \sqrt{X^T_W(I-P)X_W}$$
+        # where P is the current "model"
+
+        # let V=PY and S_L the losing scales, we rewrite this as
+        # $$\|X^T_LY / S_L - V\|_{\infty} \leq Z_max $$
+        # and again
+        # $$X^T_LY / S_L - V \leq Z_max, -(X^T_LY / S_L - V) \leq Z_max $$
+        # or,
+        # $$X^T_LY \leq (Z_max + V) * S_L, -X^T_LY \leq (Z_max - V) * S_L $$
+
+        # where, at the next step Z_max and V are measurable with respect to
+        # the appropriate sigma algebra
+
+        realized_Z_adjustment = realized_maxZ * scale                      # Z_max * S_L
+        fit_adjustment = np.dot(self.subset_X.T, Y - resid_vector) * scale # V * S_L
+        self.maxZ_offset.append([realized_Z_adjustment + fit_adjustment,   # (Z_max + V) * S_L
+                                 realized_Z_adjustment - fit_adjustment])  # (Z_max - V) * S_L
+
 
-        realized_Z_adjusted = np.fabs(realized_Z_max) * scale
-        offset_shift = np.dot(self.subset_X.T, Y - resid_vector)
-        self.offset.append([realized_Z_adjusted + offset_shift,
-                            realized_Z_adjusted - offset_shift])
+        # update our list of variables and signs
 
-        resid_vector -= realized_Z_max * adjusted_X[:,next_var] / scale[next_var]
-        adjusted_X -= (np.multiply.outer(eta, 
-                                         np.dot(eta,
-                                                adjusted_X)) / 
-                       (eta**2).sum())
-        # maintain the scale
-        adjusted_X /= np.sqrt(np.sum(adjusted_X**2, 0))[None, :]
-        if compute_pval:
-            return pval
+        self.inactive[winning_var] = False # inactive is now losing_vars
+        self.variables.append(winning_var); self.signs.append(winning_sign)
 
-    __next__ = next # Python3 compatibility
+        # update residual, and adjust X
+
+        resid_vector -= realized_maxZ * winning_sign * winning_func
+        adjusted_X -= (np.multiply.outer(winning_func, winning_func.dot(adjusted_X)) /
+                       (winning_func**2).sum())
+
+        check_resid = True
+        if check_resid:
+            X = np.hstack([self.subset_X[:, self.variables], self.fixed_regressors]) 
+            resid_vector2 = Y - X.dot(np.linalg.pinv(X).dot(Y))
+            print(np.linalg.norm(resid_vector - resid_vector2) / np.linalg.norm(resid_vector), 'resids')
+
+        if check_resid:
+            adjusted_X2 = self.subset_X - X.dot(np.linalg.pinv(X).dot(self.subset_X))
+            print(np.linalg.norm(adjusted_X - adjusted_X2) / np.linalg.norm(adjusted_X), 'adjusted')
+
+        if compute_maxZ_pval:
+            return maxZ_pval
 
     def constraints(self, step=np.inf, identify_last_variable=True):
         default_step = len(self.variables)
         if default_step > 0 and not identify_last_variable:
             default_step -= 1
         step = min(step, default_step)
-        A = np.vstack(self.identity_cone[:step])
+        A = np.vstack(self.identity_constraints[:step])
 
         con = constraints(A, 
                           np.zeros(A.shape[0]), 
                           covariance=self.covariance)
         return con
 
+    def _maxZ_test(self, ndraw, burnin,
+                   sigma_known=True,
+                   accept_reject_params=(100, 15, 2000)
+                   ):
+
+        XI, Y = self.subset_X[:, self.inactive], self.subset_Y
+        sequential_con = self.maxZ_constraints[-1]
+        if not sequential_con(Y):
+            raise ValueError('doh!')
+
+        # use partial
+        def maxT(Z, L=self.adjusted_X[:,self.inactive], S=self.scale[self.inactive]):
+            Tstat = np.fabs(np.dot(Z, L) / S[None,:]).max(1)
+            return Tstat
+
+        #B = sequential_con.offset
+        #d = offset_pos.shape[0]
+        #sequential_con.offset[:d] += XI.T.dot(sequential_con.mean)
+        #sequential_con.offset[d:(2*d)] -= XI.T.dot(sequential_con.mean)
+
+        #if not sequential_con(Y):
+        #    raise ValueError('doh!')
+
+        pval, _, _, dfam = gibbs_test(sequential_con,
+                                      Y,
+                                      self.Zfunc[-1],
+                                      sigma_known=sigma_known,
+                                      white=False,
+                                      ndraw=ndraw,
+                                      burnin=burnin,
+                                      how_often=-1,
+                                      UMPU=False,
+                                      use_random_directions=False,
+                                      tilt=None,
+                                      alternative='greater',
+                                      test_statistic=maxT,
+                                      accept_reject_params=accept_reject_params
+                                      )
+        return pval
+
+
+
     def mcmc_test(self, step, variable=None,
                   nstep=100,
                   ndraw=20,
@@ -245,7 +383,7 @@ def mcmc_test(self, step, variable=None,
         if variable not in variables:
             raise ValueError('variable not included at given step')
 
-        A = np.vstack(self.identity_cone[:step])
+        A = np.vstack(self.identity_constraints[:step])
         con = constraints(A, 
                           np.zeros(A.shape[0]), 
                           covariance=self.covariance)
diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index f496ef626..10c49d72e 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -214,7 +214,7 @@ def test_coxph():
     print(G1, 'glmnet')
     print(G2, 'regreg')
 
-    yield np.testing.assert_equal, L.active + 1, selected_vars
+    yield np.testing.assert_equal, np.array(L.active) + 1, selected_vars
     yield np.testing.assert_allclose, beta2, beta_hat, tol, tol, False, 'cox coeff'
     yield np.testing.assert_allclose, L.summary('onesided')['pval'], R_pvals, tol, tol, False, 'cox pvalues'
 
diff --git a/selection/algorithms/tests/test_forward_step.py b/selection/algorithms/tests/test_forward_step.py
index 549d2e0bd..586f630c8 100644
--- a/selection/algorithms/tests/test_forward_step.py
+++ b/selection/algorithms/tests/test_forward_step.py
@@ -8,7 +8,7 @@
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_FS(k=10, ndraw=5000, burnin=5000):
 
-    n, p = 100, 200
+    n, p = 100, 50
     X = np.random.standard_normal((n,p)) + 0.4 * np.random.standard_normal(n)[:,None]
     X /= (X.std(0)[None,:] * np.sqrt(n))
     
@@ -17,7 +17,7 @@ def test_FS(k=10, ndraw=5000, burnin=5000):
     FS = forward_step(X, Y, covariance=0.5**2 * np.identity(n))
 
     for i in range(k):
-        FS.next(compute_pval=True)
+        print(FS.step(compute_maxZ_pval=True), 'pvalues')
 
     print('first %s variables selected' % k, FS.variables)
 
@@ -39,7 +39,7 @@ def test_FS_unknown(k=10, ndraw=5000, burnin=5000):
     FS = forward_step(X, Y)
 
     for i in range(k):
-        FS.next()
+        FS.step()
 
     print('first %s variables selected' % k, FS.variables)
 
@@ -62,7 +62,7 @@ def test_subset(k=10, ndraw=5000, burnin=5000):
                           covariance=0.5**2 * np.identity(n))
 
     for i in range(k):
-        FS.next()
+        FS.step()
 
     print('first %s variables selected' % k, FS.variables)
 
@@ -74,7 +74,7 @@ def test_subset(k=10, ndraw=5000, burnin=5000):
     FS = forward_step(X, Y, subset=subset)
 
     for i in range(k):
-        FS.next()
+        FS.step()
     print(FS.model_pivots(3, saturated=False, which_var=[FS.variables[2]], burnin=burnin, ndraw=ndraw))
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
@@ -108,7 +108,7 @@ def simulate_null(saturated=True, ndraw=8000, burnin=2000):
     FS = forward_step(X, Y, covariance=0.5**2 * np.identity(n))
     
     for i in range(5):
-        FS.next()
+        FS.step()
 
     return [p[-1] for p in FS.model_pivots(3, saturated=saturated, ndraw=ndraw, burnin=burnin)]
 
@@ -205,7 +205,7 @@ def test_full_pvals(n=100, p=40, rho=0.3, snr=4, ndraw=8000, burnin=2000):
     pval = []
     completed_yet = False
     for i in range(min(n, p)):
-        FS.next()
+        FS.step()
         var_select, pval_select = FS.model_pivots(i+1, alternative='twosided',
                                                   which_var=[FS.variables[-1]],
                                                   saturated=False,
@@ -241,7 +241,7 @@ def test_mcmc_tests(n=100, p=40, s=4, rho=0.3, snr=5, ndraw=None, burnin=2000,
     null_rank, alt_rank = None, None
 
     for i in range(min(n, p)):
-        FS.next()
+        FS.step()
 
         if extra_steps <= 0:
             null_rank = FS.mcmc_test(i+1, variable=FS.variables[i-2], 
@@ -273,7 +273,7 @@ def test_independence_null_mcmc(n=100, p=40, s=4, rho=0.5, snr=5,
 
     null_ranks = []
     for i in range(min(n, p)):
-        FS.next()
+        FS.step()
 
         if completed and extra_steps > 0:
             null_rank = FS.mcmc_test(i+1, variable=FS.variables[-1], 

From eeba62e76de0b275d60458ffde4ea027aba52613 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 4 Aug 2017 09:50:15 -0700
Subject: [PATCH 032/617] BF: constraints had an extra factor of scale on the
 fitted values

---
 selection/algorithms/forward_step.py | 70 ++++++++++++++++------------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/selection/algorithms/forward_step.py b/selection/algorithms/forward_step.py
index a1054c9e1..27009d2d2 100644
--- a/selection/algorithms/forward_step.py
+++ b/selection/algorithms/forward_step.py
@@ -29,7 +29,6 @@ class forward_step(object):
     """
     Forward stepwise model selection.
 
-   
     """
 
     def __init__(self, X, Y, 
@@ -108,11 +107,12 @@ def __init__(self, X, Y,
             self.subset_X = self.X.copy()[subset]
             self.subset_Y = self.Y.copy()[subset]
             self.subset_selector = np.identity(self.X.shape[0])[subset]
-
+            self.subset_fixed = self.fixed_regressors[subset]
         else:
             self.adjusted_X = self.X.copy()
             self.subset_Y = self.Y.copy()
             self.subset_X = self.X.copy()
+            self.subset_fixed = self.fixed_regressors
 
         # scale columns of X to have length 1
         self.adjusted_X /= np.sqrt((self.adjusted_X**2).sum(0))[None, :]
@@ -182,16 +182,16 @@ def step(self,
         Zstat = np.dot(Zfunc, Y) / scale # [inactive]
 
         winning_var = np.argmax(np.fabs(Zstat))
-        winning_func = adjusted_X[:,winning_var] / scale[winning_var]
         winning_sign = np.sign(Zstat[winning_var])
+        winning_func = Zfunc[winning_var] / scale[winning_var] * winning_sign
 
         realized_maxZ = Zstat[winning_var] * winning_sign 
         self.Z.append(realized_maxZ)
 
         if self.subset is not None:
-            self.Zfunc.append(np.dot(Zfunc[winning_var], self.subset_selector) * winning_sign / scale[winning_var])
+            self.Zfunc.append(winning_func.dot(self.subset_selector))
         else:
-            self.Zfunc.append(Zfunc[winning_var] * winning_sign / scale[winning_var])
+            self.Zfunc.append(winning_func)
 
         # keep track of identity for testing
         # variables other than the last one added
@@ -201,16 +201,15 @@ def step(self,
         # losing_vars are variables that are inactive (i.e. not in self.variables)
         # and did not win in this step
 
-        losing_vars = np.zeros(p, np.bool)
-        losing_vars[inactive] = True
+        losing_vars = inactive.copy()
         losing_vars[winning_var] = False
 
         identity_linpart = np.vstack([ 
-                adjusted_X[:,losing_vars].T / scale[losing_vars,None]-
-                winning_sign * winning_func,
+                adjusted_X[:,losing_vars].T / scale[losing_vars,None] -
+                winning_func,
                 -adjusted_X[:,losing_vars].T / scale[losing_vars,None] -
-                winning_sign * winning_func,
-                -winning_sign * winning_func.reshape((1,-1))])
+                winning_func,
+                - winning_func.reshape((1,-1))])
 
         if self.subset is not None:
             identity_linpart = np.dot(identity_linpart, 
@@ -219,7 +218,7 @@ def step(self,
         identity_con = constraints(identity_linpart,
                                    np.zeros(identity_linpart.shape[0]))
 
-        if not identity_con(self.subset_Y):
+        if not identity_con(self.Y):
             raise ValueError('identity fail!')
 
         self.identity_constraints.append(identity_linpart)
@@ -228,11 +227,18 @@ def step(self,
 
         XI = self.subset_X[:,self.inactive]
         linear_part = np.vstack([XI.T, -XI.T])
+        if self.subset is not None:
+            linear_part = np.dot(linear_part, 
+                                 self.subset_selector)
+
         _offset = np.array(self.maxZ_offset)
         _offset = _offset[:,:,self.inactive]
-        offset_pos = np.min(_offset[:,0], 0) # this corresponds to X_L^TY \leq (Z_max + V) * S_L
+        offset_pos = np.min(_offset[:,0], 0) # this corresponds to X_L^TY \leq (Z_max + V) * S_L 
         offset_neg = np.min(_offset[:,1], 0) # this corresponds to -X_L^TY \leq (Z_max - V) * S_L
+                                             # both minimized over all previous steps
+
         offset = np.hstack([offset_pos, offset_neg])
+
         maxZ_con = constraints(linear_part, offset,
                                covariance=self.covariance)
 
@@ -242,10 +248,14 @@ def step(self,
 
         if len(self.variables) > 0 or (self.fixed_regressors != []):
             XA = self.subset_X[:, self.variables]
-            XA = np.hstack([self.fixed_regressors, XA])
+            XA = np.hstack([self.subset_fixed, XA])
             # the RHS, i.e. offset is fixed by this conditioning
-            conditional_con = maxZ_con.conditional(XA.T,  
-                                            np.dot(XA.T, Y))
+            if self.subset is not None:
+                conditional_con = maxZ_con.conditional(XA.T.dot(self.subset_selector),
+                                                       np.dot(XA.T, Y))
+            else:
+                conditional_con = maxZ_con.conditional(XA.T,
+                                                       np.dot(XA.T, Y))
         else:
             conditional_con = maxZ_con
 
@@ -255,7 +265,7 @@ def step(self,
                                         sigma_known=sigma_known,
                                         accept_reject_params=accept_reject_params)
 
-        # now update state for next step
+        # now update for next step
 
         # update the offsets for maxZ
 
@@ -271,17 +281,17 @@ def step(self,
         # where P is the current "model"
 
         # let V=PY and S_L the losing scales, we rewrite this as
-        # $$\|X^T_LY / S_L - V\|_{\infty} \leq Z_max $$
+        # $$\|(X^T_LY - V) / S_L\|_{\infty} \leq Z_max $$
         # and again
-        # $$X^T_LY / S_L - V \leq Z_max, -(X^T_LY / S_L - V) \leq Z_max $$
+        # $$X^T_LY / S_L - V / S_L \leq Z_max, -(X^T_LY / S_L - V / S_L) \leq Z_max $$
         # or,
-        # $$X^T_LY \leq (Z_max + V) * S_L, -X^T_LY \leq (Z_max - V) * S_L $$
+        # $$X^T_LY \leq Z_max * S_L + V, -X^T_LY \leq Z_max * S_L - V $$
 
         # where, at the next step Z_max and V are measurable with respect to
         # the appropriate sigma algebra
 
         realized_Z_adjustment = realized_maxZ * scale                      # Z_max * S_L
-        fit_adjustment = np.dot(self.subset_X.T, Y - resid_vector) * scale # V * S_L
+        fit_adjustment = np.dot(self.subset_X.T, Y - resid_vector)         # V * S_L
         self.maxZ_offset.append([realized_Z_adjustment + fit_adjustment,   # (Z_max + V) * S_L
                                  realized_Z_adjustment - fit_adjustment])  # (Z_max - V) * S_L
 
@@ -293,13 +303,13 @@ def step(self,
 
         # update residual, and adjust X
 
-        resid_vector -= realized_maxZ * winning_sign * winning_func
+        resid_vector -= realized_maxZ * winning_func
         adjusted_X -= (np.multiply.outer(winning_func, winning_func.dot(adjusted_X)) /
                        (winning_func**2).sum())
 
         check_resid = True
         if check_resid:
-            X = np.hstack([self.subset_X[:, self.variables], self.fixed_regressors]) 
+            X = np.hstack([self.subset_X[:, self.variables], self.subset_fixed]) 
             resid_vector2 = Y - X.dot(np.linalg.pinv(X).dot(Y))
             print(np.linalg.norm(resid_vector - resid_vector2) / np.linalg.norm(resid_vector), 'resids')
 
@@ -589,7 +599,7 @@ def model_quadratic(self, which_step):
         return quadratic_test(self.Y, P_LS, self.constraints(step=which_step))
 
 def info_crit_stop(Y, X, sigma, cost=2,
-                   subset=[]):
+                   subset=None):
     """
     Fit model using forward stepwise,
     stopping using a rule like AIC or BIC.
@@ -612,8 +622,9 @@ def info_crit_stop(Y, X, sigma, cost=2,
     cost : float
         Cost per parameter. For BIC use cost=log(X.shape[0])
 
-    subset : []
-        Subset of cases to use for selection, defaults to [].
+    subset : ndarray (optional)
+        Shape (n,) -- boolean indicator of which cases to use.
+        Defaults to np.ones(n, np.bool)
 
     Returns
     -------
@@ -628,8 +639,7 @@ def info_crit_stop(Y, X, sigma, cost=2,
     FS = forward_step(X, Y, covariance=sigma**2 * np.identity(n), subset=subset)
 
     while True:
-        FS.next()
-
+        FS.step()
         if FS.Z[-1] < sigma * np.sqrt(cost):
             break
 
@@ -638,8 +648,8 @@ def info_crit_stop(Y, X, sigma, cost=2,
     new_offset = -sigma * np.sqrt(cost) * np.ones(new_linear_part.shape[0])
     new_offset[-1] *= -1
 
-    new_con = stack(FS.constraints(), constraints(new_linear_part,
-                                                  new_offset))
+    new_con = stack_con(FS.constraints(), constraints(new_linear_part,
+                                                      new_offset))
     new_con.covariance[:] = sigma**2 * np.identity(n)
     FS._constraints = new_con
     FS.active = FS.variables[:-1]

From 30d947fb9937a72eb79a48b5ee33530e3ff95c53 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 4 Aug 2017 09:54:32 -0700
Subject: [PATCH 033/617] still off in R comparison

---
 selection/algorithms/forward_step.py            | 10 ----------
 selection/algorithms/tests/test_compareR.py     |  6 ++++--
 selection/algorithms/tests/test_forward_step.py | 16 ++++++++--------
 3 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/selection/algorithms/forward_step.py b/selection/algorithms/forward_step.py
index 27009d2d2..40b5028b8 100644
--- a/selection/algorithms/forward_step.py
+++ b/selection/algorithms/forward_step.py
@@ -307,16 +307,6 @@ def step(self,
         adjusted_X -= (np.multiply.outer(winning_func, winning_func.dot(adjusted_X)) /
                        (winning_func**2).sum())
 
-        check_resid = True
-        if check_resid:
-            X = np.hstack([self.subset_X[:, self.variables], self.subset_fixed]) 
-            resid_vector2 = Y - X.dot(np.linalg.pinv(X).dot(Y))
-            print(np.linalg.norm(resid_vector - resid_vector2) / np.linalg.norm(resid_vector), 'resids')
-
-        if check_resid:
-            adjusted_X2 = self.subset_X - X.dot(np.linalg.pinv(X).dot(self.subset_X))
-            print(np.linalg.norm(adjusted_X - adjusted_X2) / np.linalg.norm(adjusted_X), 'adjusted')
-
         if compute_maxZ_pval:
             return maxZ_pval
 
diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 10c49d72e..2364e22d1 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -110,11 +110,13 @@ def test_forward_step():
     FS = forward_step(x, y, covariance=sigma**2 * np.identity(y.shape[0]))
     steps = []
     for i in range(x.shape[1]):
-        FS.next()
+        FS.step()
         steps.extend(FS.model_pivots(i+1, 
                                      which_var=FS.variables[-1:],
                                      alternative='onesided'))
 
+    print(selected_vars, [i+1 for i, p in steps])
+    print(FS.variables, FS.signs)
     np.testing.assert_array_equal(selected_vars, [i + 1 for i, p in steps])
     np.testing.assert_allclose([p for i, p in steps], R_pvals, atol=tol, rtol=tol)
 
@@ -152,7 +154,7 @@ def test_forward_step_all():
     FS = forward_step(x, y, covariance=sigma**2 * np.identity(y.shape[0]))
     steps = []
     for i in range(5):
-        FS.next()
+        FS.step()
     steps = FS.model_pivots(5, 
                             alternative='onesided')
 
diff --git a/selection/algorithms/tests/test_forward_step.py b/selection/algorithms/tests/test_forward_step.py
index 586f630c8..2fae25f14 100644
--- a/selection/algorithms/tests/test_forward_step.py
+++ b/selection/algorithms/tests/test_forward_step.py
@@ -134,7 +134,7 @@ def test_data_carving_IC(nsim=500,
                          s=7,
                          sigma=5,
                          rho=0.3,
-                         snr=7.,
+                         signal=7.,
                          split_frac=0.9,
                          ndraw=5000,
                          burnin=1000, 
@@ -151,7 +151,7 @@ def test_data_carving_IC(nsim=500,
                                              s=s, 
                                              sigma=sigma, 
                                              rho=rho, 
-                                             snr=snr, 
+                                             signal=signal, 
                                              df=df)
         mu = np.dot(X, beta)
         splitn = int(n*split_frac)
@@ -196,9 +196,9 @@ def test_data_carving_IC(nsim=500,
 
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_full_pvals(n=100, p=40, rho=0.3, snr=4, ndraw=8000, burnin=2000):
+def test_full_pvals(n=100, p=40, rho=0.3, signal=4, ndraw=8000, burnin=2000):
 
-    X, y, beta, active, sigma = gaussian_instance(n=n, p=p, snr=snr, rho=rho)
+    X, y, beta, active, sigma = gaussian_instance(n=n, p=p, signal=signal, rho=rho)
     FS = forward_step(X, y, covariance=sigma**2 * np.identity(n))
 
     from scipy.stats import norm as ndist
@@ -229,11 +229,11 @@ def test_full_pvals(n=100, p=40, rho=0.3, snr=4, ndraw=8000, burnin=2000):
     return X, y, beta, active, sigma, np.array(pval), completion_index
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_mcmc_tests(n=100, p=40, s=4, rho=0.3, snr=5, ndraw=None, burnin=2000,
+def test_mcmc_tests(n=100, p=40, s=4, rho=0.3, signal=5, ndraw=None, burnin=2000,
                     nstep=200,
                     method='serial'):
 
-    X, y, beta, active, sigma = gaussian_instance(n=n, p=p, snr=snr, rho=rho, s=s)
+    X, y, beta, active, sigma = gaussian_instance(n=n, p=p, signal=signal, rho=rho, s=s)
     FS = forward_step(X, y, covariance=sigma**2 * np.identity(n))
 
     extra_steps = 4
@@ -260,12 +260,12 @@ def test_mcmc_tests(n=100, p=40, s=4, rho=0.3, snr=5, ndraw=None, burnin=2000,
     return null_rank, alt_rank
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_independence_null_mcmc(n=100, p=40, s=4, rho=0.5, snr=5, 
+def test_independence_null_mcmc(n=100, p=40, s=4, rho=0.5, signal=5, 
                                 ndraw=None, burnin=2000,
                                 nstep=200,
                                 method='serial'):
 
-    X, y, beta, active, sigma = gaussian_instance(n=n, p=p, snr=snr, rho=rho, s=s)
+    X, y, beta, active, sigma = gaussian_instance(n=n, p=p, signal=signal, rho=rho, s=s)
     FS = forward_step(X, y, covariance=sigma**2 * np.identity(n))
 
     extra_steps = 4

From 045c0619b5e33dad76f2a698f84f2b9b083f7d88 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 4 Aug 2017 10:03:03 -0700
Subject: [PATCH 034/617] truncation limits don't agree after a few steps of FS

---
 selection/algorithms/tests/test_compareR.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 2364e22d1..6adca2484 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -46,6 +46,7 @@ def test_fixed_lambda():
 
         vlo = out$vlo
         vup = out$vup
+
         sdvar = out$sd
         pval=out$pv
         coef0=out$coef0
@@ -95,6 +96,8 @@ def test_forward_step():
     out.seq = fsInf(fsfit,sigma=sigma)
     vars = out.seq$vars
     pval = out.seq$pv
+    vlo = out.seq$vlo
+    vup = out.seq$vup
     """
 
     rpy.r(R_code)
@@ -107,6 +110,10 @@ def test_forward_step():
     y = y.reshape(-1)
     y -= y.mean()
     x -= x.mean(0)[None,:]
+
+    vlo = np.asarray(rpy.r('vlo'))
+    vup = np.asarray(rpy.r('vup'))
+    print(np.vstack([vlo, vup]).T)
     FS = forward_step(x, y, covariance=sigma**2 * np.identity(y.shape[0]))
     steps = []
     for i in range(x.shape[1]):
@@ -151,6 +158,10 @@ def test_forward_step_all():
     y = y.reshape(-1)
     y -= y.mean()
     x -= x.mean(0)[None,:]
+
+    vlo = np.asarray(rpy.r('vlo'))
+    vup = np.asarray(rpy.r('vup'))
+    print(np.vstack([vlo, vup]).T)
     FS = forward_step(x, y, covariance=sigma**2 * np.identity(y.shape[0]))
     steps = []
     for i in range(5):

From 30592b36edd95d6c4c9993d2ac3df01cf9210f6a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 4 Aug 2017 10:03:30 -0700
Subject: [PATCH 035/617] making sure to use n in construction of Y

---
 selection/algorithms/tests/test_forward_step.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/algorithms/tests/test_forward_step.py b/selection/algorithms/tests/test_forward_step.py
index 2fae25f14..3de0eb3c0 100644
--- a/selection/algorithms/tests/test_forward_step.py
+++ b/selection/algorithms/tests/test_forward_step.py
@@ -8,11 +8,11 @@
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_FS(k=10, ndraw=5000, burnin=5000):
 
-    n, p = 100, 50
+    n, p = 100, 200
     X = np.random.standard_normal((n,p)) + 0.4 * np.random.standard_normal(n)[:,None]
     X /= (X.std(0)[None,:] * np.sqrt(n))
     
-    Y = np.random.standard_normal(100) * 0.5
+    Y = np.random.standard_normal(n) * 0.5
     
     FS = forward_step(X, Y, covariance=0.5**2 * np.identity(n))
 

From 56c776ad0d0e93003bb2b655c0dc05fb1a873626 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 4 Aug 2017 10:03:55 -0700
Subject: [PATCH 036/617] adding _design

---
 selection/tests/instance.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/selection/tests/instance.py b/selection/tests/instance.py
index eb291763d..44ac3bf14 100644
--- a/selection/tests/instance.py
+++ b/selection/tests/instance.py
@@ -15,8 +15,6 @@ def AR1(rho, p):
 
         sigmaX, cholX = AR1(rho=rho, p=p)
         X = np.random.standard_normal((n, p)).dot(cholX.T)
-        # X = np.random.multivariate_normal(mean=np.zeros(p), cov = sigmaX, size = (n,))
-        # print(X.shape)
     return X
 
 def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,

From 63885aacfa5e8b413113718b8926ad7babfe29df Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 4 Aug 2017 10:30:08 -0700
Subject: [PATCH 037/617] BF: variable name

---
 selection/algorithms/forward_step.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/algorithms/forward_step.py b/selection/algorithms/forward_step.py
index 40b5028b8..1207c8667 100644
--- a/selection/algorithms/forward_step.py
+++ b/selection/algorithms/forward_step.py
@@ -244,7 +244,7 @@ def step(self,
 
         if use_identity:
             maxZ_con = stack_con(maxZ_con, identity_con)
-            con.covariance = self.covariance
+            maxZ_con.covariance = self.covariance
 
         if len(self.variables) > 0 or (self.fixed_regressors != []):
             XA = self.subset_X[:, self.variables]

From d1686b061ed433b29efa73fad0e61a3ca44cdeae Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 5 Aug 2017 09:00:56 -0700
Subject: [PATCH 038/617] moving mcmc method

---
 selection/algorithms/forward_step.py | 127 +++++++++++++--------------
 1 file changed, 62 insertions(+), 65 deletions(-)

diff --git a/selection/algorithms/forward_step.py b/selection/algorithms/forward_step.py
index 1207c8667..aec7278cf 100644
--- a/selection/algorithms/forward_step.py
+++ b/selection/algorithms/forward_step.py
@@ -295,7 +295,6 @@ def step(self,
         self.maxZ_offset.append([realized_Z_adjustment + fit_adjustment,   # (Z_max + V) * S_L
                                  realized_Z_adjustment - fit_adjustment])  # (Z_max - V) * S_L
 
-
         # update our list of variables and signs
 
         self.inactive[winning_var] = False # inactive is now losing_vars
@@ -322,7 +321,8 @@ def constraints(self, step=np.inf, identify_last_variable=True):
                           covariance=self.covariance)
         return con
 
-    def _maxZ_test(self, ndraw, burnin,
+    def _maxZ_test(self, ndraw, 
+                   burnin,
                    sigma_known=True,
                    accept_reject_params=(100, 15, 2000)
                    ):
@@ -362,69 +362,6 @@ def maxT(Z, L=self.adjusted_X[:,self.inactive], S=self.scale[self.inactive]):
                                       )
         return pval
 
-
-
-    def mcmc_test(self, step, variable=None,
-                  nstep=100,
-                  ndraw=20,
-                  method='parallel', 
-                  burnin=1000,):
-
-        if method not in ['parallel', 'serial']:
-            raise ValueError("method must be in ['parallel', 'serial']")
-
-        X, Y = self.subset_X, self.subset_Y
-
-        variables = self.variables[:step]
-
-        if variable is None:
-            variable = variables[-1]
-
-        if variable not in variables:
-            raise ValueError('variable not included at given step')
-
-        A = np.vstack(self.identity_constraints[:step])
-        con = constraints(A, 
-                          np.zeros(A.shape[0]), 
-                          covariance=self.covariance)
-
-        XA = X[:,variables]
-        con_final = con.conditional(XA.T, XA.T.dot(Y))
-        
-        if burnin > 0:
-            chain_final = gaussian_hit_and_run(con_final, Y, nstep=burnin)
-            chain_final.step()
-            new_Y = chain_final.state
-        else:
-            new_Y = Y
-
-        keep = np.ones(XA.shape[1], np.bool)
-        keep[list(variables).index(variable)] = 0
-        nuisance_variables = [v for i, v in enumerate(variables) if keep[i]]
-
-        if nuisance_variables:
-            XA_0 = X[:,nuisance_variables]
-            beta_dir = np.linalg.solve(XA_0.T.dot(XA_0), XA_0.T.dot(X[:,variable]))
-            adjusted_direction = X[:,variable] - XA_0.dot(beta_dir)
-            con_test = con.conditional(XA_0.T, XA_0.T.dot(Y))
-        else:
-            con_test = con
-            adjusted_direction = X[:,variable]
-
-        chain_test = gaussian_hit_and_run(con_test, new_Y, nstep=nstep)
-        test_stat = lambda y: -np.fabs(adjusted_direction.dot(y))
-
-        if method == 'parallel':
-            rank = parallel_test(chain_test,
-                                 new_Y,
-                                 test_stat)
-        else:
-            rank = serial_test(chain_test,
-                               new_Y,
-                               test_stat)
-            
-        return rank
-
     def model_pivots(self, which_step, alternative='onesided',
                      saturated=True,
                      ndraw=5000,
@@ -788,3 +725,63 @@ def data_carving_IC(y, X, sigma,
                    splitting_pvalues,
                    splitting_intervals), FS
 
+def mcmc_test(fs_obj, step, variable=None,
+              nstep=100,
+              ndraw=20,
+              method='parallel', 
+              burnin=1000,):
+    """
+    Hit-and-run test of `variable` (default: the last one entered by `step`).
+
+    fs_obj : forward stepwise object; its `subset_X`, `subset_Y`, `variables`,
+        `identity_constraints` and `covariance` are read.
+    step : number of steps whose variables / constraints are used.
+    nstep, burnin : `nstep` arguments of the test chain and the burn-in chain.
+    ndraw : accepted but not used in this function.
+    method : 'parallel' or 'serial' -- selects `parallel_test` or `serial_test`.
+
+    Returns the rank computed by the chosen test; raises ValueError for an
+    unknown `method` or a `variable` not among the first `step` variables.
+    """
+    if method not in ['parallel', 'serial']:
+        raise ValueError("method must be in ['parallel', 'serial']")
+
+    X, Y = fs_obj.subset_X, fs_obj.subset_Y
+    variables = fs_obj.variables[:step]
+    if variable is None:
+        variable = variables[-1]
+    if variable not in variables:
+        raise ValueError('variable not included at given step')
+
+    A = np.vstack(fs_obj.identity_constraints[:step])
+    con = constraints(A, np.zeros(A.shape[0]), covariance=fs_obj.covariance)
+
+    XA = X[:,variables]
+    con_final = con.conditional(XA.T, XA.T.dot(Y))
+
+    if burnin > 0:
+        chain_final = gaussian_hit_and_run(con_final, Y, nstep=burnin)
+        chain_final.step()
+        new_Y = chain_final.state
+    else:
+        new_Y = Y
+
+    keep = np.ones(XA.shape[1], bool) # builtin bool: np.bool alias is gone in NumPy >= 1.24
+    keep[list(variables).index(variable)] = 0
+    nuisance_variables = [v for i, v in enumerate(variables) if keep[i]]
+
+    if nuisance_variables:
+        # residualize the tested column on the other selected columns
+        XA_0 = X[:,nuisance_variables]
+        beta_dir = np.linalg.solve(XA_0.T.dot(XA_0), XA_0.T.dot(X[:,variable]))
+        adjusted_direction = X[:,variable] - XA_0.dot(beta_dir)
+        con_test = con.conditional(XA_0.T, XA_0.T.dot(Y))
+    else:
+        con_test = con
+        adjusted_direction = X[:,variable]
+
+    chain_test = gaussian_hit_and_run(con_test, new_Y, nstep=nstep)
+    test_stat = lambda y: -np.fabs(adjusted_direction.dot(y))
+
+    test = parallel_test if method == 'parallel' else serial_test
+    return test(chain_test, new_Y, test_stat)

From 5bee448c426df9aa3229208a0b9917359852d79e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 7 Aug 2017 11:54:15 -0700
Subject: [PATCH 039/617] added debiased lasso, also a null_value for pivot

---
 selection/algorithms/debiased_lasso.py        | 144 ++++++++++++++++++
 .../algorithms/tests/test_debiased_lasso.py   |  26 ++++
 selection/constraints/affine.py               |  11 +-
 3 files changed, 178 insertions(+), 3 deletions(-)
 create mode 100644 selection/algorithms/debiased_lasso.py
 create mode 100644 selection/algorithms/tests/test_debiased_lasso.py

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
new file mode 100644
index 000000000..11ae2db6d
--- /dev/null
+++ b/selection/algorithms/debiased_lasso.py
@@ -0,0 +1,144 @@
+import numpy as np
+from regreg.api import (quadratic_loss,
+                        identity_quadratic,
+                        l1norm,
+                        simple_problem)
+
+from ..constraints.affine import constraints
+
+def _find_row_approx_inverse(Sigma, j, delta):
+    """
+    Find an approximation of j-th row of inverse of Sigma.
+
+    Solves an l1-penalized (lagrange `delta`) quadratic problem built from
+    `Sigma` and the j-th basis vector; returns minus the regreg solution.
+    """
+    p = Sigma.shape[0]
+    elem_basis = np.zeros(p, float) # builtin float: np.float alias is gone in NumPy >= 1.24
+    elem_basis[j] = 1.
+    loss = quadratic_loss(p, Q=Sigma)
+    penalty = l1norm(p, lagrange=delta)
+    iq = identity_quadratic(0, 0, elem_basis, 0)
+    problem = simple_problem(loss, penalty)
+    return -problem.solve(iq, min_its=100)
+
+def debiased_lasso_inference(lasso_obj, variables, delta):
+    r"""
+    Selective p-values and intervals for coordinates of the debiased lasso.
+
+    Debiased estimate is 
+
+    .. math::
+
+        \hat{\beta}^d = \hat{\beta} - \hat{\theta} \nabla \ell(\hat{\beta})
+
+    where $\ell$ is the Gaussian loss and $\hat{\theta}$ is an approximation of the 
+    inverse Hessian at $\hat{\beta}$.
+
+    The term on the right is expressible in terms of the inactive gradient
+    as well as the fixed active subgradient. The left hand term is expressible in
+    terms of $\bar{\beta}$ the "relaxed" solution and the fixed active subgradient.
+
+    We need a covariance for $(\bar{\beta}_M, G_{-M})$.
+
+    Parameters
+    ----------
+
+    lasso_obj : `selection.algorithms.lasso.lasso`
+        A lasso object after calling fit() method.
+
+    variables : seq
+        Which variables should we produce p-values / intervals for?
+        Each one has to be an active variable of `lasso_obj`.
+
+    delta : float
+        Feasibility parameter for estimating row of inverse of Sigma. 
+
+    Returns
+    -------
+
+    results : list
+        One tuple `(variable, pvalue) + tuple(interval)` per entry of `variables`.
+    """
+
+    if not lasso_obj.ignore_inactive_constraints:
+        raise ValueError('debiased lasso should be fit ignoring inactive constraints as implied covariance between active and inactive score is 0')
+
+    # should we check that loglike is gaussian
+    lasso_soln = lasso_obj.lasso_solution
+    lasso_active = lasso_soln[lasso_obj.active]
+    active_list = list(lasso_obj.active)
+
+    G = lasso_obj.loglike.smooth_objective(lasso_soln, 'grad')
+    G_I = G[lasso_obj.inactive]
+
+    # this is the fixed part of subgradient
+    subgrad_term = -G[lasso_obj.active]
+
+    # we make new constraints for the Gaussian vector \hat{\beta}_M --
+    # same covariance as those for \bar{\beta}_M, but the constraints are just on signs,
+    # not signs after translation
+    if not lasso_obj.active_penalized.sum():
+        # fail early: `_constraints` used to be left undefined here (NameError below)
+        raise ValueError('no penalized active variables, cannot form sign constraints')
+    _constraints = constraints(-np.diag(lasso_obj.active_signs)[lasso_obj.active_penalized],
+                               np.zeros(lasso_obj.active_penalized.sum()),
+                               covariance=lasso_obj._constraints.covariance)
+    _inactive_constraints = lasso_obj._inactive_constraints
+
+    # now make a product of the two constraints
+    # assuming independence -- which is true under
+    # selected model
+    _full_linear_part = np.zeros(((_constraints.linear_part.shape[0] + 
+                                  _inactive_constraints.linear_part.shape[0]),
+                                  (_constraints.linear_part.shape[1] + 
+                                  _inactive_constraints.linear_part.shape[1])))
+
+    _full_linear_part[:_constraints.linear_part.shape[0]][:,:_constraints.linear_part.shape[1]] = _constraints.linear_part
+    _full_linear_part[_constraints.linear_part.shape[0]:][:,_constraints.linear_part.shape[1]:] = _inactive_constraints.linear_part
+
+    _full_offset = np.zeros(_full_linear_part.shape[0])
+    _full_offset[:_constraints.linear_part.shape[0]] = _constraints.offset
+    _full_offset[_constraints.linear_part.shape[0]:] = _inactive_constraints.offset
+
+    _full_cov = np.zeros((_full_linear_part.shape[1],
+                          _full_linear_part.shape[1]))
+    _full_cov[:_constraints.linear_part.shape[1]][:,:_constraints.linear_part.shape[1]] = _constraints.covariance
+    _full_cov[_constraints.linear_part.shape[1]:][:,_constraints.linear_part.shape[1]:] = _inactive_constraints.covariance
+    _full_constraints = constraints(_full_linear_part,
+                                    _full_offset,
+                                    covariance=_full_cov)
+
+    _full_data = np.hstack([lasso_active, G_I])
+    if not _full_constraints(_full_data):
+        raise ValueError('constraints not satisfied')
+
+    H = lasso_obj.loglike.hessian(lasso_obj.lasso_solution)
+    H_AA = H[lasso_obj.active][:,lasso_obj.active]
+    bias_AA = np.linalg.inv(H_AA).dot(subgrad_term)
+
+    intervals = []
+    pvalues = []
+    for var in variables:
+        if var not in active_list:
+            # fail early: `idx` used to be unbound (or stale from the previous variable) here
+            raise ValueError('variable %s is not active' % (var,))
+        idx = active_list.index(var)
+        theta_var = _find_row_approx_inverse(H, var, delta)
+
+        # express target in pair (\hat{\beta}_A, G_I)
+        eta = np.zeros_like(theta_var)
+        eta[idx] = 1.
+        # inactive coordinates
+        eta[lasso_active.shape[0]:] = theta_var[lasso_obj.inactive]
+        theta_active = theta_var[active_list]
+
+        # offset term 
+        offset = -bias_AA[idx] + theta_active.dot(subgrad_term)
+
+        intervals.append(_full_constraints.interval(eta, _full_data) + offset)
+        pvalues.append(_full_constraints.pivot(eta, _full_data,
+                                               null_value=-offset,
+                                               alternative='twosided'))
+
+    return [(j, p) + tuple(i) for j, p, i in zip(variables, pvalues, intervals)]
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
new file mode 100644
index 000000000..c540dd530
--- /dev/null
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -0,0 +1,26 @@
+import numpy as np
+import nose.tools as nt
+import numpy.testing.decorators as dec
+
+from selection.tests.instance import gaussian_instance as instance
+import selection.tests.reports as reports
+
+from selection.algorithms.lasso import lasso 
+from selection.algorithms.debiased_lasso import debiased_lasso_inference
+import regreg.api as rr
+
+def test_gaussian(n=100, p=20):
+    """Smoke test: fit a Gaussian lasso and print debiased lasso inference for its active set."""
+    X, y, beta = instance(n=n, p=p, sigma=1.)[:3]
+    # average, over 1000 standard normal draws eps, of max_j |X_j^T eps|
+    lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0))
+    # the first three features get zero weight, i.e. they are unpenalized
+    weights = 1.1 * lam_theor * np.ones(p)
+    weights[:3] = 0.
+    # debiased_lasso_inference raises unless ignore_inactive_constraints is set
+    L = lasso.gaussian(X, y, weights, sigma=1.)
+    L.ignore_inactive_constraints = True
+    L.fit()
+    # delta = sqrt(2 log(p) / n); nothing is asserted, the results are only printed
+    print(debiased_lasso_inference(L, L.active, np.sqrt(2 * np.log(p) / n)))
+    print(beta)
diff --git a/selection/constraints/affine.py b/selection/constraints/affine.py
index 72bfcaeb1..549b0a645 100644
--- a/selection/constraints/affine.py
+++ b/selection/constraints/affine.py
@@ -278,7 +278,10 @@ def bounds(self, direction_of_interest, Y):
                                     Y,
                                     direction_of_interest)
 
-    def pivot(self, direction_of_interest, Y,
+    def pivot(self, 
+              direction_of_interest, 
+              Y,
+              null_value=None,
               alternative='greater'):
         r"""
         For a realization $Y$ of the random variable $N(\mu,\Sigma)$
@@ -316,12 +319,14 @@ def pivot(self, direction_of_interest, Y,
         then we return $1-F$; if it is 'less' we return $F$
         and if it is 'twosided' we return $2 \min(F,1-F)$.
 
-        
         """
         if alternative not in ['greater', 'less', 'twosided']:
             raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
         L, Z, U, S = self.bounds(direction_of_interest, Y)
-        meanZ = (direction_of_interest * self.mean).sum()
+        if null_value is None:
+            meanZ = (direction_of_interest * self.mean).sum()
+        else:
+            meanZ = null_value
         P = truncnorm_cdf((Z-meanZ)/S, (L-meanZ)/S, (U-meanZ)/S)
         if alternative == 'greater':
             return 1 - P

From ce5a54b8fcfeaacb9696ab49b2d181874169932b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 7 Aug 2017 12:55:13 -0700
Subject: [PATCH 040/617] removing old AR instance code

---
 selection/randomized/tests/test_power.py |   8 +-
 selection/tests/instance.py              | 104 +++++++----------------
 2 files changed, 37 insertions(+), 75 deletions(-)

diff --git a/selection/randomized/tests/test_power.py b/selection/randomized/tests/test_power.py
index 5b26a9d1d..fe1b8a6a3 100644
--- a/selection/randomized/tests/test_power.py
+++ b/selection/randomized/tests/test_power.py
@@ -30,7 +30,7 @@ def test_power(s=30,
                n=2000,
                p=1000,
                rho=0.6,
-               equi_correlated=False,
+               equicorrelated=False,
                signal=3.5,
                lam_frac = 1.,
                cross_validation = True,
@@ -47,11 +47,11 @@ def test_power(s=30,
     print(n,p,s)
     if loss=="gaussian":
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, signal=signal, sigma=1.,
-                                                       equi_correlated=equi_correlated)
+                                                       equicorrelated=equicorrelated)
         lam = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
         glm_loss = rr.glm.gaussian(X, y)
     elif loss=="logistic":
-        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal, equi_correlated=equi_correlated)
+        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal, equicorrelated=equicorrelated)
         glm_loss = rr.glm.logistic(X, y)
         lam = np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
 
@@ -199,7 +199,7 @@ def compute_power(**kwargs):
 if __name__ == '__main__':
     np.random.seed(500)
     kwargs = {'s':30, 'n':2000, 'p':1000, 'rho':0.6,
-              'equi_correlated':False,
+              'equicorrelated':False,
               'signal':3.5,
               'lam_frac':1.,
               'cross_validation':True,
diff --git a/selection/tests/instance.py b/selection/tests/instance.py
index 44ac3bf14..97a25edbe 100644
--- a/selection/tests/instance.py
+++ b/selection/tests/instance.py
@@ -3,8 +3,11 @@
 
 from scipy.stats import t as tdist
 
-def _equicor_design(n, p, rho, equi_correlated):
-    if equi_correlated:
+def _design(n, p, rho, equicorrelated):
+    """
+    Create an equicorrelated or AR(1) design.
+    """
+    if equicorrelated:
         X = (np.sqrt(1 - rho) * np.random.standard_normal((n, p)) +
              np.sqrt(rho) * np.random.standard_normal(n)[:, None])
     else:
@@ -20,14 +23,14 @@ def AR1(rho, p):
 def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
                       random_signs=False, df=np.inf,
                       scale=True, center=True,
-                      equi_correlated=True):
+                      equicorrelated=True):
 
 
     """
     A testing instance for the LASSO.
-    If equi_correlated is True design is equi-correlated in the population,
+    If equicorrelated is True design is equi-correlated in the population,
     normalized to have columns of norm 1.
-    If equi_correlated is False design is auto-regressive.
+    If equicorrelated is False design is auto-regressive.
     For the default settings, a $\lambda$ of around 13.5
     corresponds to the theoretical $E(\|X^T\epsilon\|_{\infty})$
     with $\epsilon \sim N(0, \sigma^2 I)$.
@@ -44,8 +47,9 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
     rho : float
         Equicorrelation value (must be in interval [0,1])
 
-    signal : float
-        Size of each coefficient
+    signal : float or (float, float)
+        Sizes for the coefficients. If a tuple -- then coefficients
+        are equally spaced between these values using np.linspace.
 
     random_signs : bool
         If true, assign random signs to coefficients.
@@ -54,7 +58,7 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
     df : int
         Degrees of freedom for noise (from T distribution).
 
-    equi_correlated: bool
+    equicorrelated: bool
         If true, design in equi-correlated,
         Else design is AR.
 
@@ -77,19 +81,23 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
         Noise level.
     """
 
-    X = _equicor_design(n,p, rho, equi_correlated)
+    X = _design(n,p, rho, equicorrelated)
 
     if center:
         X -= X.mean(0)[None, :]
     if scale:
         X /= (X.std(0)[None,:] * np.sqrt(n))
     beta = np.zeros(p) 
-    beta[:s] = signal 
-
+    if type(signal) != type((3,4)):
+        beta[:s] = signal 
+    else:
+        beta[:s] = np.linspace(signal[0], signal[1], s)
     if random_signs:
         beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
+    np.random.shuffle(beta)
+
     active = np.zeros(p, np.bool)
-    active[:s] = True
+    active[beta != 0] = True
 
     # noise model
     def _noise(n, df=np.inf):
@@ -102,63 +110,12 @@ def _noise(n, df=np.inf):
     Y = (X.dot(beta) + _noise(n, df)) * sigma
     return X, Y, beta * sigma, np.nonzero(active)[0], sigma
 
-_cholesky_factors = {} # should we store them?
-
-def _AR_cov(p, rho=0.25):
-    idx = np.arange(p)
-    return rho**np.fabs(np.subtract.outer(idx, idx))
-
-def _AR_sqrt_cov(p, rho=0.25):
-    idx = np.arange(p)
-    C = rho**np.fabs(np.subtract.outer(idx, idx))
-    return np.linalg.cholesky(C)
-
-
-def AR_instance(n=2000, p=2500, s=30, sigma=2, rho=0.25, signal=4.5):
-    """
-    Used to compare to Barber and Candes high-dim knockoff.
-
-    Parameters
-    ----------
-
-    n : int
-        Sample size
-
-    p : int
-        Number of features
-
-    s : int
-        True sparsity
-
-    sigma : float
-        Noise level
-
-    rho : float
-        AR(1) parameter.
-
-    signal : float
-        Size of each coefficient
-
-    """
-
-    if (rho, p) not in _cholesky_factors.keys():
-        _cholesky_factors[(rho, p)] = _AR_sqrt_cov(p, rho)
-    _sqrt_cov = _cholesky_factors[(rho, p)]
-
-    X = np.random.standard_normal((n, p)).dot(_sqrt_cov.T)
-
-    X /= (np.sqrt((X**2).sum(0))) # like normc
-    beta = np.zeros(p)
-    beta[:s] = signal * (2 * np.random.binomial(1, 0.5, size=(s,)) - 1) 
-    np.random.shuffle(beta)
-
-    Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma
-    true_active = np.nonzero(beta != 0)[0]
-    return X, Y, beta * sigma, true_active, sigma
 
 def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
                       random_signs=False, 
-                      scale=True, center=True, equi_correlated=True):
+                      scale=True, 
+                      center=True, 
+                      equicorrelated=True):
     """
     A testing instance for the LASSO.
     Design is equi-correlated in the population,
@@ -179,8 +136,9 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
     rho : float
         Equicorrelation value (must be in interval [0,1])
 
-    signal : float
-        Size of each coefficient
+    signal : float or (float, float)
+        Sizes for the coefficients. If a tuple -- then coefficients
+        are equally spaced between these values using np.linspace.
 
     random_signs : bool
         If true, assign random signs to coefficients.
@@ -203,7 +161,7 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
 
     """
 
-    X = _equicor_design(n,p, rho, equi_correlated)
+    X = _design(n,p, rho, equicorrelated)
 
     if center:
         X -= X.mean(0)[None,:]
@@ -211,12 +169,16 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
         X /= X.std(0)[None,:]
     X /= np.sqrt(n)
     beta = np.zeros(p) 
-    beta[:s] = signal 
+    if type(signal) != type((3,4)):
+        beta[:s] = signal 
+    else:
+        beta[:s] = np.linspace(signal[0], signal[1], s)
     if random_signs:
         beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
+    np.random.shuffle(beta)
 
     active = np.zeros(p, np.bool)
-    active[:s] = True
+    active[beta != 0] = True
 
     eta = linpred = np.dot(X, beta) 
     pi = np.exp(eta) / (1 + np.exp(eta))

From 1565098e037fbce2ec3c1fafb59c81ef6d80d881 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 7 Aug 2017 12:56:47 -0700
Subject: [PATCH 041/617] BF: ref to AR_instance removed

---
 selection/randomized/tests/test_randomized_lasso.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/tests/test_randomized_lasso.py b/selection/randomized/tests/test_randomized_lasso.py
index 973d34cf4..a7a25fc3a 100644
--- a/selection/randomized/tests/test_randomized_lasso.py
+++ b/selection/randomized/tests/test_randomized_lasso.py
@@ -2,11 +2,11 @@
 import numpy as np
 
 from selection.randomized.api import lasso as randomized_lasso
-from selection.tests.instance import gaussian_instance, AR_instance
+from selection.tests.instance import gaussian_instance
 
 def test_randomized_lasso(n=300, p=500, s=5, signal=7.5, rho=0.2):
 
-    X, Y, beta, active, sigma = AR_instance(n=n, p=p, s=s, rho=rho, signal=signal)
+    X, Y, beta, active, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, signal=signal, equicorrelated=False)
 
     L = randomized_lasso.gaussian(X, Y, 3.5 * sigma * np.ones(p))
     signs = L.fit()

From 99a089131f429aa0e1fca8baa0dc1c3e091ba516 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 7 Aug 2017 13:59:28 -0700
Subject: [PATCH 042/617] unused offset shift in forward stepwise, allowing
 signals to spread over a range in instance

---
 selection/algorithms/forward_step.py | 11 ++---------
 selection/tests/instance.py          | 10 ++++++----
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/selection/algorithms/forward_step.py b/selection/algorithms/forward_step.py
index aec7278cf..efad3a225 100644
--- a/selection/algorithms/forward_step.py
+++ b/selection/algorithms/forward_step.py
@@ -321,7 +321,8 @@ def constraints(self, step=np.inf, identify_last_variable=True):
                           covariance=self.covariance)
         return con
 
-    def _maxZ_test(self, ndraw, 
+    def _maxZ_test(self, 
+                   ndraw, 
                    burnin,
                    sigma_known=True,
                    accept_reject_params=(100, 15, 2000)
@@ -337,14 +338,6 @@ def maxT(Z, L=self.adjusted_X[:,self.inactive], S=self.scale[self.inactive]):
             Tstat = np.fabs(np.dot(Z, L) / S[None,:]).max(1)
             return Tstat
 
-        #B = sequential_con.offset
-        #d = offset_pos.shape[0]
-        #sequential_con.offset[:d] += XI.T.dot(sequential_con.mean)
-        #sequential_con.offset[d:(2*d)] -= XI.T.dot(sequential_con.mean)
-
-        #if not sequential_con(Y):
-        #    raise ValueError('doh!')
-
         pval, _, _, dfam = gibbs_test(sequential_con,
                                       Y,
                                       self.Zfunc[-1],
diff --git a/selection/tests/instance.py b/selection/tests/instance.py
index 97a25edbe..6dd7cf515 100644
--- a/selection/tests/instance.py
+++ b/selection/tests/instance.py
@@ -88,8 +88,9 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
     if scale:
         X /= (X.std(0)[None,:] * np.sqrt(n))
     beta = np.zeros(p) 
-    if type(signal) != type((3,4)):
-        beta[:s] = signal 
+    signal = np.atleast_1d(signal)
+    if signal.shape == (1,):
+        beta[:s] = signal[0] 
     else:
         beta[:s] = np.linspace(signal[0], signal[1], s)
     if random_signs:
@@ -169,8 +170,9 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
         X /= X.std(0)[None,:]
     X /= np.sqrt(n)
     beta = np.zeros(p) 
-    if type(signal) != type((3,4)):
-        beta[:s] = signal 
+    signal = np.atleast_1d(signal)
+    if signal.shape == (1,):
+        beta[:s] = signal[0] 
     else:
         beta[:s] = np.linspace(signal[0], signal[1], s)
     if random_signs:

From 5605ec78b8a103f1ca938a47f438d629857c64f1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 7 Aug 2017 14:20:42 -0700
Subject: [PATCH 043/617] better error message, implementing maxZ offset more
 efficiently

---
 selection/algorithms/forward_step.py | 44 ++++++++++++----------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/selection/algorithms/forward_step.py b/selection/algorithms/forward_step.py
index efad3a225..b036a5f9a 100644
--- a/selection/algorithms/forward_step.py
+++ b/selection/algorithms/forward_step.py
@@ -103,19 +103,19 @@ def __init__(self, X, Y,
 
         if self.subset is not None:
 
-            self.adjusted_X = self.X.copy()[subset]
+            self.working_X = self.X.copy()[subset]
             self.subset_X = self.X.copy()[subset]
             self.subset_Y = self.Y.copy()[subset]
             self.subset_selector = np.identity(self.X.shape[0])[subset]
             self.subset_fixed = self.fixed_regressors[subset]
         else:
-            self.adjusted_X = self.X.copy()
+            self.working_X = self.X.copy()
             self.subset_Y = self.Y.copy()
             self.subset_X = self.X.copy()
             self.subset_fixed = self.fixed_regressors
 
         # scale columns of X to have length 1
-        self.adjusted_X /= np.sqrt((self.adjusted_X**2).sum(0))[None, :]
+        self.working_X /= np.sqrt((self.working_X**2).sum(0))[None, :]
 
         self.variables = [] # the sequence of selected variables
         self.Z = []         # the achieved Z scores
@@ -129,8 +129,8 @@ def __init__(self, X, Y,
 
         self.identity_constraints = []    # this will store linear functionals that identify the variables
         self.inactive = np.ones(p, np.bool)   # current inactive set
-        self.maxZ_offset = [[np.ones(p) * np.inf, np.ones(p) * np.inf]] # stored for computing
-                                                                   # the limits of maxZ selected test
+        self.maxZ_offset = np.array([np.ones(p) * np.inf, np.ones(p) * np.inf]) # stored for computing
+                                                                                # the limits of maxZ selected test
         self.maxZ_constraints = []
 
     def step(self, 
@@ -167,18 +167,18 @@ def step(self,
 
         """
         
-        adjusted_X, Y = self.adjusted_X, self.subset_Y
+        working_X, Y = self.working_X, self.subset_Y
         resid_vector = self._resid_vector
-        n, p = adjusted_X.shape
+        n, p = working_X.shape
 
         # up to now inactive
         inactive = self.inactive
 
         # compute Z scores
 
-        scale = self.scale = np.sqrt(np.sum(adjusted_X**2, 0))
+        scale = self.scale = np.sqrt(np.sum(working_X**2, 0))
         scale[~inactive] = np.inf # should never be used in any case
-        Zfunc = adjusted_X.T # [inactive] 
+        Zfunc = working_X.T # [inactive] 
         Zstat = np.dot(Zfunc, Y) / scale # [inactive]
 
         winning_var = np.argmax(np.fabs(Zstat))
@@ -205,9 +205,9 @@ def step(self,
         losing_vars[winning_var] = False
 
         identity_linpart = np.vstack([ 
-                adjusted_X[:,losing_vars].T / scale[losing_vars,None] -
+                working_X[:,losing_vars].T / scale[losing_vars,None] -
                 winning_func,
-                -adjusted_X[:,losing_vars].T / scale[losing_vars,None] -
+                -working_X[:,losing_vars].T / scale[losing_vars,None] -
                 winning_func,
                 - winning_func.reshape((1,-1))])
 
@@ -231,15 +231,9 @@ def step(self,
             linear_part = np.dot(linear_part, 
                                  self.subset_selector)
 
-        _offset = np.array(self.maxZ_offset)
-        _offset = _offset[:,:,self.inactive]
-        offset_pos = np.min(_offset[:,0], 0) # this corresponds to X_L^TY \leq (Z_max + V) * S_L 
-        offset_neg = np.min(_offset[:,1], 0) # this corresponds to -X_L^TY \leq (Z_max - V) * S_L
-                                             # both minimized over all previous steps
+        inactive_offset = self.maxZ_offset[:, self.inactive]
 
-        offset = np.hstack([offset_pos, offset_neg])
-
-        maxZ_con = constraints(linear_part, offset,
+        maxZ_con = constraints(linear_part, np.hstack(inactive_offset),
                                covariance=self.covariance)
 
         if use_identity:
@@ -273,7 +267,7 @@ def step(self,
         # and including winning_var, the Z_scores are fixed
         
         # then, the losing variables at this stage can be expressed as
-        # abs(adjusted_X.T.dot(Y)[:,inactive] / scale[inactive]) < realized_maxZ
+        # abs(working_X.T.dot(Y)[:,inactive] / scale[inactive]) < realized_maxZ
         # where inactive is the updated inactive 
 
         # the event we have witnessed this step is 
@@ -292,8 +286,8 @@ def step(self,
 
         realized_Z_adjustment = realized_maxZ * scale                      # Z_max * S_L
         fit_adjustment = np.dot(self.subset_X.T, Y - resid_vector)         # V * S_L
-        self.maxZ_offset.append([realized_Z_adjustment + fit_adjustment,   # (Z_max + V) * S_L
-                                 realized_Z_adjustment - fit_adjustment])  # (Z_max - V) * S_L
+        self.maxZ_offset[0] = np.minimum(self.maxZ_offset[0], realized_Z_adjustment + fit_adjustment)   # (Z_max + V) * S_L
+        self.maxZ_offset[1] = np.minimum(self.maxZ_offset[1], realized_Z_adjustment - fit_adjustment)  # (Z_max - V) * S_L
 
         # update our list of variables and signs
 
@@ -303,7 +297,7 @@ def step(self,
         # update residual, and adjust X
 
         resid_vector -= realized_maxZ * winning_func
-        adjusted_X -= (np.multiply.outer(winning_func, winning_func.dot(adjusted_X)) /
+        working_X -= (np.multiply.outer(winning_func, winning_func.dot(working_X)) /
                        (winning_func**2).sum())
 
         if compute_maxZ_pval:
@@ -331,10 +325,10 @@ def _maxZ_test(self,
         XI, Y = self.subset_X[:, self.inactive], self.subset_Y
         sequential_con = self.maxZ_constraints[-1]
         if not sequential_con(Y):
-            raise ValueError('doh!')
+            raise ValueError('Constraints on Y not satisfied')
 
         # use partial
-        def maxT(Z, L=self.adjusted_X[:,self.inactive], S=self.scale[self.inactive]):
+        def maxT(Z, L=self.working_X[:,self.inactive], S=self.scale[self.inactive]):
             Tstat = np.fabs(np.dot(Z, L) / S[None,:]).max(1)
             return Tstat
 

From 6ecbaaf1847835613ee59dfa905b40d494a63b67 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 11:00:01 -0700
Subject: [PATCH 044/617] DOC: fixing indents and signature

---
 selection/algorithms/forward_step.py            |  3 ++-
 selection/algorithms/tests/test_forward_step.py | 14 +++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/selection/algorithms/forward_step.py b/selection/algorithms/forward_step.py
index b036a5f9a..b13bb40fe 100644
--- a/selection/algorithms/forward_step.py
+++ b/selection/algorithms/forward_step.py
@@ -349,7 +349,8 @@ def maxT(Z, L=self.working_X[:,self.inactive], S=self.scale[self.inactive]):
                                       )
         return pval
 
-    def model_pivots(self, which_step, alternative='onesided',
+    def model_pivots(self, which_step, 
+                     alternative='onesided',
                      saturated=True,
                      ndraw=5000,
                      burnin=2000,
diff --git a/selection/algorithms/tests/test_forward_step.py b/selection/algorithms/tests/test_forward_step.py
index 3de0eb3c0..fdf5bb780 100644
--- a/selection/algorithms/tests/test_forward_step.py
+++ b/selection/algorithms/tests/test_forward_step.py
@@ -147,12 +147,12 @@ def test_data_carving_IC(nsim=500,
     while counter < nsim:
         counter += 1
         X, y, beta, active, sigma = gaussian_instance(n=n, 
-                                             p=p, 
-                                             s=s, 
-                                             sigma=sigma, 
-                                             rho=rho, 
-                                             signal=signal, 
-                                             df=df)
+                                                      p=p, 
+                                                      s=s, 
+                                                      sigma=sigma, 
+                                                      rho=rho, 
+                                                      signal=signal, 
+                                                      df=df)
         mu = np.dot(X, beta)
         splitn = int(n*split_frac)
         indices = np.arange(n)
@@ -161,7 +161,7 @@ def test_data_carving_IC(nsim=500,
 
         FS = info_crit_stop(y, X, sigma, cost=np.log(n), subset=stage_one)
 
-        if set(range(s)).issubset(FS.active):
+        if set(active).issubset(FS.active):
             results, FS = data_carving_IC(y, X, sigma,
                                           stage_one=stage_one,
                                           splitting=True, 

From 5dab51a70a6466cdc274f8e29e7a070d3c34d591 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 11:28:21 -0700
Subject: [PATCH 045/617] deprecated CV -- using randomized CV

---
 selection/algorithms/cross_valid.py   | 691 --------------------------
 selection/algorithms/tests/test_cv.py | 124 -----
 2 files changed, 815 deletions(-)
 delete mode 100755 selection/algorithms/cross_valid.py
 delete mode 100644 selection/algorithms/tests/test_cv.py

diff --git a/selection/algorithms/cross_valid.py b/selection/algorithms/cross_valid.py
deleted file mode 100755
index c0d64f7bb..000000000
--- a/selection/algorithms/cross_valid.py
+++ /dev/null
@@ -1,691 +0,0 @@
-"""
-Script to implement selective inference after cross-validation
-
-"""
-
-import numpy as np
-from scipy.stats import norm as ndist
-
-from regreg.api import identity_quadratic
-
-from .lasso import lasso
-from .sqrt_lasso import solve_sqrt_lasso, choose_lambda
-from ..constraints.affine import (constraints, 
-                                  sample_from_constraints)
-from ..distributions.discrete_family import discrete_family
-
-# These next few functions should be generalized to not
-# be just sqrt_lasso
-
-### begin -- generalize from sqrt_lasso to smooth losses with \ell_1 penalty
-
-def solve_grid(Y, 
-               X, 
-               L, 
-               mults, 
-               post_estimator=False,
-               solve_args={'min_its':10, 'max_its':20},
-               quadratic=None):
-    """
-    Solve the square-root LASSO over a grid of values.
-
-    .. math::
-
-        \text{minimize}_{\beta} \|y-X\beta\|_2 + m * L \|\beta\|_1
-
-    for $m$ in `mults`.
-
-    Parameters
-    ----------
-
-    Y : np.float(n)
-        Response vectors
-
-    X : np.float((n,p))
-        Design matrix.
-
-    L : float
-        Value of $\lambda$ in square-root LASSO optimization
-        problem.
-
-    mults: [float]
-        Sequence of floats over which to solve square-root LASSO.
-
-    post_estimator: bool
-        Should we return the square-root LASSO estimate or the
-        OLS of the selected model (the post square-root LASSO estimator).
-
-    solve_args : {}
-        Keyword arguments passed to `solve_sqrt_lasso`.
-
-    Returns
-    -------
-
-    results : [(m, beta_m)]
-        Coefficient estimates for each `m` in `mults`.
-
-    """
-    n, p = X.shape
-    results = []
-    for i, m in enumerate(mults):
-        if i == 0:
-            results.append(
-                (m, solve_sqrt_lasso(X, 
-                                     Y, 
-                                     m * L * np.ones(p), 
-                                     quadratic=quadratic,
-                                     solve_args=solve_args)[0]))
-        else:
-            results.append(
-                (m, solve_sqrt_lasso(X, 
-                                     Y, 
-                                     m * L * np.ones(p), 
-                                     quadratic=quadratic,
-                                     initial=results[-1][1],
-                                     solve_args=solve_args)[0]))
-
-        if post_estimator:
-            active = np.nonzero(results[-1][1])[0]
-            coef = np.zeros(p)
-            if active.shape[0] > 0:
-                X_E = X[:,active]
-                coef[active] = np.dot(np.linalg.pinv(X_E), Y)
-            results[-1] = (m, coef)
-
-    return results
-
-def split_and_validate(Y, 
-                       X, 
-                       L, 
-                       mults, 
-                       test_frac,
-                       shift_size=0,
-                       quadratic=None):
-    """
-    Choose which lambda minimizes prediction
-    over a random split.
-
-    Parameters
-    ----------
-
-    Y : np.float(n)
-        Response vectors
-
-    X : np.float((n,p))
-        Design matrix.
-
-    L : float
-        Value of $\lambda$ in square-root LASSO optimization
-        problem.
-
-    mults: [float]
-        Sequence of floats over which to solve square-root LASSO.
-
-    test_frac: float
-        What percentage should be used as test?
-
-    shift_size : int
-        Return minimizer plus a uniform 
-        positive or negative shift in the index 
-        of `mults` of a given size.
-        Affects the size of the window of 
-        minimizers to be accepted by later sampling scheme.
-
-    quadratic : `regreg.identity_quadratic`
-        A quadratic term added to objective function.
-
-    """
-    n, p = X.shape
-    training = np.zeros(n, np.bool)
-    training[np.random.choice(np.arange(n), size=int(test_frac*n), replace=False)] = 1
-    test = ~training
-
-    results = solve_grid(Y[training], X[training], L, mults=mults, quadratic=quadratic)
-    error = []
-    for m, coef in results:
-        error.append((np.linalg.norm(Y[test] - np.dot(X[test], coef))**2, m))
-    m_min = min(error)[1]
-    idx_min = list(mults).index(m_min)
-    
-    # this shift randomizes the returned value of \lambda
-    # have not really used it much.
-    
-    if shift_size > 0:
-        random_shift = np.random.random_integers(low=-shift_size,
-                                          high=shift_size)
-        idx_min += random_shift
-        idx_min = max(idx_min, 0)
-    return [mults[idx_min + j] for j in range(-shift_size, shift_size+1, 1)
-            if idx_min + j >= 0 and idx_min + j < len(mults)]
-
-def kfold_CV(Y, 
-             X, 
-             L, 
-             mults, 
-             K=10,
-             random_shift=0,
-             shuffle=True, random_state=False):
-    """
-    Choose which lambda minimizes prediction
-    using K-fold cross-validation.
-
-
-    Parameters
-    ----------
-
-    Y : np.float(n)
-        Response vectors
-
-    X : np.float((n,p))
-        Design matrix.
-
-    L : float
-        Value of $\lambda$ in square-root LASSO optimization
-        problem.
-
-    mults: [float]
-        Sequence of floats over which to solve square-root LASSO.
-
-    K : int
-        Number of folds (defaults to 10).
-
-    shift_size : int
-        Return minimizer plus a uniform 
-        positive or negative shift in the index 
-        of `mults` of a given size.
-        Affects the size of the window of 
-        minimizers to be accepted by later sampling scheme.
-
-    shuffle : bool
-        Argument to `sklearn.cross_validation.KFold`
-
-    random_state : None, int or RandomState
-        Argument to `sklearn.cross_validation.KFold`
-    
-    Returns
-    -------
-
-    window : [float]
-        Values of multiplier that will be accepted
-        in sampling routine.
-
-    """
-
-    n, p = X.shape
-
-    kfold = sklearn.cross_validation.KFold(n=n, 
-                                           n_folds=K, 
-                                           shuffle=shuffle,
-                                           random_state=random_state)
-    error = {}
-
-    for train_index, test_index in kfold:
-        results = solve_grid(Y[train_index], X[train_index], L, mults=mults)
-        for m, coef in results:
-            error.setdefault(m, []).append(
-                nplinalg.norm(Y[test_index] - np.dot(X[test_index], coef))**2)
-    
-    for m in mults:
-        error[m] = (np.mean(error[m]), np.std(error[m]))
-    m_min = min([(error[k], k) for k in error])[1]
-    idx_min = list(mults).index(m_min)
-    if shift_size > 0:
-        random_shift = np.random.random_integers(low=-shift_size,
-                                          high=shift_size)
-        idx_min += random_shift
-        idx_min = max(idx_min, 0)
-    return [mults[idx_min + j] for j in range(-shift_size, shift_size+1, 1)
-            if idx_min + j >= 0 and idx_min + j < len(mults)]
-
-def select_vars_signs(Y, 
-                      X, 
-                      L, 
-                      quadratic=None,
-                      solve_args={'min_its':150}):
-
-    """
-    Return active set and signs for solution
-    of square-root LASSO.
-
-    Parameters
-    ----------
-
-    Y : np.float(n)
-        Response vectors
-
-    X : np.float((n,p))
-        Design matrix.
-
-    L : float
-        Value of $\lambda$ in square-root LASSO optimization
-        problem.
-
-    solve_args : {}
-        Keyword arguments passed to `solve_sqrt_lasso`.
-
-    Returns
-    -------
-
-    active : [int] 
-        Active set.
-
-    signs : [-1,1]
-        Signs of variables in active set.
-
-    sqlasso : `selection.algorithms.sqrt_lasso.sqrt_lasso`
-        Instance whose signs and active sets we return.
-
-    """
-    n, p = X.shape
-    SL = lasso.sqrt_lasso(X, Y, L * np.ones(p), quadratic=quadratic)
-    SL.fit(solve_args=solve_args)
-    return SL.active, SL.active_signs, SL
-
-### end -- generalize from sqrt_lasso to smooth losses with \ell_1 penalty
-
-
-## this class should be closer to examples in `selection.sampling.randomized` so
-## we can reuse that code
-
-class lasso_tuned(object):
-
-    """
-    
-    Selective inference after choosing lambda
-    in sqrt LASSO.
-    
-    Uses selected model on randomized data
-    after having chosen \lambda.
-
-    When \sigma^2_E is unknown
-    we estimate \sigma^2_E.
-
-    """
-
-    CV_period = 50 # how often to try to update Y_CV
-
-    def __init__(self, 
-                 Y, 
-                 X,
-                 randomization=ndist,
-                 test_frac = 0.9,
-                 mults = np.linspace(1.5,0.5,11),
-                 sigma = None,
-                 scale_inter = np.sqrt(0.2),
-                 scale_select = np.sqrt(0.1),
-                 scale_valid = np.sqrt(0.1),
-                 shift_size=1):
-
-        """
-
-        Parameters
-        ----------
-
-        Y : np.float(n)
-            Response vectors
-
-        X : np.float((n,p))
-            Design matrix.
-
-        randomization : `scipy.stats.rv_continuous`
-            A random variable with `pdf` and `rvs` methods.
-
-        mults: [float]
-            Sequence of floats over which to solve square-root LASSO.
-
-        sigma : float
-            Noise variance, if known. 
-
-        scale_inter : float
-            Proportion of variance (using
-            `self.rough_sigma` as baseline) 
-            added in randomization
-            to Y_inter.
-
-        scale_select : float
-            Proportion of variance (using
-            `self.rough_sigma` as baseline) 
-            added in randomization
-            to Y_select.
-
-        scale_valid : float
-            Proportion of variance (using
-            `self.rough_sigma` as baseline) 
-            added in randomization
-            to Y_valid.
-
-        shift_size : int
-            Return minimizer plus a uniform 
-            positive or negative shift in the index 
-            of `mults` of a given size.
-            Affects the size of the window of 
-            minimizers to be accepted by later sampling scheme.
-
-        """
-        n, p = X.shape
-
-        (self.Y, 
-         self.X, 
-         self.test_frac, 
-         self.mults,
-         self.randomization) = (
-            Y, 
-            X, 
-            test_frac, 
-            mults,
-            randomization)
-
-        self.L = choose_lambda(X)
-
-        self.scale_inter = scale_inter
-        self.scale_select = scale_select
-        self.scale_valid = scale_valid
-
-        # randomize our response
-
-        self.randomize()
-
-        # now find which CV values to accept
-
-        self.accept_values = self.choose_lambda(self.Y, 
-                                                shift_size=shift_size)
-        self.selected_value = np.median(self.accept_values)
-        self.choose_variables()
-
-        self.null_sample = {}
-
-        # estimate sigma if needed
-
-        if sigma is not None:
-            self.sigma_resid = sigma
-        else:
-            resid_current = (Y - np.dot(self.X[:,self.active_set],
-                                        self.SQ.onestep_estimator))
-            n = Y.shape[0]
-            self.sigma_resid = np.linalg.norm(resid_current) / np.sqrt(n - self.active_set.shape[0])
-
-        # find response independent of Y_inter, Y_valid, Y_select
-
-        # XXX code below is specific to squared error loss -- need to rewrite for logistic
-#         ratio = self.sigma_resid**2 / (self.scale_inter * self.rough_sigma)**2
-#         self.Y_indep = Y - ratio * (self.Y_inter - Y)
-#         self.betahat_indep = np.dot(np.linalg.pinv(self.X[:,self.active_set]), self.Y_indep)
-#         cov_indep = np.linalg.pinv(np.dot(self.X[:,self.active_set].T, self.X[:,self.active_set])) * self.sigma_resid**2 * (1 + ratio)
-#         T_indep = np.fabs(self.betahat_indep / np.sqrt(np.diag(cov_indep)))
-#         self.pval_indep = 2 * (1 - ndist.cdf(T_indep))
-
-    def randomize(self):
-        """
-        Carry out the randomization,
-        finding the value of lambda
-        as well as the selected variables and signs.
-
-        Initiailizes the attributes: [Y_inter, Y_valid, Y_select].
-        """
-
-        n = self.Y.shape[0]
-
-        # intermediate between 
-        # CV and model selection 
-        # and the actual data
-
-        self.Q_inter = identity_quadratic(0, 0, self.randomization.rvs(size=self.X.shape[1]) * self.scale_inter, 0)
-        self.Q_valid = self.Q_inter + identity_quadratic(0, 0, self.randomization.rvs(size=self.X.shape[1]) * self.scale_valid, 0) 
-        self.Q_select = self.Q_inter + identity_quadratic(0, 0, self.randomization.rvs(size=self.X.shape[1]) * self.scale_select, 0)
-
-    def choose_lambda(self, Y, shift_size=0):
-        """
-        Select a value of lambda using `self.Y_valid`
-
-        Stores result in attribute `accept_values`.
-
-        Any resampling of Y_valid that results in a value within these
-        values has a chance to be accepted.
-
-        Parameters
-        ----------
-
-        Y : np.float(n)
-            Response vector.
-
-        shift_size : int
-            Return minimizer plus a uniform 
-            positive or negative shift in the index 
-            of `mults` of a given size.
-            Affects the size of the window of 
-            minimizers to be accepted by later sampling scheme.
-
-        """
-        return split_and_validate(Y,
-                                  self.X,
-                                  self.L, 
-                                  self.mults, 
-                                  self.test_frac,
-                                  quadratic=self.Q_valid,
-                                  shift_size=shift_size)
-        
-    def choose_variables(self):
-        """
-        Select variables and signs `self.Y_select`
-
-        Stores results in attributes `(active_set, active_signs)`.
-
-        Also initializes some attributes used in sampling Y_select.
-        """
-        # now, select a model
-
-        (self.active_set, 
-         self.active_signs,
-         self.SQ) = select_vars_signs(self.Y, 
-                                      self.X,
-                                      self.selected_value * self.L,
-                                      quadratic=self.Q_select)
-
-        self.inactive_set = self.SQ.inactive
-        self._select_beta = self.SQ.lasso_solution
-        self._select_loss = self.SQ.loglike
-        self._select_subgrad = -(self._select_loss.smooth_objective(self._select_beta, 'grad') + 
-                                 self.Q_select.objective(self._select_beta, 'grad'))
-
-    def step_valid(self,
-                   max_trials=10):
-        """
-        Try and move Y_valid
-        by accept reject stopping after `max_trials`.
-        """
-
-        X, L, mults = self.X, self.L, self.mults
-        n, p = X.shape
-
-        count = 0
-        Q_old = self.Q_valid
-
-        while True:
-            count += 1
-            self.Q_valid = self.Q_inter + identity_quadratic(0, 0, self.randomization.rvs(size=self.X.shape[1]) * 
-                                                             self.scale_valid, 0) 
-
-            if len(self.mults) > 0:
-                proposal_value = self.choose_lambda(self.Y,
-                                                    shift_size=0)
-
-                if proposal_value[0] in self.accept_values:
-                    break
-            else:
-                break
-
-            if count >= max_trials:
-                self.Q_valid = Q_old
-                break
-
-    def step_select(self,
-                    step_size=0.1):
-        """
-        Take `ndraw` Gibbs steps of Y_select
-        """
-
-        L_inter = self.Q_inter.linear_term
-        L_select = self.Q_select.linear_term - L_inter
- 
-        # self.randomization defaults to Gaussian or beware!
-        G_cur = np.linalg.norm(self._select_loss.smooth_objective(self._select_beta, 'grad') + 
-                               L_inter + self._select_subgrad)**2 / self.scale_select**2
-
-        while True:
-            _beta = self._select_beta.copy()
-            _beta[self.active_set] += (step_size * 
-                                       self.randomization.rvs(size=self.active_set.shape) * 
-                                       self.scale_select)
-
-            _subgrad = self._select_subgrad.copy()
-            _subgrad[self.inactive_set] += (step_size * 
-                                            self.randomization.rvs(size=self.inactive_set.sum()) * 
-                                            self.scale_select)
-
-
-            if (np.all(np.sign(_beta) == np.sign(self._select_beta))
-                and 
-                np.all(np.fabs(_subgrad[self.inactive_set]) < self.SQ.feature_weights[self.inactive_set])):
-                break
-
-        G_proposal = np.linalg.norm(self._select_loss.smooth_objective(_beta, 'grad') + 
-                                    L_inter + _subgrad)**2 / self.scale_select**2
-
-        logMH_ratio = G_proposal - G_cur
-        if np.random.sample() < np.exp(logMH_ratio): # MH step accepted
-            self._select_beta[:] = _beta
-            self._select_subgrad[:] = _subgrad
-
-            self.Q_select.linear_term = -(self._select_loss.smooth_objective(_beta, 'grad') + 
-                                          _subgrad)
-
-    def step_inter(self,
-                   do_gibbs=True):
-
-        L_old = self.Q_inter.linear_term
-
-        T_IS = self.Q_select.linear_term
-        T_IV = self.Q_valid.linear_term
-
-        quadratic_term = (1. / self.scale_inter**2 + 
-                          1. / self.scale_valid**2 + 
-                          1. / self.scale_select**2)
-
-        linear_term = (T_IS / self.scale_select**2 + T_IV / self.scale_valid**2)
-
-        sampling_sd = 1. / np.sqrt(quadratic_term)
-        sampling_mean = linear_term / quadratic_term
-
-        # self.randomization defaults to scipy.stats.norm -- otherwise beware!
-        self.Q_inter.linear_term = (sampling_mean + self.randomization.rvs(size=T_IS.shape) * 
-                                    sampling_sd)
-        
-    def step_randomized(self):
-        """
-        Take a move on the all 
-        randomized variables.
-        """
-
-        self.counter += 1
-
-        if self.counter % self.CV_period == 0:
-            self.step_valid()
-        
-        self.step_select()
-        self.step_inter()
-
-    def setup_inference(self, which_var): 
-        """
-        Setup the current gaussian for sampling
-
-        TODO: we should use the tilted distribution
-        with the selectively unbiased estimate. Will help 
-        with intervals.
-
-        """
-        p = self.X.shape[1]
-        self._gaussian_mean = np.zeros(p)
-        self._gaussian_cov = np.identity(p)
-        self._invcov_noisy = 0.5 * np.identity(p)
-        self._gaussian_conditional_sqrt = np.sqrt(0.5) * np.identity(p)
-        self.which_var = which_var
-        self.null_sample[which_var] = []
-        self._gaussian_stat = np.zeros(p)
-        self._gaussian_obs = self._gaussian_stat.copy()
-
-    def step_sample(self):
-
-        """
-        Move Y_sample -- a Gaussian draw
-        with mean depending on Y_inter.
-        """
-
-        p = self.X.shape[1]
-        (mean, 
-         cov, 
-         invcov_noisy, 
-         sampling_sqrt) = (self._gaussian_mean, 
-                           self._gaussian_cov, 
-                           self._invcov_noisy, 
-                           self._gaussian_conditional_sqrt)
-
-        noisy_statistic = self._gaussian_stat - self.Q_inter.linear_term
-        sampling_mean = mean + cov.dot(invcov_noisy).dot(noisy_statistic - mean)
-        self._gaussian_stat = sampling_mean + sampling_sqrt.dot(np.random.standard_normal(p))
-        self.null_sample[self.which_var].append(self._gaussian_stat[self.which_var])
-
-    def __iter__(self):
-        if not hasattr(self, "which_var"):
-            raise ValueError("choose a variable in active set on which to do inference")
-        self.counter = 0
-        return self
-
-    def next(self):
-        
-        # move randomized responses Q_inter, Q_valid, Q_select
-        self.step_randomized()
-
-        # move Y_sample
-        self.step_sample()
-        
-    __next__ = next # Python3 compatibility
-
-    def pvalue(self, which_var,
-               ndraw=2000,
-               burnin=500):
-        """
-        Produce two p-values for one of the
-        active variables, which_var, assumed to be in self.active_set
-
-        First one uses sampling, the second based on
-        a particular conditional distribution.
-        """
-
-        self.setup_inference(which_var); iter(self)
-        for _ in xrange(ndraw + burnin):
-            self.next()
-
-        family = discrete_family(self.null_sample[which_var][burnin:],
-                                 np.ones(ndraw))
-        obs = self._gaussian_obs[self.which_var]
-        pval = family.cdf(0, obs)
-        pval = 2 * min(pval, 1 - pval)
-    
-        idx = list(self.active_set).index(which_var)
-        return pval, self.pval_indep[idx]
-
-
-class lasso_tuned_conditional(lasso_tuned):
-
-    """
-    Condition on the value of Y_valid -- accomplished by never
-    sampling Y_valid.
-
-    TODO: this can be made a fast sampler by automatically
-    marginalizing over Y_inter.
-    """
-
-    CV_period = np.inf
-    pass
-
-
diff --git a/selection/algorithms/tests/test_cv.py b/selection/algorithms/tests/test_cv.py
deleted file mode 100644
index fdd715301..000000000
--- a/selection/algorithms/tests/test_cv.py
+++ /dev/null
@@ -1,124 +0,0 @@
-from __future__ import print_function
-import numpy as np
-
-from selection.tests.instance import gaussian_instance 
-from selection.algorithms.cross_valid import lasso_tuned, lasso_tuned_conditional 
-from selection.distributions.discrete_family import discrete_family
-
-def test_CV(ndraw=500, sigma_known=True,
-            burnin=100,
-            s=7,
-            rho=0.3,
-            method=lasso_tuned,
-            snr=5):
-    # generate a null and alternative pvalue
-    # from a particular model
-
-    X, Y, beta, active, sigma = gaussian_instance(n=500, p=100, s=s, rho=rho, snr=snr)
-    if sigma_known:
-        sigma = sigma
-    else:
-        sigma = None
-
-    method_ = method(Y, X, scale_inter=0.0001, scale_valid=0.0001, scale_select=0.0001)
-
-    if True: 
-        do_null = True
-        if do_null:
-            which_var = method_.active_set[s] # the first null one
-            method_.setup_inference(which_var) ; iter(method_)
-
-            for i in range(ndraw + burnin):
-                method_.next()
-
-            Z = np.array(method_.null_sample[which_var][burnin:])
-            family = discrete_family(Z, 
-                                     np.ones_like(Z))
-            obs = method_._gaussian_obs[which_var]
-
-            pval0 = family.cdf(0, obs)
-            pval0 = 2 * min(pval0, 1 - pval0)
-        else:
-            pval0 = np.random.sample()
-
-        which_var = 0
-        method_.setup_inference(which_var); iter(method_)
-        for i in range(ndraw + burnin):
-            method_.next()
-
-        family = discrete_family(method_.null_sample[which_var][burnin:], 
-                                 np.ones(ndraw))
-        obs = method_._gaussian_obs[which_var]
-        pvalA = family.cdf(0, obs)
-        pvalA = 2 * min(pvalA, 1 - pvalA)
-        return pval0, pvalA, method_
-
-def plot_fig():
-
-    from statsmodels.distributions import ECDF
-    import matplotlib.pyplot as plt
-    f = plt.figure(num=1)
-
-    s = 7
-    P0, PA = [], []
-    screened = 0
-
-    results = {}
-    counter = {}
-    linestyle = {lasso_tuned:'-',
-                 lasso_tuned_conditional:'-.'}
-
-    results.setdefault('indep', [])
-
-    for i in range(200):
-        print(i)
-        for method in [lasso_tuned, lasso_tuned_conditional]:
-            result = test_CV(ndraw=1000, burnin=500, sigma_known=False,
-                              method=method, s=s)
-            counter.setdefault(method, 0) 
-            if result is not None:
-                results.setdefault(method, []).append(result[:2])
-                counter[method] += 1
-
-                P0 = np.array(results[method])[:,0]
-                PA = np.array(results[method])[:,1]
-
-                U = np.linspace(0,1,101)
-                ecdf0 = ECDF(P0)(U)
-                ecdfA = ECDF(PA)(U)
-                ax = f.gca()
-                ax.plot(U, ecdf0, 'k' + linestyle[method], 
-                        linewidth=3,
-                        label=str(method.__name__)[11:])
-                ax.plot(U, ecdfA, 'r' + linestyle[method], 
-                        linewidth=3)
-                results['indep'].append((result[2].pval_indep[s], result[2].pval_indep[0]))
-                np.savez(str(method.__name__)[11:] + '.npz', P0=P0, PA=PA)
-
-            print(('screening', str(method.__name__)), (counter[method] * 1.) / (i + 1))
-            print(('power', str(method.__name__)), np.mean(PA < 0.05))
-            print(('level', str(method.__name__)), np.mean(P0 < 0.05))
-
-        P0 = np.array(results['indep'])[:,0]
-        PA = np.array(results['indep'])[:,1]
-        np.savez('indep.npz', P0=P0, PA=PA)
-
-        print(('power', 'indep'), np.mean(PA < 0.05))
-        print(('level', 'level'), np.mean(P0 < 0.05))
-
-
-        U = np.linspace(0,1,101)
-        ecdf0 = ECDF(P0)(U)
-        ecdfA = ECDF(PA)(U)
-
-        ax.plot(U, ecdf0, 'k:',
-                linewidth=3,
-                label='independent')
-        ax.plot(U, ecdfA, 'r:',
-                linewidth=3)
-
-        ax.legend(loc='lower right')
-        f.savefig('ecdf.pdf')
-        f.clf()
-
-

From 9d5d78b192856defb4b63062e8daf60438c80346 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 12:19:44 -0700
Subject: [PATCH 046/617] BF: array shapes

---
 .../algorithms/tests/test_forward_step.py     | 37 +++++---
 selection/algorithms/tests/test_lasso.py      | 89 +++++++++----------
 2 files changed, 66 insertions(+), 60 deletions(-)

diff --git a/selection/algorithms/tests/test_forward_step.py b/selection/algorithms/tests/test_forward_step.py
index fdf5bb780..e857470d0 100644
--- a/selection/algorithms/tests/test_forward_step.py
+++ b/selection/algorithms/tests/test_forward_step.py
@@ -2,8 +2,12 @@
 
 from selection.tests.flags import SET_SEED, SMALL_SAMPLES
 from selection.tests.instance import gaussian_instance
-from selection.algorithms.forward_step import forward_step, info_crit_stop, data_carving_IC
-from selection.tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
+from selection.algorithms.forward_step import (forward_step, 
+                                               info_crit_stop, 
+                                               data_carving_IC)
+import selection.algorithms.forward_step as forward_mod
+from selection.tests.decorators import (set_sampling_params_iftrue, 
+                                        set_seed_iftrue)
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_FS(k=10, ndraw=5000, burnin=5000):
@@ -244,14 +248,17 @@ def test_mcmc_tests(n=100, p=40, s=4, rho=0.3, signal=5, ndraw=None, burnin=2000
         FS.step()
 
         if extra_steps <= 0:
-            null_rank = FS.mcmc_test(i+1, variable=FS.variables[i-2], 
-                                     nstep=nstep,
-                                     burnin=burnin,
-                                     method="serial")
-            alt_rank = FS.mcmc_test(i+1, variable=FS.variables[0], 
-                                    burnin=burnin,
-                                    nstep=nstep, 
-                                    method="parallel")
+            null_rank = forward_mod.mcmc_test(FS, 
+                                              i+1, 
+                                              variable=FS.variables[i-2], 
+                                              nstep=nstep,
+                                              burnin=burnin,
+                                              method="serial")
+            alt_rank = forward_mod.mcmc_test(FS, i+1,
+                                             variable=FS.variables[0], 
+                                             burnin=burnin,
+                                             nstep=nstep, 
+                                             method="parallel")
             break
 
         if set(active).issubset(FS.variables):
@@ -276,10 +283,12 @@ def test_independence_null_mcmc(n=100, p=40, s=4, rho=0.5, signal=5,
         FS.step()
 
         if completed and extra_steps > 0:
-            null_rank = FS.mcmc_test(i+1, variable=FS.variables[-1], 
-                                     nstep=nstep,
-                                     burnin=burnin,
-                                     method="serial")
+            null_rank = forward_mod.mcmc_test(FS, 
+                                              i+1, 
+                                              variable=FS.variables[-1], 
+                                              nstep=nstep,
+                                              burnin=burnin,
+                                              method="serial")
             null_ranks.append(int(null_rank))
 
         if extra_steps <= 0:
diff --git a/selection/algorithms/tests/test_lasso.py b/selection/algorithms/tests/test_lasso.py
index 666efc01d..b5a7f1a35 100644
--- a/selection/algorithms/tests/test_lasso.py
+++ b/selection/algorithms/tests/test_lasso.py
@@ -4,7 +4,8 @@
 from itertools import product
 
 from selection.tests.flags import SMALL_SAMPLES
-from selection.tests.instance import gaussian_instance as instance
+from selection.tests.instance import (gaussian_instance as instance,
+                                      logistic_instance)
 from selection.tests.decorators import set_sampling_params_iftrue, wait_for_return_value, register_report
 import selection.tests.reports as reports
 
@@ -168,7 +169,7 @@ def test_data_carving_gaussian(n=200,
                                s=7,
                                sigma=5,
                                rho=0.3,
-                               snr=7.,
+                               signal=7.,
                                split_frac=0.8,
                                lam_frac=2.,
                                ndraw=8000,
@@ -183,7 +184,7 @@ def test_data_carving_gaussian(n=200,
                                               s=s, 
                                               sigma=sigma, 
                                               rho=rho, 
-                                              snr=snr, 
+                                              signal=signal, 
                                               df=df)
     mu = np.dot(X, beta)
 
@@ -223,7 +224,7 @@ def test_data_carving_gaussian(n=200,
         Xa = X[:,DC.active]
         truth = np.dot(np.linalg.pinv(Xa), mu) 
 
-        active = np.zeros_like(DC.active, np.bool)
+        active = np.zeros(p, np.bool)
         active[true_active] = 1
         v = (carve, split, active)
         return v
@@ -236,7 +237,7 @@ def test_data_carving_sqrt_lasso(n=200,
                                  s=7,
                                  sigma=5,
                                  rho=0.3,
-                                 snr=7.,
+                                 signal=7.,
                                  split_frac=0.9,
                                  lam_frac=1.2,
                                  ndraw=8000,
@@ -250,7 +251,7 @@ def test_data_carving_sqrt_lasso(n=200,
                                          s=s, 
                                          sigma=sigma, 
                                          rho=rho, 
-                                         snr=snr, 
+                                         signal=signal, 
                                          df=df)
     mu = np.dot(X, beta)
 
@@ -275,7 +276,6 @@ def test_data_carving_sqrt_lasso(n=200,
         print(DC.active)
         data_split = False
 
-
     if set(true_active).issubset(DC.active):
         carve = []
         split = []
@@ -290,7 +290,7 @@ def test_data_carving_sqrt_lasso(n=200,
         Xa = X[:,DC.active]
         truth = np.dot(np.linalg.pinv(Xa), mu) 
 
-        active = np.zeros_like(DC.active, np.bool)
+        active = np.zeros(p, np.bool)
         active[true_active] = 1
         v = (carve, split, active)
         return v
@@ -304,7 +304,7 @@ def test_data_carving_logistic(n=700,
                                s=5,
                                sigma=5,
                                rho=0.05,
-                               snr=4.,
+                               signal=4.,
                                split_frac=0.8,
                                ndraw=8000,
                                burnin=2000, 
@@ -313,25 +313,22 @@ def test_data_carving_logistic(n=700,
                                use_full_cov=False,
                                return_only_screening=True):
     
-    X, y, beta, true_active, sigma = instance(n=n, 
-                                         p=p, 
-                                         s=s, 
-                                         sigma=sigma, 
-                                         rho=rho, 
-                                         snr=snr, 
-                                         df=df)
-
+    X, y, beta, true_active = logistic_instance(n=n, 
+                                                p=p, 
+                                                s=s, 
+                                                rho=rho, 
+                                                signal=signal,
+                                                equicorrelated=False)
 
     mu = X.dot(beta)
     prob = np.exp(mu) / (1 + np.exp(mu))
 
     X = np.hstack([np.ones((n,1)), X])
-    z = np.random.binomial(1, prob)
     active = np.array(true_active)
     active += 1
     s += 1
     active = [0] + list(active)
-    true_active = np.nonzero(active)[0]
+    true_active = active
 
     idx = np.arange(n)
     np.random.shuffle(idx)
@@ -340,13 +337,14 @@ def test_data_carving_logistic(n=700,
 
     lam_theor = 1.0 * np.ones(p+1)
     lam_theor[0] = 0.
-    DC = data_carving.logistic(X, z, feature_weights=lam_theor,
+    DC = data_carving.logistic(X, y, 
+                               feature_weights=lam_theor,
                                stage_one=stage_one)
 
     DC.fit()
 
     if len(DC.active) < n - int(n*split_frac):
-        DS = data_splitting.logistic(X, z, feature_weights=lam_theor,
+        DS = data_splitting.logistic(X, y, feature_weights=lam_theor,
                                      stage_one=stage_one)
         DS.fit(use_full_cov=True)
         data_split = True
@@ -355,6 +353,7 @@ def test_data_carving_logistic(n=700,
         print(DC.active)
         data_split = False
 
+    print(true_active, DC.active)
     if set(true_active).issubset(DC.active):
         carve = []
         split = []
@@ -367,13 +366,11 @@ def test_data_carving_logistic(n=700,
 
         Xa = X[:,DC.active]
 
-        active = np.zeros_like(DC.active, np.bool)
+        active = np.zeros(p, np.bool)
         active[true_active] = 1
         v = (carve, split, active)
         return v
 
-    return return_value
-
 @register_report(['pvalue', 'split_pvalue', 'active'])
 @wait_for_return_value()
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
@@ -382,7 +379,7 @@ def test_data_carving_poisson(n=500,
                               s=5,
                               sigma=5,
                               rho=0.3,
-                              snr=12.,
+                              signal=12.,
                               split_frac=0.8,
                               lam_frac=1.2,
                               ndraw=8000,
@@ -397,7 +394,7 @@ def test_data_carving_poisson(n=500,
                                               s=s, 
                                               sigma=sigma, 
                                               rho=rho, 
-                                              snr=snr, 
+                                              signal=signal, 
                                               df=df)
     X = np.hstack([np.ones((n,1)), X])
     y = np.random.poisson(10, size=y.shape)
@@ -439,7 +436,7 @@ def test_data_carving_poisson(n=500,
 
         Xa = X[:,DC.active]
 
-        active = np.zeros_like(DC.active, np.bool)
+        active = np.zeros(p, np.bool)
         active[true_active] = 1
         v = (carve, split, active)
         return v
@@ -503,7 +500,7 @@ def test_data_carving_coxph(n=400,
 
         Xa = X[:,DC.active]
 
-        active = np.zeros_like(DC.active, np.bool)
+        active = np.zeros(p, np.bool)
         active[true_active] = 1
         v = (carve, split, active)
         return v
@@ -528,14 +525,14 @@ def test_gaussian_pvals(n=100,
                         s=7,
                         sigma=5,
                         rho=0.3,
-                        snr=8.):
+                        signal=8.):
 
     X, y, beta, true_active, sigma = instance(n=n, 
                                          p=p, 
                                          s=s, 
                                          sigma=sigma, 
                                          rho=rho, 
-                                         snr=snr)
+                                         signal=signal)
     L = lasso.gaussian(X, y, 20., sigma=sigma)
     L.fit()
     L.fit(L.lasso_solution)
@@ -551,14 +548,14 @@ def test_sqrt_lasso_pvals(n=100,
                           s=7,
                           sigma=5,
                           rho=0.3,
-                          snr=7.):
+                          signal=7.):
 
     X, y, beta, true_active, sigma = instance(n=n, 
                                          p=p, 
                                          s=s, 
                                          sigma=sigma, 
                                          rho=rho, 
-                                         snr=snr)
+                                         signal=signal)
 
     lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0)) / np.sqrt(n)
     Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)
@@ -582,7 +579,7 @@ def test_sqrt_lasso_sandwich_pvals(n=200,
                                    s=10,
                                    sigma=10,
                                    rho=0.3,
-                                   snr=6.,
+                                   signal=6.,
                                    use_lasso_sd=False):
 
     X, y, beta, true_active, sigma = instance(n=n, 
@@ -590,7 +587,7 @@ def test_sqrt_lasso_sandwich_pvals(n=200,
                                          s=s, 
                                          sigma=sigma, 
                                          rho=rho, 
-                                         snr=snr)
+                                         signal=signal)
 
     heteroscedastic_error = sigma * np.random.standard_normal(n) * (np.fabs(X[:,-1]) + 0.5)**2
     heteroscedastic_error += sigma * np.random.standard_normal(n) * (np.fabs(X[:,-2]) + 0.2)**2
@@ -614,7 +611,7 @@ def test_gaussian_sandwich_pvals(n=200,
                                  s=10,
                                  sigma=10,
                                  rho=0.3,
-                                 snr=6.,
+                                 signal=6.,
                                  use_lasso_sd=False):
 
     X, y, beta, true_active, sigma = instance(n=n, 
@@ -622,7 +619,7 @@ def test_gaussian_sandwich_pvals(n=200,
                                          s=s, 
                                          sigma=sigma, 
                                          rho=rho, 
-                                         snr=snr)
+                                         signal=signal)
 
     heteroscedastic_error = sigma * np.random.standard_normal(n) * (np.fabs(X[:,-1]) + 0.5)**2
     heteroscedastic_error += sigma * np.random.standard_normal(n) * (np.fabs(X[:,-2]) + 0.2)**2
@@ -674,28 +671,28 @@ def test_logistic_pvals(n=500,
                         s=3,
                         sigma=2,
                         rho=0.3,
-                        snr=7.):
+                        signal=10.):
 
-    X, y, beta, true_active, sigma = instance(n=n, 
-                                         p=p, 
-                                         s=s, 
-                                         sigma=sigma, 
-                                         rho=rho, 
-                                         snr=snr)
+    X, y, beta, true_active = logistic_instance(n=n, 
+                                                p=p, 
+                                                s=s, 
+                                                rho=rho, 
+                                                signal=signal,
+                                                equicorrelated=False)
 
-    z = (y > 0)
     X = np.hstack([np.ones((n,1)), X])
 
     active = np.array(true_active)
     active += 1
     active = [0] + list(active)
+    true_active = active
 
-    L = lasso.logistic(X, z, [0]*1 + [1.2]*p)
+    L = lasso.logistic(X, y, [0]*1 + [1.2]*p)
     L.fit()
     S = L.summary('onesided')
 
     true_active = np.nonzero(active)[0]
-    if set(true_active).issubset(L.active) > 0:
+    if set(true_active).issubset(L.active):
         return S['pval'], [v in true_active for v in S['variable']]
 
 def test_adding_quadratic_lasso():

From 27a7e825d3f6e15d1a058a967035cdfe9117cac0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 12:23:54 -0700
Subject: [PATCH 047/617] fixing some active sets

---
 selection/algorithms/tests/test_lasso.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/selection/algorithms/tests/test_lasso.py b/selection/algorithms/tests/test_lasso.py
index b5a7f1a35..d5a3ae657 100644
--- a/selection/algorithms/tests/test_lasso.py
+++ b/selection/algorithms/tests/test_lasso.py
@@ -302,9 +302,8 @@ def test_data_carving_sqrt_lasso(n=200,
 def test_data_carving_logistic(n=700,
                                p=300,
                                s=5,
-                               sigma=5,
                                rho=0.05,
-                               signal=4.,
+                               signal=12.,
                                split_frac=0.8,
                                ndraw=8000,
                                burnin=2000, 
@@ -669,9 +668,8 @@ def test_gaussian_sandwich_pvals(n=200,
 def test_logistic_pvals(n=500,
                         p=200,
                         s=3,
-                        sigma=2,
                         rho=0.3,
-                        signal=10.):
+                        signal=15.):
 
     X, y, beta, true_active = logistic_instance(n=n, 
                                                 p=p, 
@@ -682,6 +680,7 @@ def test_logistic_pvals(n=500,
 
     X = np.hstack([np.ones((n,1)), X])
 
+    print(true_active, 'true')
     active = np.array(true_active)
     active += 1
     active = [0] + list(active)
@@ -691,7 +690,7 @@ def test_logistic_pvals(n=500,
     L.fit()
     S = L.summary('onesided')
 
-    true_active = np.nonzero(active)[0]
+    print(true_active, L.active)
     if set(true_active).issubset(L.active):
         return S['pval'], [v in true_active for v in S['variable']]
 

From 7072d8375f819bf4fcb2fbcf6abe5f1a6cbae0b3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 12:28:16 -0700
Subject: [PATCH 048/617] BF: fixing lasso tests

---
 selection/tests/instance.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/selection/tests/instance.py b/selection/tests/instance.py
index 6dd7cf515..f6c56ae5d 100644
--- a/selection/tests/instance.py
+++ b/selection/tests/instance.py
@@ -34,16 +34,22 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
     For the default settings, a $\lambda$ of around 13.5
     corresponds to the theoretical $E(\|X^T\epsilon\|_{\infty})$
     with $\epsilon \sim N(0, \sigma^2 I)$.
+
     Parameters
     ----------
+
     n : int
         Sample size
+
     p : int
         Number of features
+
     s : int
         True sparsity
+
     sigma : float
         Noise level
+
     rho : float
         Equicorrelation value (must be in interval [0,1])
 
@@ -81,7 +87,7 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
         Noise level.
     """
 
-    X = _design(n,p, rho, equicorrelated)
+    X = _design(n, p, rho, equicorrelated)
 
     if center:
         X -= X.mean(0)[None, :]
@@ -162,7 +168,7 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
 
     """
 
-    X = _design(n,p, rho, equicorrelated)
+    X = _design(n, p, rho, equicorrelated)
 
     if center:
         X -= X.mean(0)[None,:]

From de51f20980f91b111a70a9a6451396f3e1dbde53 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 16:12:27 -0700
Subject: [PATCH 049/617] BF: a non-integer warning from numpy from this sum?

---
 selection/randomized/M_estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index cb841b27b..171a3626e 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -277,7 +277,7 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
 
     def form_VQLambda(self):
         nactive_groups = len(self.active_directions_list)
-        nactive_vars = np.sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
+        nactive_vars = sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
         V = np.zeros((nactive_vars, nactive_vars-nactive_groups))
         #U = np.zeros((nvariables, ngroups))
         Lambda = np.zeros((nactive_vars,nactive_vars))

From d2b7fd9651893c8e7c0618e342488df0484cb024 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 17:30:47 -0700
Subject: [PATCH 050/617] WIP: fixing randomized tests

---
 selection/algorithms/sqrt_lasso.py            | 13 ++++++++
 selection/randomized/M_estimator.py           | 32 ++++++++++++++++---
 selection/randomized/tests/test_condition.py  |  6 ++--
 selection/randomized/tests/test_cv.py         | 15 ++-------
 selection/randomized/tests/test_sqrt_lasso.py | 31 ++++++++++--------
 5 files changed, 65 insertions(+), 32 deletions(-)

diff --git a/selection/algorithms/sqrt_lasso.py b/selection/algorithms/sqrt_lasso.py
index d64adbb26..94621d225 100644
--- a/selection/algorithms/sqrt_lasso.py
+++ b/selection/algorithms/sqrt_lasso.py
@@ -11,6 +11,7 @@
 import regreg.api as rr
 import regreg.affine as ra
 from regreg.smooth.glm import gaussian_loglike
+from regreg.affine import astransform
 
 from ..constraints.affine import (constraints as affine_constraints, 
                                   sample_from_sphere)
@@ -46,8 +47,20 @@ def __init__(self, X, Y,
 
         self.X = X
         self.Y = Y
+        self.data = (X, Y)
         self._sqerror = rr.squared_error(X, Y)
 
+    def get_data(self):
+        return self._X, self._Y
+
+    def set_data(self, data):
+        X, Y = data
+        self._transform = astransform(X)
+        self._X = X
+        self._is_transform = id(self._X) == id(self._transform) # i.e. astransform was a nullop
+        self._Y = Y
+
+    data = property(get_data, set_data, doc="Data for the sqrt LASSO objective.")
 
     def smooth_objective(self, x, mode='both', check_feasibility=False):
 
diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 171a3626e..1b5389803 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -1,5 +1,6 @@
 import numpy as np
 import regreg.api as rr
+import regreg.affine as ra
 
 from .query import query
 from .randomization import split
@@ -503,13 +504,36 @@ def fraction(full_state_plus, full_state_minus, inactive_marginal_groups):
             return query.construct_weights(self, full_state)
 
 def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
+    """
+    Fit a restricted model using only columns `active`.
 
+    Parameters
+    ----------
+
+    Mest_loss : objective function
+        A GLM loss.
+
+    active : ndarray
+        Which columns to use.
+
+    solve_args : dict
+        Passed to `solve`.
+
+    Returns
+    -------
+
+    soln : ndarray
+        Solution to restricted problem.
+
+    """
     X, Y = Mest_loss.data
 
-    if Mest_loss._is_transform:
-        raise NotImplementedError('to fit restricted model, X must be an ndarray or scipy.sparse; general transforms not implemented')
-    X_restricted = X[:,active]
-    loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
+    if not Mest_loss._is_transform and hasattr(Mest_loss, 'saturated_loss'): # M_est is a glm
+        X_restricted = X[:,active]
+        loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
+    else:
+        I_restricted = ra.selector(active, X.input_shape[0], ra.identity(X.input_shape))
+        loss_restricted = rr.affine_smooth(Mest_loss, I_restricted)
     beta_E = loss_restricted.solve(**solve_args)
     
     return beta_E
diff --git a/selection/randomized/tests/test_condition.py b/selection/randomized/tests/test_condition.py
index dacc9eb61..97204b4e1 100644
--- a/selection/randomized/tests/test_condition.py
+++ b/selection/randomized/tests/test_condition.py
@@ -80,11 +80,13 @@ def test_condition(s=0,
 
         if scalings: # try condition on some scalings
             for i in range(nviews):
-                views[i].condition_on_subgradient()
+                views[i].decompose_subgradient(conditioning_groups=np.zeros(p, bool),
+                                               marginalizing_groups=np.ones(p, bool))
                 views[i].condition_on_scalings()
         else:
             for i in range(nviews):
-               views[i].condition_on_subgradient()
+               views[i].decompose_subgradient(conditioning_groups=np.zeros(p, bool),
+                                               marginalizing_groups=np.ones(p, bool))
 
         active_set = np.nonzero(active_union)[0]
         target_sampler, target_observed = glm_target(loss,
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index b8d2f5c62..17ec84509 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -23,7 +23,7 @@
 @set_seed_iftrue(SET_SEED)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
-def test_cv(n=100, p=50, s=0, signal=3.5, K=5, rho=0.,
+def test_cv(n=100, p=50, s=0, signal=7.5, K=5, rho=0.,
              randomizer = 'gaussian',
              randomizer_scale = 1.,
              scale1 = 0.1,
@@ -70,7 +70,7 @@ def test_cv(n=100, p=50, s=0, signal=3.5, K=5, rho=0.,
         lam = cv.one_SD_rule(direction="up")
         print("new lam", lam)
 
-    # non-randomied Lasso, just looking how many vars it selects
+    # non-randomized Lasso, just looking how many vars it selects
     problem = rr.simple_problem(glm_loss, rr.l1norm(p, lagrange=lam))
     beta_hat = problem.solve()
     active_hat = beta_hat !=0
@@ -83,10 +83,8 @@ def test_cv(n=100, p=50, s=0, signal=3.5, K=5, rho=0.,
     M_est1 = glm_group_lasso(glm_loss, epsilon, penalty, randomizer)
 
     mv = multiple_queries([cv, M_est1])
-    #mv = multiple_queries([M_est1])
     mv.solve()
 
-    #active = soln != 0
     active_union = M_est1._overall
     nactive = np.sum(active_union)
     print("nactive", nactive)
@@ -100,7 +98,7 @@ def test_cv(n=100, p=50, s=0, signal=3.5, K=5, rho=0.,
         true_vec = beta[active_union]
 
         if marginalize_subgrad == True:
-            M_est1.decompose_subgradient(conditioning_groups=np.zeros(p, dtype=bool),
+            M_est1.decompose_subgradient(conditioning_groups=np.zeros(p, bool),
                                          marginalizing_groups=np.ones(p, bool))
 
         target_sampler, target_observed = glm_target(glm_loss,
@@ -115,9 +113,6 @@ def test_cv(n=100, p=50, s=0, signal=3.5, K=5, rho=0.,
                                                      sample=target_sample,
                                                      level=0.9)
 
-            #pivots_mle = target_sampler.coefficient_pvalues(target_observed,
-            #                                                parameter=target_sampler.reference,
-            #                                                sample=target_sample)
             pivots_truth = target_sampler.coefficient_pvalues(target_observed,
                                                               parameter=true_vec,
                                                               sample=target_sample)
@@ -131,9 +126,6 @@ def test_cv(n=100, p=50, s=0, signal=3.5, K=5, rho=0.,
             LU = target_sampler.confidence_intervals_translate(target_observed,
                                                                sample=full_sample,
                                                                level=0.9)
-            #pivots_mle = target_sampler.coefficient_pvalues_translate(target_observed,
-            #                                                          parameter=target_sampler.reference,
-            #                                                          sample=full_sample)
             pivots_truth = target_sampler.coefficient_pvalues_translate(target_observed,
                                                                         parameter=true_vec,
                                                                         sample=full_sample)
@@ -168,7 +160,6 @@ def test_cv(n=100, p=50, s=0, signal=3.5, K=5, rho=0.,
 
 def report(niter=50, **kwargs):
     np.random.seed(500)
-    #kwargs = {'s': 0, 'n': 600, 'p': 100, 'signal': 3.5, 'bootstrap': False}
     intervals_report = reports.reports['test_cv']
     runs = reports.collect_multiple_runs(intervals_report['test'],
                                              intervals_report['columns'],
diff --git a/selection/randomized/tests/test_sqrt_lasso.py b/selection/randomized/tests/test_sqrt_lasso.py
index 70b97fac0..f523fa2aa 100644
--- a/selection/randomized/tests/test_sqrt_lasso.py
+++ b/selection/randomized/tests/test_sqrt_lasso.py
@@ -33,18 +33,18 @@ def choose_lambda_with_randomization(X, randomization, quantile=0.90, ndraw=1000
 @set_seed_iftrue(SET_SEED)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
-def test_cv(n=500, p=20, s=0, signal=5, K=5, rho=0.,
-             randomizer = 'gaussian',
-             randomizer_scale = 1.,
-             scale1 = 0.1,
-             scale2 = 0.2,
-             lam_frac = 1.,
-             intervals = 'old',
-             bootstrap = False,
-             condition_on_CVR = False,
-             marginalize_subgrad = True,
-             ndraw = 10000,
-             burnin = 2000):
+def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
+                    randomizer = 'gaussian',
+                    randomizer_scale = 1.,
+                    scale1 = 0.1,
+                    scale2 = 0.2,
+                    lam_frac = 1.,
+                    intervals = 'old',
+                    bootstrap = False,
+                    condition_on_CVR = False,
+                    marginalize_subgrad = True,
+                    ndraw = 10000,
+                    burnin = 2000):
 
     print(n,p,s)
     if randomizer == 'laplace':
@@ -59,7 +59,7 @@ def test_cv(n=500, p=20, s=0, signal=5, K=5, rho=0.,
     lam_random = choose_lambda_with_randomization(X, randomizer)
     loss = sqlasso_objective(X, y)
 
-    epsilon = 1./np.sqrt(n)
+    epsilon = 1./n
 
     # non-randomized sqrt-Lasso, just looking how many vars it selects
     problem = rr.simple_problem(loss, rr.l1norm(p, lagrange=lam_nonrandom))
@@ -71,7 +71,7 @@ def test_cv(n=500, p=20, s=0, signal=5, K=5, rho=0.,
     # view 2
     W = lam_frac * np.ones(p) * lam_random
     penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+                             weights=dict(zip(np.arange(p), W)), lagrange=1. / np.sqrt(n))
     M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
 
     mv = multiple_queries([M_est1])
@@ -84,6 +84,9 @@ def test_cv(n=500, p=20, s=0, signal=5, K=5, rho=0.,
     if nactive==0:
         return None
 
+    import sys
+    sys.stderr.write(`(nonzero, active_union )` + '\n')
+
     nonzero = np.where(beta)[0]
     if set(nonzero).issubset(np.nonzero(active_union)[0]):
 

From 9077cc47828ecf987289a48c4e56c72ce11dc54a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 17:31:38 -0700
Subject: [PATCH 051/617] BF: get shape from reliable place -- covariance must
 be a 2d matrix

---
 selection/randomized/query.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 168610acc..50f429d8f 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -236,8 +236,7 @@ def setup_sampler(self, form_covariances):
         curr_randomization_length = 0
         self.randomization_slice = []
         for objective in self.objectives:
-            randomization_length = objective._beta_full.shape[0]
-            #print(randomization_length)
+            randomization_length = objective.loss.shape[0]
             self.randomization_slice.append(slice(curr_randomization_length,
                                                   curr_randomization_length + randomization_length))
             curr_randomization_length = curr_randomization_length + randomization_length
@@ -430,6 +429,7 @@ def __init__(self,
                 self.objectives[i].linear_decomposition(self.score_cov[i],
                                                         self.target_cov,
                                                         self.observed_target_state))
+        self.target_cov = np.atleast_2d(self.target_cov)
         self.target_inv_cov = np.linalg.inv(self.target_cov)
         # size of reference? should it only be target_set?
         if reference is None:

From 22a9af8f45c5a88867a641c289a69aefa3c61f58 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 11 Aug 2017 12:37:52 -0700
Subject: [PATCH 052/617] BF: needed pyinter for quasi_affine -- though
 quasi_affine may be deprecated soon

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index cc95f789a..6c0c8d676 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ mpmath
 pyinter
 statsmodels
 sklearn
+pyinter

From 2be4ea2ceadb1b10bd90ba0810991c8c35dd3a17 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 07:14:38 -0700
Subject: [PATCH 053/617] BF: fixed standard_ci call -- removed statsmodels

---
 selection/randomized/glm.py                            | 9 ---------
 selection/randomized/tests/test_multiple_queries_CI.py | 2 +-
 selection/randomized/tests/test_multiple_splits.py     | 2 +-
 selection/randomized/tests/test_split_compare.py       | 7 +++----
 4 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 00699837c..06e5798cc 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -557,12 +557,3 @@ def standard_ci(glm_loss, X, y , active, leftout_indices, alpha=0.1):
         LU[1, j] = observed[j] + sigma * quantile
     return LU.T
 
-
-def standard_ci_sm(X, y, active, leftout_indices, alpha=0.1):
-    XE = X[:, active]
-    X2, y2 = XE[leftout_indices, :], y[leftout_indices]
-    import statsmodels.discrete.discrete_model as sm
-    logit = sm.Logit(y2, X2)
-    result = logit.fit(disp=0)
-    LU = result.conf_int(alpha=alpha)
-    return LU.T
\ No newline at end of file
diff --git a/selection/randomized/tests/test_multiple_queries_CI.py b/selection/randomized/tests/test_multiple_queries_CI.py
index 31ad2463e..44a56a6b2 100644
--- a/selection/randomized/tests/test_multiple_queries_CI.py
+++ b/selection/randomized/tests/test_multiple_queries_CI.py
@@ -13,7 +13,7 @@
                            glm_target)
 from selection.tests.instance import logistic_instance
 from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-from selection.randomized.glm import standard_ci, standard_ci_sm
+from selection.randomized.glm import standard_ci
 from selection.randomized.query import naive_confidence_intervals
 
 @register_report(['pivots_clt', 'pivots_boot',
diff --git a/selection/randomized/tests/test_multiple_splits.py b/selection/randomized/tests/test_multiple_splits.py
index 76a0080cb..7125192bf 100644
--- a/selection/randomized/tests/test_multiple_splits.py
+++ b/selection/randomized/tests/test_multiple_splits.py
@@ -13,7 +13,7 @@
                            glm_target)
 from selection.tests.instance import logistic_instance
 from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-from selection.randomized.glm import standard_ci, standard_ci_sm
+from selection.randomized.glm import standard_ci
 from selection.randomized.query import naive_confidence_intervals
 
 @register_report(['pivots_clt', 'pivots_boot',
diff --git a/selection/randomized/tests/test_split_compare.py b/selection/randomized/tests/test_split_compare.py
index 5c99fe90a..900a9bc8c 100644
--- a/selection/randomized/tests/test_split_compare.py
+++ b/selection/randomized/tests/test_split_compare.py
@@ -13,7 +13,7 @@
                            glm_target)
 from selection.tests.instance import logistic_instance
 from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-from selection.randomized.glm import standard_ci, standard_ci_sm 
+from selection.randomized.glm import standard_ci
 from selection.randomized.query import naive_confidence_intervals
 
 @register_report(['pivots_clt', 'pivots_boot', 
@@ -125,10 +125,9 @@ def test_split_compare(s=3,
         LU_naive = naive_confidence_intervals(target_sampler, target_observed)
 
         if X.shape[0] - leftout_indices.sum() > nactive:
-            LU_split = standard_ci(X, y, active_union, leftout_indices)
-            LU_split_sm = standard_ci_sm(X, y, active_union, leftout_indices)
+            LU_split = standard_ci(rr.glm.logistic, X, y, active_union, leftout_indices)
         else:
-            LU_split = LU_split_sm = np.ones((nactive, 2)) * np.nan
+            LU_split = np.ones((nactive, 2)) * np.nan
 
         def coverage(LU):
             L, U = LU[:,0], LU[:,1]

From fef9772581e4100bfef3c4cbaf770ab48bee8a5f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 07:16:16 -0700
Subject: [PATCH 054/617] BF: made overall boolean

---
 selection/randomized/M_estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 1b5389803..9d476cf63 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -105,7 +105,7 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         self.active_penalty = active_penalty
         # solve the restricted problem
 
-        self._overall = active + unpenalized
+        self._overall = active + unpenalized > 0
         self._inactive = ~self._overall
         self._unpenalized = unpenalized
 

From 179116e59d698e67fc7ff0016f92c59aedef6f60 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 07:33:51 -0700
Subject: [PATCH 055/617] BF: using lowercase langevin variable name

---
 selection/reduced_optimization/credible_intervals.py   |  2 +-
 selection/reduced_optimization/dual_lasso.py           | 10 +++++-----
 .../reduced_optimization/forward_stepwise_reduced.py   |  4 ++--
 selection/reduced_optimization/lasso_reduced.py        | 10 +++++-----
 .../reduced_optimization/marginal_screening_reduced.py | 10 +++++-----
 .../reduced_optimization/ms_lasso_2stage_reduced.py    | 10 +++++-----
 selection/reduced_optimization/par_carved_reduced.py   |  4 ++--
 .../reduced_optimization/par_random_lasso_reduced.py   |  4 ++--
 selection/reduced_optimization/random_lasso_reduced.py |  4 ++--
 9 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/selection/reduced_optimization/credible_intervals.py b/selection/reduced_optimization/credible_intervals.py
index e8d59f61d..4c4644187 100644
--- a/selection/reduced_optimization/credible_intervals.py
+++ b/selection/reduced_optimization/credible_intervals.py
@@ -33,4 +33,4 @@ def next(self):
                 self._sqrt_step *= 0.8
             else:
                 self.state[:] = candidate
-                break
\ No newline at end of file
+                break
diff --git a/selection/reduced_optimization/dual_lasso.py b/selection/reduced_optimization/dual_lasso.py
index 2a030cbcc..09f8af9da 100644
--- a/selection/reduced_optimization/dual_lasso.py
+++ b/selection/reduced_optimization/dual_lasso.py
@@ -343,7 +343,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=50):
+    def posterior_samples(self, langevin_steps=1500, burnin=50):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -353,7 +353,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in xrange(Langevin_steps):
+        for i in xrange(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
@@ -362,7 +362,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
         samples = np.array(samples)
         return samples[burnin:, :]
 
-    def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -373,7 +373,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             sample = sampler.state.copy()
 
@@ -387,7 +387,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/Langevin_steps, post_risk_2/Langevin_steps
+        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
 
 
 
diff --git a/selection/reduced_optimization/forward_stepwise_reduced.py b/selection/reduced_optimization/forward_stepwise_reduced.py
index 23caccbd5..62f9a3b70 100644
--- a/selection/reduced_optimization/forward_stepwise_reduced.py
+++ b/selection/reduced_optimization/forward_stepwise_reduced.py
@@ -395,7 +395,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1000, burnin=100):
+    def posterior_samples(self, langevin_steps=1000, burnin=100):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -405,7 +405,7 @@ def posterior_samples(self, Langevin_steps=1000, burnin=100):
 
         samples = []
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
diff --git a/selection/reduced_optimization/lasso_reduced.py b/selection/reduced_optimization/lasso_reduced.py
index e8e5660ea..db23df0c2 100644
--- a/selection/reduced_optimization/lasso_reduced.py
+++ b/selection/reduced_optimization/lasso_reduced.py
@@ -478,7 +478,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=50):
+    def posterior_samples(self, langevin_steps=1500, burnin=50):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -488,7 +488,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             print(i, sampler.state.copy())
@@ -497,7 +497,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
         samples = np.array(samples)
         return samples[burnin:, :]
 
-    def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -508,7 +508,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             sample = sampler.state.copy()
 
@@ -522,4 +522,4 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/Langevin_steps, post_risk_2/Langevin_steps
+        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
diff --git a/selection/reduced_optimization/marginal_screening_reduced.py b/selection/reduced_optimization/marginal_screening_reduced.py
index 666ec8657..d01280d33 100644
--- a/selection/reduced_optimization/marginal_screening_reduced.py
+++ b/selection/reduced_optimization/marginal_screening_reduced.py
@@ -339,7 +339,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=50):
+    def posterior_samples(self, langevin_steps=1500, burnin=50):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by marginal screening: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -349,7 +349,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in xrange(Langevin_steps):
+        for i in xrange(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
@@ -358,7 +358,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
         samples = np.array(samples)
         return samples[burnin:, :]
 
-    def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -369,7 +369,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             sample = sampler.state.copy()
 
@@ -383,4 +383,4 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/Langevin_steps, post_risk_2/Langevin_steps
+        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
diff --git a/selection/reduced_optimization/ms_lasso_2stage_reduced.py b/selection/reduced_optimization/ms_lasso_2stage_reduced.py
index 9016757fb..a1be52d8c 100644
--- a/selection/reduced_optimization/ms_lasso_2stage_reduced.py
+++ b/selection/reduced_optimization/ms_lasso_2stage_reduced.py
@@ -407,7 +407,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=50):
+    def posterior_samples(self, langevin_steps=1500, burnin=50):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -417,7 +417,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
@@ -426,7 +426,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
         samples = np.array(samples)
         return samples[burnin:, :]
 
-    def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=1200, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=1200, burnin=0):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -437,7 +437,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=1200, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             sample = sampler.state.copy()
 
@@ -451,7 +451,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=1200, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/Langevin_steps, post_risk_2/Langevin_steps
+        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
 
 
 
diff --git a/selection/reduced_optimization/par_carved_reduced.py b/selection/reduced_optimization/par_carved_reduced.py
index 7c4c179a3..7b79e8e01 100644
--- a/selection/reduced_optimization/par_carved_reduced.py
+++ b/selection/reduced_optimization/par_carved_reduced.py
@@ -279,7 +279,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=100):
+    def posterior_samples(self, langevin_steps=1500, burnin=100):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective_post(x, 'grad')
@@ -289,7 +289,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=100):
 
         samples = []
 
-        for i in xrange(Langevin_steps):
+        for i in xrange(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             sys.stderr.write("sample number: " + str(i) + "\n")
diff --git a/selection/reduced_optimization/par_random_lasso_reduced.py b/selection/reduced_optimization/par_random_lasso_reduced.py
index a8db66d31..d810e458a 100644
--- a/selection/reduced_optimization/par_random_lasso_reduced.py
+++ b/selection/reduced_optimization/par_random_lasso_reduced.py
@@ -316,7 +316,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=100):
+    def posterior_samples(self, langevin_steps=1500, burnin=100):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective_post(x, 'grad')
@@ -326,7 +326,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=100):
 
         samples = []
 
-        for i in xrange(Langevin_steps):
+        for i in xrange(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             sys.stderr.write("sample number: " + str(i) + "\n")
diff --git a/selection/reduced_optimization/random_lasso_reduced.py b/selection/reduced_optimization/random_lasso_reduced.py
index 10ee842d7..e7e9bcdb7 100644
--- a/selection/reduced_optimization/random_lasso_reduced.py
+++ b/selection/reduced_optimization/random_lasso_reduced.py
@@ -318,7 +318,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=100):
+    def posterior_samples(self, langevin_steps=1500, burnin=100):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective_post(x, 'grad')
@@ -328,7 +328,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=100):
 
         samples = []
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()

From 83321384ec7f215329e72a7122b2b005c8ebd005 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 07:39:58 -0700
Subject: [PATCH 056/617] TEST: test_reduced_lasso running a small sample

---
 .../tests/test_reduced_lasso.py               | 99 +++++++------------
 1 file changed, 36 insertions(+), 63 deletions(-)

diff --git a/selection/reduced_optimization/tests/test_reduced_lasso.py b/selection/reduced_optimization/tests/test_reduced_lasso.py
index 19677759e..fb11201ce 100644
--- a/selection/reduced_optimization/tests/test_reduced_lasso.py
+++ b/selection/reduced_optimization/tests/test_reduced_lasso.py
@@ -4,17 +4,27 @@
 import os
 
 import numpy as np
-from selection.reduced_optimization.initial_soln import selection, instance
 
-from selection.reduced_optimization.lasso_reduced import nonnegative_softmax_scaled, neg_log_cube_probability, selection_probability_lasso, \
-    sel_prob_gradient_map_lasso, selective_inf_lasso
+from selection.api import randomization
+from ..initial_soln import selection, instance
+from ..lasso_reduced import (nonnegative_softmax_scaled, 
+                             neg_log_cube_probability, 
+                             selection_probability_lasso, 
+                             sel_prob_gradient_map_lasso, 
+                             selective_inf_lasso)
 
+from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from selection.tests.decorators import (set_sampling_params_iftrue,
+                                        set_seed_iftrue)
+
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
 def randomized_lasso_trial(X,
                            y,
                            beta,
-                           sigma):
-
-    from selection.api import randomization
+                           sigma,
+                           ndraw=1000,
+                           burnin=50):
 
     n, p = X.shape
 
@@ -50,7 +60,8 @@ def randomized_lasso_trial(X,
 
         inf = selective_inf_lasso(y, grad_map, prior_variance)
 
-        samples = inf.posterior_samples()
+        # for the tests, just take a few steps
+        samples = inf.posterior_samples(langevin_steps=ndraw, burnin=burnin)
 
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -105,66 +116,28 @@ def test_reduced_lasso():
     s = 10
     snr = 7.
 
-    ### GENERATE X
-    np.random.seed(0)  # ensures same X
-
     sample = instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
 
-    niter = 5
-
     ad_cov = 0.
     unad_cov = 0.
     ad_len = 0.
     unad_len = 0.
 
-    for i in range(niter):
-
-         ### GENERATE Y BASED ON SEED
-         np.random.seed(i+3)  # ensures different y
-         X, y, beta, nonzero, sigma = sample.generate_response()
-
-         ### RUN LASSO AND TEST
-         lasso = randomized_lasso_trial(X,
-                                        y,
-                                        beta,
-                                        sigma)
-
-         if lasso is not None:
-             ad_cov += lasso[0,0]
-             unad_cov += lasso[1,0]
-             ad_len += lasso[2, 0]
-             unad_len += lasso[3, 0]
-             print("\n")
-             print("iteration completed", i)
-             print("\n")
-             print("adjusted and unadjusted coverage", ad_cov, unad_cov)
-             print("adjusted and unadjusted lengths", ad_len, unad_len)
-
-# if __name__ == "__main__":
-# # read from command line
-#     seedn=int(sys.argv[1])
-#     outdir=sys.argv[2]
-
-#     outfile = os.path.join(outdir, "list_result_" + str(seedn) + ".txt")
-
-# ### set parameters
-#     n = 500
-#     p = 3000
-#     s = 0
-#     snr = 7.
-
-# ### GENERATE X
-#     np.random.seed(0)  # ensures same X
-
-#     sample = instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
-
-# ### GENERATE Y BASED ON SEED
-#     np.random.seed(seedn) # ensures different y
-#     X, y, beta, nonzero, sigma = sample.generate_response()
-
-#     lasso = randomized_lasso_trial(X,
-#                                    y,
-#                                    beta,
-#                                    sigma)
-
-#     np.savetxt(outfile, lasso)
+    X, y, beta, nonzero, sigma = sample.generate_response()
+
+    ### RUN LASSO AND TEST
+    lasso = randomized_lasso_trial(X,
+                                   y,
+                                   beta,
+                                   sigma)
+
+    if lasso is not None:
+        ad_cov += lasso[0,0]
+        unad_cov += lasso[1,0]
+        ad_len += lasso[2, 0]
+        unad_len += lasso[3, 0]
+        print("\n")
+        print("\n")
+        print("adjusted and unadjusted coverage", ad_cov, unad_cov)
+        print("adjusted and unadjusted lengths", ad_len, unad_len)
+        

From 7fc05e4ca66f8a6d5c476a71df1064167c9cfc8f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 07:33:51 -0700
Subject: [PATCH 057/617] BF: using lowercase langevin variable name

---
 selection/reduced_optimization/credible_intervals.py   |  2 +-
 selection/reduced_optimization/dual_lasso.py           | 10 +++++-----
 .../reduced_optimization/forward_stepwise_reduced.py   |  4 ++--
 selection/reduced_optimization/lasso_reduced.py        | 10 +++++-----
 .../reduced_optimization/marginal_screening_reduced.py | 10 +++++-----
 .../reduced_optimization/ms_lasso_2stage_reduced.py    | 10 +++++-----
 selection/reduced_optimization/par_carved_reduced.py   |  4 ++--
 .../reduced_optimization/par_random_lasso_reduced.py   |  4 ++--
 selection/reduced_optimization/random_lasso_reduced.py |  4 ++--
 9 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/selection/reduced_optimization/credible_intervals.py b/selection/reduced_optimization/credible_intervals.py
index e8d59f61d..4c4644187 100644
--- a/selection/reduced_optimization/credible_intervals.py
+++ b/selection/reduced_optimization/credible_intervals.py
@@ -33,4 +33,4 @@ def next(self):
                 self._sqrt_step *= 0.8
             else:
                 self.state[:] = candidate
-                break
\ No newline at end of file
+                break
diff --git a/selection/reduced_optimization/dual_lasso.py b/selection/reduced_optimization/dual_lasso.py
index 2a030cbcc..09f8af9da 100644
--- a/selection/reduced_optimization/dual_lasso.py
+++ b/selection/reduced_optimization/dual_lasso.py
@@ -343,7 +343,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=50):
+    def posterior_samples(self, langevin_steps=1500, burnin=50):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -353,7 +353,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in xrange(Langevin_steps):
+        for i in xrange(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
@@ -362,7 +362,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
         samples = np.array(samples)
         return samples[burnin:, :]
 
-    def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -373,7 +373,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             sample = sampler.state.copy()
 
@@ -387,7 +387,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/Langevin_steps, post_risk_2/Langevin_steps
+        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
 
 
 
diff --git a/selection/reduced_optimization/forward_stepwise_reduced.py b/selection/reduced_optimization/forward_stepwise_reduced.py
index 23caccbd5..62f9a3b70 100644
--- a/selection/reduced_optimization/forward_stepwise_reduced.py
+++ b/selection/reduced_optimization/forward_stepwise_reduced.py
@@ -395,7 +395,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1000, burnin=100):
+    def posterior_samples(self, langevin_steps=1000, burnin=100):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -405,7 +405,7 @@ def posterior_samples(self, Langevin_steps=1000, burnin=100):
 
         samples = []
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
diff --git a/selection/reduced_optimization/lasso_reduced.py b/selection/reduced_optimization/lasso_reduced.py
index e8e5660ea..db23df0c2 100644
--- a/selection/reduced_optimization/lasso_reduced.py
+++ b/selection/reduced_optimization/lasso_reduced.py
@@ -478,7 +478,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=50):
+    def posterior_samples(self, langevin_steps=1500, burnin=50):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -488,7 +488,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             print(i, sampler.state.copy())
@@ -497,7 +497,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
         samples = np.array(samples)
         return samples[burnin:, :]
 
-    def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -508,7 +508,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             sample = sampler.state.copy()
 
@@ -522,4 +522,4 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/Langevin_steps, post_risk_2/Langevin_steps
+        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
diff --git a/selection/reduced_optimization/marginal_screening_reduced.py b/selection/reduced_optimization/marginal_screening_reduced.py
index 666ec8657..d01280d33 100644
--- a/selection/reduced_optimization/marginal_screening_reduced.py
+++ b/selection/reduced_optimization/marginal_screening_reduced.py
@@ -339,7 +339,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=50):
+    def posterior_samples(self, langevin_steps=1500, burnin=50):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by marginal screening: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -349,7 +349,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in xrange(Langevin_steps):
+        for i in xrange(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
@@ -358,7 +358,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
         samples = np.array(samples)
         return samples[burnin:, :]
 
-    def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -369,7 +369,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             sample = sampler.state.copy()
 
@@ -383,4 +383,4 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=2000, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/Langevin_steps, post_risk_2/Langevin_steps
+        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
diff --git a/selection/reduced_optimization/ms_lasso_2stage_reduced.py b/selection/reduced_optimization/ms_lasso_2stage_reduced.py
index 9016757fb..a1be52d8c 100644
--- a/selection/reduced_optimization/ms_lasso_2stage_reduced.py
+++ b/selection/reduced_optimization/ms_lasso_2stage_reduced.py
@@ -407,7 +407,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=50):
+    def posterior_samples(self, langevin_steps=1500, burnin=50):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -417,7 +417,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
@@ -426,7 +426,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=50):
         samples = np.array(samples)
         return samples[burnin:, :]
 
-    def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=1200, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=1200, burnin=0):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -437,7 +437,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=1200, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             sample = sampler.state.copy()
 
@@ -451,7 +451,7 @@ def posterior_risk(self, estimator_1, estimator_2, Langevin_steps=1200, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/Langevin_steps, post_risk_2/Langevin_steps
+        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
 
 
 
diff --git a/selection/reduced_optimization/par_carved_reduced.py b/selection/reduced_optimization/par_carved_reduced.py
index 7c4c179a3..7b79e8e01 100644
--- a/selection/reduced_optimization/par_carved_reduced.py
+++ b/selection/reduced_optimization/par_carved_reduced.py
@@ -279,7 +279,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=100):
+    def posterior_samples(self, langevin_steps=1500, burnin=100):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective_post(x, 'grad')
@@ -289,7 +289,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=100):
 
         samples = []
 
-        for i in xrange(Langevin_steps):
+        for i in xrange(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             sys.stderr.write("sample number: " + str(i) + "\n")
diff --git a/selection/reduced_optimization/par_random_lasso_reduced.py b/selection/reduced_optimization/par_random_lasso_reduced.py
index a8db66d31..d810e458a 100644
--- a/selection/reduced_optimization/par_random_lasso_reduced.py
+++ b/selection/reduced_optimization/par_random_lasso_reduced.py
@@ -316,7 +316,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=100):
+    def posterior_samples(self, langevin_steps=1500, burnin=100):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective_post(x, 'grad')
@@ -326,7 +326,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=100):
 
         samples = []
 
-        for i in xrange(Langevin_steps):
+        for i in xrange(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             sys.stderr.write("sample number: " + str(i) + "\n")
diff --git a/selection/reduced_optimization/random_lasso_reduced.py b/selection/reduced_optimization/random_lasso_reduced.py
index 10ee842d7..e7e9bcdb7 100644
--- a/selection/reduced_optimization/random_lasso_reduced.py
+++ b/selection/reduced_optimization/random_lasso_reduced.py
@@ -318,7 +318,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, Langevin_steps=1500, burnin=100):
+    def posterior_samples(self, langevin_steps=1500, burnin=100):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective_post(x, 'grad')
@@ -328,7 +328,7 @@ def posterior_samples(self, Langevin_steps=1500, burnin=100):
 
         samples = []
 
-        for i in range(Langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()

From 8f335a0995cfd9ec1e7cd7f8aa908a73bc9691f3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 07:39:58 -0700
Subject: [PATCH 058/617] TEST: test_reduced_lasso running a small sample

---
 .../tests/test_reduced_lasso.py               | 99 +++++++------------
 1 file changed, 36 insertions(+), 63 deletions(-)

diff --git a/selection/reduced_optimization/tests/test_reduced_lasso.py b/selection/reduced_optimization/tests/test_reduced_lasso.py
index 19677759e..fb11201ce 100644
--- a/selection/reduced_optimization/tests/test_reduced_lasso.py
+++ b/selection/reduced_optimization/tests/test_reduced_lasso.py
@@ -4,17 +4,27 @@
 import os
 
 import numpy as np
-from selection.reduced_optimization.initial_soln import selection, instance
 
-from selection.reduced_optimization.lasso_reduced import nonnegative_softmax_scaled, neg_log_cube_probability, selection_probability_lasso, \
-    sel_prob_gradient_map_lasso, selective_inf_lasso
+from selection.api import randomization
+from ..initial_soln import selection, instance
+from ..lasso_reduced import (nonnegative_softmax_scaled, 
+                             neg_log_cube_probability, 
+                             selection_probability_lasso, 
+                             sel_prob_gradient_map_lasso, 
+                             selective_inf_lasso)
 
+from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from selection.tests.decorators import (set_sampling_params_iftrue,
+                                        set_seed_iftrue)
+
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
 def randomized_lasso_trial(X,
                            y,
                            beta,
-                           sigma):
-
-    from selection.api import randomization
+                           sigma,
+                           ndraw=1000,
+                           burnin=50):
 
     n, p = X.shape
 
@@ -50,7 +60,8 @@ def randomized_lasso_trial(X,
 
         inf = selective_inf_lasso(y, grad_map, prior_variance)
 
-        samples = inf.posterior_samples()
+        # for the tests, just take a few steps
+        samples = inf.posterior_samples(langevin_steps=ndraw, burnin=burnin)
 
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -105,66 +116,28 @@ def test_reduced_lasso():
     s = 10
     snr = 7.
 
-    ### GENERATE X
-    np.random.seed(0)  # ensures same X
-
     sample = instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
 
-    niter = 5
-
     ad_cov = 0.
     unad_cov = 0.
     ad_len = 0.
     unad_len = 0.
 
-    for i in range(niter):
-
-         ### GENERATE Y BASED ON SEED
-         np.random.seed(i+3)  # ensures different y
-         X, y, beta, nonzero, sigma = sample.generate_response()
-
-         ### RUN LASSO AND TEST
-         lasso = randomized_lasso_trial(X,
-                                        y,
-                                        beta,
-                                        sigma)
-
-         if lasso is not None:
-             ad_cov += lasso[0,0]
-             unad_cov += lasso[1,0]
-             ad_len += lasso[2, 0]
-             unad_len += lasso[3, 0]
-             print("\n")
-             print("iteration completed", i)
-             print("\n")
-             print("adjusted and unadjusted coverage", ad_cov, unad_cov)
-             print("adjusted and unadjusted lengths", ad_len, unad_len)
-
-# if __name__ == "__main__":
-# # read from command line
-#     seedn=int(sys.argv[1])
-#     outdir=sys.argv[2]
-
-#     outfile = os.path.join(outdir, "list_result_" + str(seedn) + ".txt")
-
-# ### set parameters
-#     n = 500
-#     p = 3000
-#     s = 0
-#     snr = 7.
-
-# ### GENERATE X
-#     np.random.seed(0)  # ensures same X
-
-#     sample = instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
-
-# ### GENERATE Y BASED ON SEED
-#     np.random.seed(seedn) # ensures different y
-#     X, y, beta, nonzero, sigma = sample.generate_response()
-
-#     lasso = randomized_lasso_trial(X,
-#                                    y,
-#                                    beta,
-#                                    sigma)
-
-#     np.savetxt(outfile, lasso)
+    X, y, beta, nonzero, sigma = sample.generate_response()
+
+    ### RUN LASSO AND TEST
+    lasso = randomized_lasso_trial(X,
+                                   y,
+                                   beta,
+                                   sigma)
+
+    if lasso is not None:
+        ad_cov += lasso[0,0]
+        unad_cov += lasso[1,0]
+        ad_len += lasso[2, 0]
+        unad_len += lasso[3, 0]
+        print("\n")
+        print("\n")
+        print("adjusted and unadjusted coverage", ad_cov, unad_cov)
+        print("adjusted and unadjusted lengths", ad_len, unad_len)
+        

From b60f4dad806cad0ea6a17be9fdf315398caf1145 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 07:44:45 -0700
Subject: [PATCH 059/617] DOC: fixing import, trimming line

---
 selection/randomized/tests/test_cv.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 17ec84509..62be9d65c 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -13,7 +13,10 @@
 
 import selection.tests.reports as reports
 from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+from selection.tests.decorators import (wait_for_return_value, 
+                                        set_seed_iftrue, 
+                                        set_sampling_params_iftrue, 
+                                        register_report)
 from selection.randomized.cv_view import CV_view
 from statsmodels.sandbox.stats.multicomp import multipletests
 

From af6722e75114bd42995f64dd3d572c78d74a6568 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 11:35:05 -0700
Subject: [PATCH 060/617] BF: added feasibility check for debiased lasso, test
 for KKT conditions to dual problem

---
 selection/algorithms/debiased_lasso.py        | 17 +++-
 .../algorithms/tests/test_debiased_lasso.py   | 23 ++++-
 .../tests/test_selection_random_lasso.py      | 94 +++++++++----------
 3 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 11ae2db6d..35f4f8ccc 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -6,7 +6,7 @@
 
 from ..constraints.affine import constraints
 
-def _find_row_approx_inverse(Sigma, j, delta):
+def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):
     """
 
     Find an approximation of j-th row of inverse of Sigma.
@@ -19,8 +19,19 @@ def _find_row_approx_inverse(Sigma, j, delta):
     penalty = l1norm(p, lagrange=delta)
     iq = identity_quadratic(0, 0, elem_basis, 0)
     problem = simple_problem(loss, penalty)
-    linfunc = problem.solve(iq, min_its=100)
-    return -linfunc
+    dual_soln = problem.solve(iq, **solve_args)
+
+    soln = -dual_soln
+
+    # check feasibility -- if it fails miserably
+    # presume delta was too small
+
+    feasibility_gap = np.fabs(Sigma.dot(soln) - elem_basis).max()
+    if feasibility_gap > (1.01) * delta:
+        raise ValueError('does not seem to be a feasible point -- try increasing delta')
+
+    return soln
+
 
 def debiased_lasso_inference(lasso_obj, variables, delta):
 
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index c540dd530..38fa14483 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -6,7 +6,8 @@
 import selection.tests.reports as reports
 
 from selection.algorithms.lasso import lasso 
-from selection.algorithms.debiased_lasso import debiased_lasso_inference
+from selection.algorithms.debiased_lasso import (debiased_lasso_inference,
+                                                 _find_row_approx_inverse)
 import regreg.api as rr
 
 def test_gaussian(n=100, p=20):
@@ -24,3 +25,23 @@ def test_gaussian(n=100, p=20):
 
     print(debiased_lasso_inference(L, L.active, np.sqrt(2 * np.log(p) / n)))
     print(beta)
+
+def test_approx_inverse():
+
+    n, p = 50, 100
+    X = np.random.standard_normal((n, p))
+    S = X.T.dot(X) / n
+    j = 5
+    delta = 0.60
+    
+    soln = _find_row_approx_inverse(S, j, delta)
+
+    basis_vector = np.zeros(p)
+    basis_vector[j] = 1.
+
+    nt.assert_true(np.fabs(S.dot(soln) - basis_vector).max() < delta * 1.001)
+
+    U = - S.dot(-soln) - basis_vector
+    nt.assert_true(np.fabs(U).max() < delta * 1.001)
+    nt.assert_equal(np.argmax(np.fabs(U)), j)
+    nt.assert_equal(np.sign(U[j]), -np.sign(soln[j]))
diff --git a/selection/reduced_optimization/tests/test_selection_random_lasso.py b/selection/reduced_optimization/tests/test_selection_random_lasso.py
index a4cb8591a..bba9eab78 100644
--- a/selection/reduced_optimization/tests/test_selection_random_lasso.py
+++ b/selection/reduced_optimization/tests/test_selection_random_lasso.py
@@ -5,59 +5,53 @@
 from selection.reduced_optimization.initial_soln import selection
 from selection.tests.instance import logistic_instance, gaussian_instance
 
-#from selection.reduced_optimization.random_lasso_reduced import log_likelihood, selection_probability_random_lasso, sel_inf_random_lasso
-from selection.reduced_optimization.par_random_lasso_reduced import selection_probability_random_lasso, sel_inf_random_lasso
-from selection.reduced_optimization.estimator import M_estimator_approx
+from ..par_random_lasso_reduced import (selection_probability_random_lasso, 
+                                        sel_inf_random_lasso)
+from ..estimator import M_estimator_approx
 from selection.api import randomization
 
+def test_selection():
+    n = 500
+    p = 100
+    s = 0
+    signal = 0.
 
-n = 500
-p = 100
-s = 0
-snr = 0.
-
-np.random.seed(3)  # ensures different y
-X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
-lam = 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-n, p = X.shape
-
-loss = rr.glm.gaussian(X, y)
-epsilon = 1. / np.sqrt(n)
-
-W = np.ones(p) * lam
-penalty = rr.group_lasso(np.arange(p),weights=dict(zip(np.arange(p), W)), lagrange=1.)
-randomization = randomization.isotropic_gaussian((p,), scale=1.)
-
-M_est = M_estimator_approx(loss, epsilon, penalty, randomization, 'gaussian', 'parametric')
-M_est.solve_approx()
-active = M_est._overall
-active_set = np.asarray([i for i in range(p) if active[i]])
-nactive = np.sum(active)
-
-prior_variance = 1000.
-noise_variance = sigma ** 2
-
-generative_mean = np.zeros(p)
-generative_mean[:nactive] = M_est.initial_soln[active]
-sel_split = selection_probability_random_lasso(M_est, generative_mean)
-min = sel_split.minimize2(nstep=200)
-print(min[0], min[1])
-
-test_point = np.append(M_est.observed_score_state, np.abs(M_est.initial_soln[M_est._overall]))
-# print("gradient at test point", sel_split.smooth_objective(test_point, mode= "grad"))
-# print("break up of gradients 1", sel_split.active_conj_loss.smooth_objective(test_point, mode= "grad"))
-# print("break up of gradients 2", sel_split.likelihood_loss.smooth_objective(test_point, mode= "grad"))
-# print("break up of gradients 3", sel_split.nonnegative_barrier.smooth_objective(test_point, mode= "grad"))
-# print("break up of gradient 4 ", sel_split.cube_loss.smooth_objective(test_point, mode= "grad"))
-#
-print("value of likelihood", sel_split.likelihood_loss.smooth_objective(test_point, mode= "func"))
-#
-inv_cov = np.linalg.inv(M_est.score_cov)
-lik = (M_est.observed_score_state-generative_mean).T.dot(inv_cov).dot(M_est.observed_score_state-generative_mean)/2.
-print("value of likelihood check", lik)
-grad = inv_cov.dot(M_est.observed_score_state-generative_mean)
-print("grad at likelihood loss", grad)
+    np.random.seed(3)  # fixed seed so the generated instance is reproducible
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, signal=signal)
+    lam = 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+
+    n, p = X.shape
+
+    loss = rr.glm.gaussian(X, y)
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),weights=dict(zip(np.arange(p), W)), lagrange=1.)
+    randomizer = randomization.isotropic_gaussian((p,), scale=1.)
+
+    M_est = M_estimator_approx(loss, epsilon, penalty, randomizer, 'gaussian', 'parametric')
+    M_est.solve_approx()
+    active = M_est._overall
+    active_set = np.asarray([i for i in range(p) if active[i]])
+    nactive = np.sum(active)
+
+    prior_variance = 1000.
+    noise_variance = sigma ** 2
+
+    generative_mean = np.zeros(p)
+    generative_mean[:nactive] = M_est.initial_soln[active]
+    sel_split = selection_probability_random_lasso(M_est, generative_mean)
+    min = sel_split.minimize2(nstep=200)
+    print(min[0], min[1])
+
+    test_point = np.append(M_est.observed_score_state, np.abs(M_est.initial_soln[M_est._overall]))
+    print("value of likelihood", sel_split.likelihood_loss.smooth_objective(test_point, mode= "func"))
+
+    inv_cov = np.linalg.inv(M_est.score_cov)
+    lik = (M_est.observed_score_state-generative_mean).T.dot(inv_cov).dot(M_est.observed_score_state-generative_mean)/2.
+    print("value of likelihood check", lik)
+    grad = inv_cov.dot(M_est.observed_score_state-generative_mean)
+    print("grad at likelihood loss", grad)
 
 
 

From 514551d6f76f0f69516618bffaf38ddc22c6f912 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 11:35:05 -0700
Subject: [PATCH 061/617] BF: added feasibility check for debiased lasso, test
 for KKT conditions to dual problem

---
 selection/algorithms/debiased_lasso.py        | 17 +++-
 .../algorithms/tests/test_debiased_lasso.py   | 23 ++++-
 .../tests/test_selection_random_lasso.py      | 94 +++++++++----------
 3 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 11ae2db6d..35f4f8ccc 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -6,7 +6,7 @@
 
 from ..constraints.affine import constraints
 
-def _find_row_approx_inverse(Sigma, j, delta):
+def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):
     """
 
     Find an approximation of j-th row of inverse of Sigma.
@@ -19,8 +19,19 @@ def _find_row_approx_inverse(Sigma, j, delta):
     penalty = l1norm(p, lagrange=delta)
     iq = identity_quadratic(0, 0, elem_basis, 0)
     problem = simple_problem(loss, penalty)
-    linfunc = problem.solve(iq, min_its=100)
-    return -linfunc
+    dual_soln = problem.solve(iq, **solve_args)
+
+    soln = -dual_soln
+
+    # check feasibility -- if it fails miserably
+    # presume delta was too small
+
+    feasibility_gap = np.fabs(Sigma.dot(soln) - elem_basis).max()
+    if feasibility_gap > (1.01) * delta:
+        raise ValueError('does not seem to be a feasible point -- try increasing delta')
+
+    return soln
+
 
 def debiased_lasso_inference(lasso_obj, variables, delta):
 
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index c540dd530..38fa14483 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -6,7 +6,8 @@
 import selection.tests.reports as reports
 
 from selection.algorithms.lasso import lasso 
-from selection.algorithms.debiased_lasso import debiased_lasso_inference
+from selection.algorithms.debiased_lasso import (debiased_lasso_inference,
+                                                 _find_row_approx_inverse)
 import regreg.api as rr
 
 def test_gaussian(n=100, p=20):
@@ -24,3 +25,23 @@ def test_gaussian(n=100, p=20):
 
     print(debiased_lasso_inference(L, L.active, np.sqrt(2 * np.log(p) / n)))
     print(beta)
+
+def test_approx_inverse():
+
+    n, p = 50, 100
+    X = np.random.standard_normal((n, p))
+    S = X.T.dot(X) / n
+    j = 5
+    delta = 0.60
+    
+    soln = _find_row_approx_inverse(S, j, delta)
+
+    basis_vector = np.zeros(p)
+    basis_vector[j] = 1.
+
+    nt.assert_true(np.fabs(S.dot(soln) - basis_vector).max() < delta * 1.001)
+
+    U = - S.dot(-soln) - basis_vector
+    nt.assert_true(np.fabs(U).max() < delta * 1.001)
+    nt.assert_equal(np.argmax(np.fabs(U)), j)
+    nt.assert_equal(np.sign(U[j]), -np.sign(soln[j]))
diff --git a/selection/reduced_optimization/tests/test_selection_random_lasso.py b/selection/reduced_optimization/tests/test_selection_random_lasso.py
index a4cb8591a..bba9eab78 100644
--- a/selection/reduced_optimization/tests/test_selection_random_lasso.py
+++ b/selection/reduced_optimization/tests/test_selection_random_lasso.py
@@ -5,59 +5,53 @@
 from selection.reduced_optimization.initial_soln import selection
 from selection.tests.instance import logistic_instance, gaussian_instance
 
-#from selection.reduced_optimization.random_lasso_reduced import log_likelihood, selection_probability_random_lasso, sel_inf_random_lasso
-from selection.reduced_optimization.par_random_lasso_reduced import selection_probability_random_lasso, sel_inf_random_lasso
-from selection.reduced_optimization.estimator import M_estimator_approx
+from ..par_random_lasso_reduced import (selection_probability_random_lasso, 
+                                        sel_inf_random_lasso)
+from ..estimator import M_estimator_approx
 from selection.api import randomization
 
+def test_selection():
+    n = 500
+    p = 100
+    s = 0
+    signal = 0.
 
-n = 500
-p = 100
-s = 0
-snr = 0.
-
-np.random.seed(3)  # ensures different y
-X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
-lam = 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-n, p = X.shape
-
-loss = rr.glm.gaussian(X, y)
-epsilon = 1. / np.sqrt(n)
-
-W = np.ones(p) * lam
-penalty = rr.group_lasso(np.arange(p),weights=dict(zip(np.arange(p), W)), lagrange=1.)
-randomization = randomization.isotropic_gaussian((p,), scale=1.)
-
-M_est = M_estimator_approx(loss, epsilon, penalty, randomization, 'gaussian', 'parametric')
-M_est.solve_approx()
-active = M_est._overall
-active_set = np.asarray([i for i in range(p) if active[i]])
-nactive = np.sum(active)
-
-prior_variance = 1000.
-noise_variance = sigma ** 2
-
-generative_mean = np.zeros(p)
-generative_mean[:nactive] = M_est.initial_soln[active]
-sel_split = selection_probability_random_lasso(M_est, generative_mean)
-min = sel_split.minimize2(nstep=200)
-print(min[0], min[1])
-
-test_point = np.append(M_est.observed_score_state, np.abs(M_est.initial_soln[M_est._overall]))
-# print("gradient at test point", sel_split.smooth_objective(test_point, mode= "grad"))
-# print("break up of gradients 1", sel_split.active_conj_loss.smooth_objective(test_point, mode= "grad"))
-# print("break up of gradients 2", sel_split.likelihood_loss.smooth_objective(test_point, mode= "grad"))
-# print("break up of gradients 3", sel_split.nonnegative_barrier.smooth_objective(test_point, mode= "grad"))
-# print("break up of gradient 4 ", sel_split.cube_loss.smooth_objective(test_point, mode= "grad"))
-#
-print("value of likelihood", sel_split.likelihood_loss.smooth_objective(test_point, mode= "func"))
-#
-inv_cov = np.linalg.inv(M_est.score_cov)
-lik = (M_est.observed_score_state-generative_mean).T.dot(inv_cov).dot(M_est.observed_score_state-generative_mean)/2.
-print("value of likelihood check", lik)
-grad = inv_cov.dot(M_est.observed_score_state-generative_mean)
-print("grad at likelihood loss", grad)
+    np.random.seed(3)  # fixed seed so the generated instance is reproducible
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, signal=signal)
+    lam = 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+
+    n, p = X.shape
+
+    loss = rr.glm.gaussian(X, y)
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),weights=dict(zip(np.arange(p), W)), lagrange=1.)
+    randomizer = randomization.isotropic_gaussian((p,), scale=1.)
+
+    M_est = M_estimator_approx(loss, epsilon, penalty, randomizer, 'gaussian', 'parametric')
+    M_est.solve_approx()
+    active = M_est._overall
+    active_set = np.asarray([i for i in range(p) if active[i]])
+    nactive = np.sum(active)
+
+    prior_variance = 1000.
+    noise_variance = sigma ** 2
+
+    generative_mean = np.zeros(p)
+    generative_mean[:nactive] = M_est.initial_soln[active]
+    sel_split = selection_probability_random_lasso(M_est, generative_mean)
+    min = sel_split.minimize2(nstep=200)
+    print(min[0], min[1])
+
+    test_point = np.append(M_est.observed_score_state, np.abs(M_est.initial_soln[M_est._overall]))
+    print("value of likelihood", sel_split.likelihood_loss.smooth_objective(test_point, mode= "func"))
+
+    inv_cov = np.linalg.inv(M_est.score_cov)
+    lik = (M_est.observed_score_state-generative_mean).T.dot(inv_cov).dot(M_est.observed_score_state-generative_mean)/2.
+    print("value of likelihood check", lik)
+    grad = inv_cov.dot(M_est.observed_score_state-generative_mean)
+    print("grad at likelihood loss", grad)
 
 
 

From cd930e7b37795cc5b64790a375f234fd658220a8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 11:42:29 -0700
Subject: [PATCH 062/617] check assertion is raised if delta too small

---
 selection/algorithms/tests/test_debiased_lasso.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index 38fa14483..5dc036a73 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -45,3 +45,4 @@ def test_approx_inverse():
     nt.assert_true(np.fabs(U).max() < delta * 1.001)
     nt.assert_equal(np.argmax(np.fabs(U)), j)
     nt.assert_equal(np.sign(U[j]), -np.sign(soln[j]))
+    nt.assert_raises(ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta)

From f50f1f4884da99e4253c117af870ed059274011a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 12 Aug 2017 11:48:04 -0700
Subject: [PATCH 063/617] DOC: docstring for the approximate inverse

---
 selection/algorithms/debiased_lasso.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 35f4f8ccc..f26c085cd 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -11,6 +11,18 @@ def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1
 
     Find an approximation of j-th row of inverse of Sigma.
 
+    Solves the problem
+
+    .. math::
+
+        \text{min}_{\theta} \frac{1}{2} \theta^T \Sigma \theta
+
+    subject to $\|\Sigma \theta - e_j\|_{\infty} \leq \delta$ with
+    $e_j$ the $j$-th elementary basis vector, `Sigma` as $\Sigma$,
+    and `delta` as $\delta$.
+
+    Described in Table 1, display (4) of https://arxiv.org/pdf/1306.3171.pdf
+
     """
     p = Sigma.shape[0]
     elem_basis = np.zeros(p, np.float)

From c58c2c4c2f847d1e09f34d7ab88e6edda62229ba Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 13:25:43 -0700
Subject: [PATCH 064/617] WIP: fixing selection.randomized.tests

---
 selection/randomized/M_estimator.py           | 14 ++++---
 selection/randomized/cv.py                    |  2 +
 selection/randomized/cv_view.py               |  4 +-
 selection/randomized/query.py                 |  6 +--
 selection/randomized/tests/test_condition.py  | 10 +++--
 selection/randomized/tests/test_cv.py         | 40 +++++++++++-------
 .../test_cv_corrected_nonrandomized_lasso.py  | 27 ++++++------
 .../randomized/tests/test_cv_lee_et_al.py     | 13 +++---
 selection/randomized/tests/test_estimation.py | 21 +++++-----
 selection/randomized/tests/test_intervals.py  |  2 -
 .../tests/test_marginalize_subgrad.py         | 41 ++++++++++---------
 selection/randomized/tests/test_naive.py      |  7 +---
 .../randomized/tests/test_nonrandomized.py    | 29 +------------
 selection/randomized/tests/test_power.py      |  1 -
 .../tests/test_randomization_to_zero.py       |  9 ++--
 .../tests/test_without_screening.py           | 29 +++++++------
 16 files changed, 121 insertions(+), 134 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 9d476cf63..95d873732 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -362,15 +362,17 @@ def projection(self, opt_state):
 
     def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
         """
-        Maybe we should allow subgradients of only some variables...
+        Choose which groups' subgradients to condition on and which to marginalize over (TODO: expand).
+
+        conditioning_groups and marginalizing_groups should be disjoint
         """
-        if not self._setup:
-            raise ValueError('setup_sampler should be called before using this function')
 
-        #if marginalizing_groups is not None and self._inactive is not None:
+        if (conditioning_groups * marginalizing_groups).sum() > 0:
+            raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
 
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
 
-        #idx = 0
         groups = np.unique(self.penalty.groups)
         condition_inactive_groups = np.zeros_like(groups, dtype=bool)
         condition_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
@@ -438,8 +440,10 @@ def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
 
 
         self.opt_transform = (new_linear, new_offset)
+
         # for group LASSO this should not induce a bigger jacobian as
         # the subgradients are in the interior of a ball
+
         self.selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
 
         # reset variables
diff --git a/selection/randomized/cv.py b/selection/randomized/cv.py
index ddd3ce5cd..b3c85d198 100644
--- a/selection/randomized/cv.py
+++ b/selection/randomized/cv.py
@@ -26,6 +26,8 @@ def __init__(self, loss, folds, lam_seq, objective_randomization=None, epsilon=N
                 self.epsilon = np.true_divide(1, np.sqrt(n))
         self.K = len(np.unique(self.folds))
 
+        self.ndim = len(lam_seq)
+
     def CV_err(self,
                penalty,
                loss = None,
diff --git a/selection/randomized/cv_view.py b/selection/randomized/cv_view.py
index 6e6fd5708..1031767d9 100644
--- a/selection/randomized/cv_view.py
+++ b/selection/randomized/cv_view.py
@@ -24,7 +24,7 @@ def __init__(self, glm_loss, loss_label, lasso_randomization=None, epsilon=None,
 
     def solve(self, glmnet=False, K=5):
 
-        if glmnet==False:
+        if glmnet == False:
             X, y = self.loss.data
             n, p = X.shape
             if self.loss_label == "gaussian":
@@ -48,6 +48,8 @@ def solve(self, glmnet=False, K=5):
             CV_compute = CV_glmnet(self.loss, self.loss_label)
 
         self.lam_CVR, self.SD, CVR_val, CV1_val, self.lam_seq = CV_compute.choose_lambda_CVR(self.scale1, self.scale2)
+        self.ndim = self.lam_seq.shape[0]
+
         if (self.scale1 is not None) and (self.scale2 is not None):
             self.SD = self.SD+self.scale1**2+self.scale2**2
         (self.observed_opt_state, self.observed_score_state) = (CVR_val, CV1_val)
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 50f429d8f..27162b4ad 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -402,11 +402,11 @@ def __init__(self,
         for i in range(self.nqueries):
             if parametric == False:
                 target_cov, cross_cov = multi_view.form_covariances(target_info,  
-                                  cross_terms=[multi_view.score_info[i]],
-                                  nsample=multi_view.nboot[i])
+                                                                    cross_terms=[multi_view.score_info[i]],
+                                                                    nsample=multi_view.nboot[i])
             else:
                 target_cov, cross_cov = multi_view.form_covariances(target_info, 
-                                  cross_terms=[multi_view.score_info[i]])
+                                                                    cross_terms=[multi_view.score_info[i]])
 
             self.target_cov = target_cov
             self.score_cov.append(cross_cov)
diff --git a/selection/randomized/tests/test_condition.py b/selection/randomized/tests/test_condition.py
index 97204b4e1..b157dddc4 100644
--- a/selection/randomized/tests/test_condition.py
+++ b/selection/randomized/tests/test_condition.py
@@ -37,7 +37,7 @@ def test_condition(s=0,
                    ndraw=10000, burnin=2000,
                    loss='logistic',
                    nviews=1,
-                   scalings=False):
+                   scalings=True):
 
     if loss=="gaussian":
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, signal=signal, sigma=1)
@@ -80,8 +80,12 @@ def test_condition(s=0,
 
         if scalings: # try condition on some scalings
             for i in range(nviews):
-                views[i].decompose_subgradient(conditioning_groups=np.zeros(p, bool),
-                                               marginalizing_groups=np.ones(p, bool))
+                conditioning_groups = np.zeros(p, bool)
+                conditioning_groups[:int(p/2)] = True
+                marginalizing_groups = np.ones(p, bool)
+                marginalizing_groups[:int(p/2)] = False
+                views[i].decompose_subgradient(conditioning_groups=conditioning_groups,
+                                               marginalizing_groups=marginalizing_groups)
                 views[i].condition_on_scalings()
         else:
             for i in range(nviews):
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 62be9d65c..280a84c79 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -20,26 +20,31 @@
 from selection.randomized.cv_view import CV_view
 from statsmodels.sandbox.stats.multicomp import multipletests
 
+if SMALL_SAMPLES:
+    nboot = 10
+else: 
+    nboot = -1
 
 @register_report(['truth', 'cover', 'ci_length_clt', 'naive_pvalues', 'naive_cover', 'ci_length_naive',
                     'active', 'BH_decisions', 'active_var'])
 @set_seed_iftrue(SET_SEED)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
-def test_cv(n=100, p=50, s=0, signal=7.5, K=5, rho=0.,
-             randomizer = 'gaussian',
-             randomizer_scale = 1.,
-             scale1 = 0.1,
-             scale2 = 0.2,
-             lam_frac = 1.,
-             loss = 'gaussian',
-             intervals = 'old',
-             bootstrap = False,
-             condition_on_CVR = True,
-             marginalize_subgrad = True,
-             ndraw = 10000,
-             burnin = 2000):
-
+def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
+            randomizer = 'gaussian',
+            randomizer_scale = 1.,
+            scale1 = 0.1,
+            scale2 = 0.2,
+            lam_frac = 1.,
+            loss = 'gaussian',
+            intervals = 'old',
+            bootstrap = False,
+            condition_on_CVR = True,
+            marginalize_subgrad = True,
+            ndraw = 10000,
+            burnin = 2000,
+            nboot = nboot):
+    
     print(n,p,s, condition_on_CVR, scale1, scale2)
     if randomizer == 'laplace':
         randomizer = randomization.laplace((p,), scale=randomizer_scale)
@@ -56,6 +61,7 @@ def test_cv(n=100, p=50, s=0, signal=7.5, K=5, rho=0.,
         glm_loss = rr.glm.logistic(X, y)
 
     epsilon = 1./np.sqrt(n)
+
     # view 1
     cv = CV_view(glm_loss, 
                  loss_label=loss, 
@@ -85,6 +91,9 @@ def test_cv(n=100, p=50, s=0, signal=7.5, K=5, rho=0.,
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
     M_est1 = glm_group_lasso(glm_loss, epsilon, penalty, randomizer)
 
+    if nboot > 0:
+        cv.nboot = M_est1.nboot = nboot
+
     mv = multiple_queries([cv, M_est1])
     mv.solve()
 
@@ -95,6 +104,7 @@ def test_cv(n=100, p=50, s=0, signal=7.5, K=5, rho=0.,
         return None
 
     nonzero = np.where(beta)[0]
+
     if set(nonzero).issubset(np.nonzero(active_union)[0]):
 
         active_set = np.nonzero(active_union)[0]
@@ -180,7 +190,7 @@ def report(niter=50, **kwargs):
     fig.savefig(pdf_label)
 
 
-if __name__ == '__main__':
+def main():
     np.random.seed(500)
     kwargs = {'n': 600, 'p': 20, 's': 0, 'signal': 3.5, 'K': 5, 'rho': 0.,
               'randomizer': 'gaussian', 'randomizer_scale': 1.5,
diff --git a/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py b/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
index 7dc70ff3f..c62abb08c 100644
--- a/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
+++ b/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
@@ -20,19 +20,18 @@
                   'naive_pvalues', 'covered_naive', 'ci_length_naive',
                   'active_var'])
 @set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
-def test_cv_corrected_nonrandomized_lasso(n=3000,
-                                    p=1000,
-                                    s=10,
-                                    signal = 3.5,
-                                    rho = 0.,
-                                    sigma = 1.,
-                                    K = 5,
-                                    loss="gaussian",
-                                    X = None,
-                                    check_screen=True,
-                                    intervals=False):
+def test_cv_corrected_nonrandomized_lasso(n=300,
+                                          p=100,
+                                          s=3,
+                                          signal = 3.5,
+                                          rho = 0.,
+                                          sigma = 1.,
+                                          K = 5,
+                                          loss="gaussian",
+                                          X = None,
+                                          check_screen=True,
+                                          intervals=False):
 
     print (n, p, s, rho)
     if X is not None:
@@ -71,7 +70,6 @@ def test_cv_corrected_nonrandomized_lasso(n=3000,
     L.covariance_estimator = glm_sandwich_estimator(L.loglike, B=2000)
     soln = L.fit()
 
-
     active = soln !=0
     nactive = active.sum()
     print("nactive", nactive)
@@ -86,7 +84,6 @@ def coef_boot(indices):
         # bootstrap of just coefficients
         return selected_boot(indices)[:active.sum()]
 
-
     if (check_screen==False) or (set(truth).issubset(np.nonzero(active)[0])):
 
         active_set = np.nonzero(active)[0]
@@ -101,7 +98,7 @@ def coef_boot(indices):
         # covariance of L.constraints is more accurate than cov[0]
         # but estimates the same thing (i.e. more bootstrap replicates)
         A = cov[1].T.dot(np.linalg.pinv(L.constraints.covariance))
-        residual = CV_val_randomized- A.dot(one_step)
+        residual = CV_val_randomized - A.dot(one_step)
 
         # minimizer indicator
 
diff --git a/selection/randomized/tests/test_cv_lee_et_al.py b/selection/randomized/tests/test_cv_lee_et_al.py
index a30ee0517..8d2899872 100644
--- a/selection/randomized/tests/test_cv_lee_et_al.py
+++ b/selection/randomized/tests/test_cv_lee_et_al.py
@@ -5,8 +5,12 @@
 from selection.tests.instance import gaussian_instance
 from selection.algorithms.lasso import lasso
 import selection.tests.reports as reports
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+
+from selection.tests.flags import SET_SEED
+from selection.tests.decorators import (wait_for_return_value, 
+                                        set_seed_iftrue, 
+                                        set_sampling_params_iftrue, 
+                                        register_report)
 from statsmodels.sandbox.stats.multicomp import multipletests
 from selection.randomized.cv_view import CV_view
 from scipy.stats import norm as ndist
@@ -50,12 +54,11 @@ def F(param):
                   'naive_pvalues', 'covered_naive', 'ci_length_naive',
                   'active_var','BH_decisions'])
 @set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
 def test_lee_et_al(n=300,
                    p=100,
                    s=10,
-                   signal = 3.5,
+                   signal=3.5,
                    rho = 0.,
                    sigma = 1.,
                    cross_validation=True,
@@ -218,7 +221,7 @@ def report(niter=100, design="random", **kwargs):
     fig1.savefig('naive_pvalues.pdf')
 
 
-if __name__ == '__main__':
+def main():
 
     np.random.seed(500)
     kwargs = {'s': 0, 'n': 500, 'p': 100, 'signal': 3.5, 'sigma': 1, 'rho': 0., 'intervals':False,
diff --git a/selection/randomized/tests/test_estimation.py b/selection/randomized/tests/test_estimation.py
index 7d66e699d..cc43db886 100644
--- a/selection/randomized/tests/test_estimation.py
+++ b/selection/randomized/tests/test_estimation.py
@@ -1,9 +1,10 @@
 from __future__ import print_function
 import numpy as np
+import matplotlib.pyplot as plt
 
 from selection.tests.instance import gaussian_instance
 
-def MSE(signal=1, n=100, p=10, s=1):
+def test_MSE(signal=1, n=100, p=10, s=1):
 
     ninstance = 1
     total_mse = 0
@@ -11,6 +12,7 @@ def MSE(signal=1, n=100, p=10, s=1):
     data_instance = gaussian_instance(n, p, s, signal)
     tau = 1.
     for i in range(ninstance):
+
         X, y, true_beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, signal=signal)
         random_Z = np.random.standard_normal(p)
         lam, epsilon, active, betaE, cube, initial_soln = selection(X, y, random_Z) # selection not defined -- is in a file that was deleted
@@ -60,21 +62,20 @@ def MSE_three(signal=5, n=100, p=10, s=0):
     if nvalid_instance > 0:
         return total_mse_mle/float(nvalid_instance), total_mse_unbiased/float(nvalid_instance), total_mse_umvu/float(nvalid_instance)
 
-
 def plot_estimation_three():
     signal_seq = np.linspace(-10, 10, num=50)
     filter = np.zeros(signal_seq.shape[0], dtype=bool)
     mse_mle_seq, mse_unbiased_seq, mse_umvu_seq = [], [], []
 
     for i in range(signal_seq.shape[0]):
-            print("parameter value", signal_seq[i])
-            mse = MSE_three(signal_seq[i])
-            if mse is not None:
-                mse_mle, mse_unbiased, mse_umvu = mse
-                mse_mle_seq.append(mse_mle)
-                mse_unbiased_seq.append(mse_unbiased)
-                mse_umvu_seq.append(mse_umvu)
-                filter[i] = True
+        print("parameter value", signal_seq[i])
+        mse = MSE_three(signal_seq[i])
+        if mse is not None:
+            mse_mle, mse_unbiased, mse_umvu = mse
+            mse_mle_seq.append(mse_mle)
+            mse_unbiased_seq.append(mse_unbiased)
+            mse_umvu_seq.append(mse_umvu)
+            filter[i] = True
 
     plt.clf()
     plt.title("MSE")
diff --git a/selection/randomized/tests/test_intervals.py b/selection/randomized/tests/test_intervals.py
index 3dba7da6f..7ab3deebe 100644
--- a/selection/randomized/tests/test_intervals.py
+++ b/selection/randomized/tests/test_intervals.py
@@ -181,5 +181,3 @@ def report(niter=50, **kwargs):
     fig.savefig('Group_lasso.pdf')
 
 
-if __name__== '__main__':
-    report()
diff --git a/selection/randomized/tests/test_marginalize_subgrad.py b/selection/randomized/tests/test_marginalize_subgrad.py
index 9be105be1..967ba0a82 100644
--- a/selection/randomized/tests/test_marginalize_subgrad.py
+++ b/selection/randomized/tests/test_marginalize_subgrad.py
@@ -31,22 +31,22 @@
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @set_seed_iftrue(SET_SEED)
 @wait_for_return_value()
-def test_marginalize(s=0,
-                    n=600,
-                    p=200,
-                    rho=0.,
-                    signal=3.5,
-                    lam_frac = 2.5,
-                    ndraw=10000,
-                    burnin=2000,
-                    loss='gaussian',
-                    randomizer = 'gaussian',
-                    randomizer_scale = 1.,
-                    nviews=3,
-                    scalings=False,
-                    subgrad =True,
-                    parametric=False,
-                    intervals='old'):
+def test_marginalize(s=4,
+                     n=600,
+                     p=200,
+                     rho=0.,
+                     signal=3.5,
+                     lam_frac = 2.5,
+                     ndraw=10000,
+                     burnin=2000,
+                     loss='gaussian',
+                     randomizer = 'gaussian',
+                     randomizer_scale = 1.,
+                     nviews=3,
+                     scalings=True,
+                     subgrad =True,
+                     parametric=False,
+                     intervals='old'):
     print(n,p,s)
 
     if randomizer == 'laplace':
@@ -98,9 +98,11 @@ def test_marginalize(s=0,
         if nactive==s:
             return None
 
+        # BUG: if this scalings code is moved after the decompose_subgradient call,
+        # the code seems to run fine
+
         if scalings: # try condition on some scalings
             for i in range(nviews):
-                views[i].condition_on_subgradient()
                 views[i].condition_on_scalings()
         if subgrad:
             for i in range(nviews):
@@ -108,7 +110,8 @@ def test_marginalize(s=0,
                conditioning_groups[:(p/2)] = True
                marginalizing_groups = np.zeros(p, dtype=bool)
                marginalizing_groups[(p/2):] = True
-               views[i].decompose_subgradient(conditioning_groups=np.zeros(p, dtype=bool), marginalizing_groups=np.ones(p, bool))
+               views[i].decompose_subgradient(conditioning_groups=conditioning_groups, 
+                                              marginalizing_groups=marginalizing_groups)
 
         active_set = np.nonzero(active_union)[0]
         target_sampler, target_observed = glm_target(loss,
@@ -184,5 +187,3 @@ def report(niter=50, **kwargs):
     fig.savefig('marginalized_subgrad_pivots.pdf')
 
 
-if __name__ == '__main__':
-    report()
diff --git a/selection/randomized/tests/test_naive.py b/selection/randomized/tests/test_naive.py
index e9e6708d5..cd2353af5 100644
--- a/selection/randomized/tests/test_naive.py
+++ b/selection/randomized/tests/test_naive.py
@@ -28,11 +28,8 @@ def compute_projection_parameters(n, p, s, signal, rho, sigma, active):
     return proj_param
 
 
-
-
 @register_report(['naive_pvalues', 'covered_naive', 'ci_length_naive', 'active_var'])
 @set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
 def test_naive(n=300,
                p=100,
@@ -168,9 +165,7 @@ def report(niter=50, design="random", **kwargs):
     fig.suptitle("Naive p-values", fontsize=20)
     fig.savefig('naive_pvalues.pdf')
 
-
-if __name__ == '__main__':
-
+def main():
     np.random.seed(500)
     kwargs = {'s': 0, 'n': 100, 'p': 50, 'signal': 3.5, 'sigma': 1, 'rho': 0., 'intervals':True}
     report(niter=100, **kwargs)
diff --git a/selection/randomized/tests/test_nonrandomized.py b/selection/randomized/tests/test_nonrandomized.py
index 23a2be5e3..a1da8b4ae 100644
--- a/selection/randomized/tests/test_nonrandomized.py
+++ b/selection/randomized/tests/test_nonrandomized.py
@@ -9,9 +9,7 @@
 from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
 from selection.tests.flags import SMALL_SAMPLES, SET_SEED
 
-
 @register_report(['pivot', 'covered_clt'])
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @wait_for_return_value()
 def test_nonrandomized(s=0,
                        n=200,
@@ -89,30 +87,5 @@ def report(niter=100, **kwargs):
     fig.savefig('nonrandomized_pivots.pdf') # will have both bootstrap and CLT on plot
 
 
-if __name__=='__main__':
+def main():
     report()
-
-# if __name__=='__main__':
-#
-#     pvals = []
-#     for i in range(100):
-#         print(i)
-#         pval = test_nonrandomized()
-#         print(pval)
-#         if pval is not None:
-#             pvals.append(pval)
-#
-#     import matplotlib.pyplot as plt
-#     import statsmodels.api as sm
-#
-#     fig = plt.figure()
-#     ax = fig.gca()
-#
-#     ecdf = sm.distributions.ECDF(pvals)
-#     G = np.linspace(0, 1)
-#     F = ecdf(G)
-#     ax.plot(G, F, '-o', c='b', lw=2)
-#     ax.plot([0, 1], [0, 1], 'k-', lw=2)
-#     ax.set_xlim([0, 1])
-#     ax.set_ylim([0, 1])
-#     plt.show()
diff --git a/selection/randomized/tests/test_power.py b/selection/randomized/tests/test_power.py
index fe1b8a6a3..d39f67be3 100644
--- a/selection/randomized/tests/test_power.py
+++ b/selection/randomized/tests/test_power.py
@@ -77,7 +77,6 @@ def test_power(s=30,
             lam = cv.one_SD_rule(direction="up")
             print("one SD rule lambda", lam)
 
-
     W = lam_frac * np.ones(p) * lam
     penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
diff --git a/selection/randomized/tests/test_randomization_to_zero.py b/selection/randomized/tests/test_randomization_to_zero.py
index cbbd43a2b..3c2219162 100644
--- a/selection/randomized/tests/test_randomization_to_zero.py
+++ b/selection/randomized/tests/test_randomization_to_zero.py
@@ -128,10 +128,10 @@ def test_multiple_queries_individual_coeff_small(ndraw=10000,
     s, n, p = 3, 100, 20
 
     randomizer = randomization.laplace((p,), scale=1)
-    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0, signal=20.)
+    X, y, beta, true_active = logistic_instance(n=n, p=p, s=s, rho=0, signal=20.)
 
     nonzero = np.where(beta)[0]
-    lam_frac = 3.
+    lam_frac = 1.2
 
     loss = rr.glm.logistic(X, y)
     epsilon = 1.
@@ -155,7 +155,6 @@ def test_multiple_queries_individual_coeff_small(ndraw=10000,
     pvalues = []
     true_beta = beta[active_vars]
 
-    print(nonzero, active_set)
     if set(nonzero).issubset(active_set):
 
         for j in range(nactive):
@@ -188,7 +187,7 @@ def test_parametric_covariance_small(ndraw=10000, burnin=2000, nsim=None): # nsi
     s, n, p = 3, 100, 10
 
     randomizer = randomization.laplace((p,), scale=1)
-    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0, signal=10)
+    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0, signal=15)
 
     nonzero = np.where(beta)[0]
     lam_frac = 1.
@@ -229,7 +228,7 @@ def test_parametric_covariance_small(ndraw=10000, burnin=2000, nsim=None): # nsi
         linear_func[1,-2] = 1. # also null
 
         target_observed = linear_func.dot(target_observed)
-        target_sampler = mv.setup_target((target, linear_func), target_observed)
+        target_sampler = mv.setup_target((target, linear_func), target_observed, parametric=True)
 
         test_stat = lambda x: np.linalg.norm(x)
         pval = target_sampler.hypothesis_test(test_stat, 
diff --git a/selection/randomized/tests/test_without_screening.py b/selection/randomized/tests/test_without_screening.py
index b1735a9f9..c75fb94fd 100644
--- a/selection/randomized/tests/test_without_screening.py
+++ b/selection/randomized/tests/test_without_screening.py
@@ -28,20 +28,20 @@
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @set_seed_iftrue(SET_SEED)
 @wait_for_return_value()
-def test_without_screening(s=30,
-                        n=3000,
-                        p=1000,
-                        rho=0.,
-                        signal=3.5,
-                        lam_frac = 1.,
-                        ndraw=10000,
-                        burnin=2000,
-                        loss='gaussian',
-                        randomizer ='laplace',
-                        randomizer_scale =1.,
-                        scalings=False,
-                        subgrad =True,
-                        check_screen = False):
+def test_without_screening(s=10,
+                           n=300,
+                           p=100,
+                           rho=0.,
+                           signal=3.5,
+                           lam_frac = 1.,
+                           ndraw=10000,
+                           burnin=2000,
+                           loss='gaussian',
+                           randomizer ='laplace',
+                           randomizer_scale =1.,
+                           scalings=False,
+                           subgrad =True,
+                           check_screen=False):
 
     if loss=="gaussian":
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, signal=signal, sigma=1, random_signs=False)
@@ -94,7 +94,6 @@ def test_without_screening(s=30,
         if subgrad:
             M_est.decompose_subgradient(conditioning_groups=np.zeros(p, dtype=bool), marginalizing_groups=np.ones(p, bool))
 
-
         boot_target1, boot_target_observed1 = pairs_bootstrap_glm(loss, active_union, inactive=~active_union)
         boot_target2, boot_target_observed2 = pairs_bootstrap_glm(loss_indep, active_union, inactive=~active_union)
         target_observed = (boot_target_observed1-boot_target_observed2)[:nactive]

From b154e4162313d27244ad82272ae8c4696c9cb5d6 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 13:53:14 -0700
Subject: [PATCH 065/617] test for CV_glmnet

---
 selection/randomized/cv_glmnet.py            | 33 ++++----------------
 selection/randomized/tests/test_cv_glmnet.py | 23 ++++++++++++++
 2 files changed, 29 insertions(+), 27 deletions(-)
 create mode 100644 selection/randomized/tests/test_cv_glmnet.py

diff --git a/selection/randomized/cv_glmnet.py b/selection/randomized/cv_glmnet.py
index a23deac6f..d8f5e2850 100644
--- a/selection/randomized/cv_glmnet.py
+++ b/selection/randomized/cv_glmnet.py
@@ -12,10 +12,10 @@ class CV_glmnet(object):
 
     def __init__(self, loss, loss_label):
         self.loss = loss
-        if loss_label=="gaussian":
-            self.family=robjects.StrVector('g')
-        elif loss_label=="logistic":
-            self.family=robjects.StrVector('b')
+        if loss_label == "gaussian":
+            self.family = robjects.StrVector('g')
+        elif loss_label == "logistic":
+            self.family = robjects.StrVector('b')
 
     def using_glmnet(self, loss=None):
         robjects.r('''
@@ -62,13 +62,14 @@ def using_glmnet(self, loss=None):
         if not hasattr(self, 'lam_seq'):
             self.lam_seq = lam_seq
         CV_err = np.array(result[3])
+
         # this is stupid but glmnet sometime cuts my given seq of lambdas
         if CV_err.shape[0]<self.lam_seq.shape[0]:
             CV_err_longer = np.ones(self.lam_seq.shape[0])*np.max(CV_err)
             CV_err_longer[:(self.lam_seq.shape[0]-1)]=CV_err
             CV_err = CV_err_longer
         SD = np.array(result[4])
-        #print("lam_minCV", lam_minCV)
+
         return lam_minCV, lam_1SE, lam_seq, CV_err, SD
 
 
@@ -98,7 +99,6 @@ def bootstrap_CVR_curve(self, scale1=None, scale2=None):
 
         def _bootstrap_CVerr_curve(indices):
             loss_star = self.loss.subsample(indices)
-            # loss_star = rr.glm.gaussian(X[indices,:], y[indices])
             _, _, CVR_val, CV1_val, _ = self.choose_lambda_CVR(scale1, scale2, loss_star)
             return np.array(CVR_val), np.array(CV1_val)
 
@@ -111,24 +111,3 @@ def _CV1_boot(indices):
         return _CVR_boot, _CV1_boot
 
 
-if __name__ == '__main__':
-    np.random.seed(2)
-    n, p = 3000, 1000
-    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=30, rho=0., sigma=1)
-    loss = rr.glm.gaussian(X,y)
-    CV_glmnet_compute = CV_glmnet(loss)
-    lam_CV, lam_1SD, lam_seq, CV_err, SD = CV_glmnet_compute.using_glmnet()
-    print("CV error curve (nonrandomized):", CV_err)
-    lam_grid_size = CV_glmnet_compute.lam_seq.shape[0]
-    lam_CVR, SD, CVR, CV1, lam_seq = CV_glmnet_compute.choose_lambda_CVR(scale1=0.1, scale2=0.1)
-    print("nonrandomized index:", list(lam_seq).index(lam_CV)) # index of the minimizer
-    print("lam for nonrandomized CV plus sigma rule:",lam_CV,lam_1SD)
-    print("lam_CVR:",lam_CVR)
-    print("randomized index:", list(lam_seq).index(lam_CVR))
-    import matplotlib.pyplot as plt
-    plt.plot(np.log(lam_seq), CV_err)
-    plt.plot(np.log(lam_seq), CVR)
-    #plt.ylabel('some numbers')
-    plt.show()
-
-
diff --git a/selection/randomized/tests/test_cv_glmnet.py b/selection/randomized/tests/test_cv_glmnet.py
new file mode 100644
index 000000000..cd0b05a7a
--- /dev/null
+++ b/selection/randomized/tests/test_cv_glmnet.py
@@ -0,0 +1,23 @@
+import numpy as np
+import regreg.api as rr
+
+from ..cv_glmnet import CV_glmnet
+from ...tests.instance import gaussian_instance
+
+def test_cv_glmnet():
+    np.random.seed(2)
+    n, p = 3000, 1000
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=30, rho=0., sigma=1)
+    loss = rr.glm.gaussian(X,y)
+    CV_glmnet_gaussian = CV_glmnet(loss, 'gaussian')
+    lam_CV, lam_1SD, lam_seq, CV_err, SD = CV_glmnet_gaussian.using_glmnet()
+    print("CV error curve (nonrandomized):", CV_err)
+    lam_grid_size = CV_glmnet_gaussian.lam_seq.shape[0]
+    lam_CVR, SD, CVR, CV1, lam_seq = CV_glmnet_gaussian.choose_lambda_CVR(scale1=0.1, scale2=0.1)
+    print("nonrandomized index:", list(lam_seq).index(lam_CV)) # index of the minimizer
+    print("lam for nonrandomized CV plus sigma rule:",lam_CV,lam_1SD)
+    print("lam_CVR:",lam_CVR)
+    print("randomized index:", list(lam_seq).index(lam_CVR))
+
+
+

From 6df141a9da55b9dcae01d9fcd8695b6ef7bc44a3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:09:34 -0700
Subject: [PATCH 066/617] BF: rpy2 not installed by default on travis -- want
 to make sure CV still runs

---
 selection/randomized/cv_glmnet.py     | 27 +++++++++++++++++++--------
 selection/randomized/cv_view.py       |  8 +-------
 selection/randomized/tests/test_cv.py | 19 ++++++++++++++++++-
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/selection/randomized/cv_glmnet.py b/selection/randomized/cv_glmnet.py
index d8f5e2850..612ea397e 100644
--- a/selection/randomized/cv_glmnet.py
+++ b/selection/randomized/cv_glmnet.py
@@ -1,12 +1,23 @@
-from rpy2.robjects.packages import importr
-from rpy2 import robjects
-glmnet = importr('glmnet')
-from selection.tests.instance import gaussian_instance
-import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
+"""
+This module uses glmnet to run CV as part of cv_view.
+
+If a user attempts to import the module without rpy2 installed, it
+will raise an ImportError. So, this should not be in any api import.
+"""
+
 import numpy as np
 import regreg.api as rr
-from selection.api import randomization
+
+from ..tests.instance import gaussian_instance
+from .randomization import randomization
+
+try:
+    from rpy2.robjects.packages import importr
+    from rpy2 import robjects
+    import rpy2.robjects.numpy2ri
+    rpy2.robjects.numpy2ri.activate()
+except ImportError:
+    raise ImportError('rpy2 seems not to be installed')
 
 class CV_glmnet(object):
 
@@ -64,7 +75,7 @@ def using_glmnet(self, loss=None):
         CV_err = np.array(result[3])
 
         # this is stupid but glmnet sometime cuts my given seq of lambdas
-        if CV_err.shape[0]<self.lam_seq.shape[0]:
+        if CV_err.shape[0] < self.lam_seq.shape[0]:
             CV_err_longer = np.ones(self.lam_seq.shape[0])*np.max(CV_err)
             CV_err_longer[:(self.lam_seq.shape[0]-1)]=CV_err
             CV_err = CV_err_longer
diff --git a/selection/randomized/cv_view.py b/selection/randomized/cv_view.py
index 1031767d9..6c4e8d3bc 100644
--- a/selection/randomized/cv_view.py
+++ b/selection/randomized/cv_view.py
@@ -28,14 +28,12 @@ def solve(self, glmnet=False, K=5):
             X, y = self.loss.data
             n, p = X.shape
             if self.loss_label == "gaussian":
-                # lam_seq = np.mean(np.fabs(np.dot(X.T, y)))
-                lam_seq = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000))) +\
+                lam_seq = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000))) +
                                           self.lasso_randomization.sample((1000,))).max(0))
             elif self.loss_label == 'logistic':
                 lam_seq = np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 1000))) +\
                           self.lasso_randomization.sample((1000,))).max(0))
             self.lam_seq = np.exp(np.linspace(np.log(1.e-3), np.log(1), 30)) * lam_seq
-            # lam_seq = np.exp(np.linspace(np.log(1.e-2), np.log(2), 30)) * np.fabs(X.T.dot(y)+lasso_randomization.sample((10,))).max()
 
             folds = np.arange(n) % K
             np.random.shuffle(folds)
@@ -67,7 +65,6 @@ def solve(self, glmnet=False, K=5):
             self.CVR_boot, self.CV1_boot = CV_compute.bootstrap_CVR_curve(self.scale1, self.scale2)
             self._solved = True
 
-
     def setup_sampler(self):
         return self.CV1_boot
 
@@ -96,7 +93,6 @@ def condition_on_opt_state(self):
         self.opt_transform = (None, self.observed_opt_state)
 
 
-#DEBUG = True
 def projection(Z, idx):
     Z = np.asarray(Z)
     keep = np.ones_like(Z, np.bool)
@@ -117,8 +113,6 @@ def projection(Z, idx):
     if root_found:
         val = (np.sum(Z_sort[:(i+1)]) + Z[idx]) / (i+2)
         dval = val - Z[idx] + np.sum(keep * (Z <= val) * (val - Z))
-        #if DEBUG:
-        #    print('derivative is:', dval)
     else:
         val = np.mean(Z)
 
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 280a84c79..f8c959173 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -36,6 +36,7 @@ def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
             scale1 = 0.1,
             scale2 = 0.2,
             lam_frac = 1.,
+            glmnet = True,
             loss = 'gaussian',
             intervals = 'old',
             bootstrap = False,
@@ -69,7 +70,23 @@ def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
                  epsilon=epsilon, 
                  scale1=scale1, 
                  scale2=scale2)
-    cv.solve(glmnet=True)
+    if glmnet:
+        try:
+            cv.solve(glmnet=glmnet)
+        except ImportError:
+            cv.solve(glmnet=False)
+    else:
+        cv.solve(glmnet=False)
+
+    # for the test make sure we also run the python code
+
+    cv_py = CV_view(glm_loss, 
+                    loss_label=loss, 
+                    lasso_randomization=randomizer, 
+                    epsilon=epsilon, 
+                    scale1=scale1, 
+                    scale2=scale2)
+    cv_py.solve(glmnet=False)
 
     lam = cv.lam_CVR
     print("lam", lam)

From c8c95557375f6d50a627d6d57e018840fa1be6c0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:17:49 -0700
Subject: [PATCH 067/617] WIP: updating travis script to ensure rpy2 tests get
 run

---
 .travis.yml                                   |  6 ++++++
 dev-requirements.txt                          |  5 +++++
 doc-requirements.txt                          | 10 ++++++++++
 requirements.txt                              |  1 +
 selection/info.py                             |  2 --
 selection/sampling/tests/test_pca_langevin.py |  7 ++++---
 6 files changed, 26 insertions(+), 5 deletions(-)
 create mode 100644 dev-requirements.txt
 create mode 100644 doc-requirements.txt

diff --git a/.travis.yml b/.travis.yml
index 05b5b91f5..e3cf40672 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,6 +9,7 @@ addons:
     packages:
         - libblas-dev
         - liblapack-dev
+	- r-base-dev
 env:
     global:
         # Maximal dependencies
@@ -36,6 +37,11 @@ script:
     - cd for_testing
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
     - 'echo "backend : agg" > matplotlibrc'
+    - sudo apt-get update
+    - sudo apt-get install -y r-base r-base-dev
+    - sudo Rscript -e "install.packages(c('glmnet', 'Matrix'), repos='http://cloud.r-project.org');"
+    - pip install -r doc-requirements.txt # installs rpy2 among other things
+
     # Doctests only on platforms that have compatible fp output
     - if [ `uname` == "Darwin" ] ||
       [ "${TRAVIS_PYTHON_VERSION:0:1}" == "3" ]; then
diff --git a/dev-requirements.txt b/dev-requirements.txt
new file mode 100644
index 000000000..1f2dea29b
--- /dev/null
+++ b/dev-requirements.txt
@@ -0,0 +1,5 @@
+# Requirements for developing regreg
+# Check these dependencies against regreg/info.py
+-r requirements.txt
+cython>=0.18
+nose
diff --git a/doc-requirements.txt b/doc-requirements.txt
new file mode 100644
index 000000000..84e1679eb
--- /dev/null
+++ b/doc-requirements.txt
@@ -0,0 +1,10 @@
+# Requirements for building docs
+# Check these dependencies against doc/conf.py
+-r dev-requirements.txt
+sphinx>=1.4
+numpydoc
+matplotlib
+texext
+rpy2
+nb2plots
+sklearn
diff --git a/requirements.txt b/requirements.txt
index 6c0c8d676..54ee26eba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ pyinter
 statsmodels
 sklearn
 pyinter
+
diff --git a/selection/info.py b/selection/info.py
index d60bb874a..5edfc6207 100644
--- a/selection/info.py
+++ b/selection/info.py
@@ -44,7 +44,6 @@
 NUMPY_MIN_VERSION='1.3'
 SCIPY_MIN_VERSION = '0.7'
 CYTHON_MIN_VERSION = '0.11.1'
-SKLEARN_MIN_VERSION = "0.14.1"
 MPMATH_MIN_VERSION = "0.18"
 PYINTER_MIN_VERSION = "0.1.6"
 
@@ -69,6 +68,5 @@
 PROVIDES            = ["fixed_lambda"]
 REQUIRES            = ["numpy (>=%s)" % NUMPY_MIN_VERSION,
                        "scipy (>=%s)" % SCIPY_MIN_VERSION,
-                       "sklearn (>=%s)" % SKLEARN_MIN_VERSION,
                        "mpmath (>=%s)" % MPMATH_MIN_VERSION,
                        "pyinter"]
diff --git a/selection/sampling/tests/test_pca_langevin.py b/selection/sampling/tests/test_pca_langevin.py
index 9071fbed9..e249c9b5d 100644
--- a/selection/sampling/tests/test_pca_langevin.py
+++ b/selection/sampling/tests/test_pca_langevin.py
@@ -52,14 +52,15 @@ def _grad_log_wishart_white(eigenvals, n):
 
 def main(n=50):
 
-    from sklearn.isotonic import IsotonicRegression
+    from regreg.atoms._isotonic import _isotonic_regression
     import matplotlib.pyplot as plt
     initial = np.ones(n) + 0.01 * np.random.standard_normal(n)
     grad_map = lambda val: _grad_log_wishart_white(val, n)
 
     def projection_map(vals):
-        iso = IsotonicRegression(y_min=1.e-6)
-        vals = np.asarray(vals)
+        iso = np.zeros_like(vals)
+        _isotonic_regression(vals, np.ones_like(vals), iso)
+        vals = np.asarray(iso)
         return np.maximum(vals, 1.e-6)
 
     sampler = projected_langevin(initial,

From f1353b7dc3aba3a36778f2d4890d90305e32f338 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:19:04 -0700
Subject: [PATCH 068/617] BF: tab in travis file

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index e3cf40672..4dbb193c7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,7 @@ addons:
     packages:
         - libblas-dev
         - liblapack-dev
-	- r-base-dev
+        - r-base-dev
 env:
     global:
         # Maximal dependencies

From 94b5e36e20d006bca876575a727a72ebe97d55b8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:24:06 -0700
Subject: [PATCH 069/617] BF: cd'ed into new directory too soon

---
 .travis.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 4dbb193c7..7ec6ae4ab 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -32,9 +32,6 @@ install:
 # command to run tests, e.g. python setup.py test
 script:
     - pip install nose
-    # Change into an innocuous directory and find tests from installation
-    - mkdir for_testing
-    - cd for_testing
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
     - 'echo "backend : agg" > matplotlibrc'
     - sudo apt-get update
@@ -42,6 +39,10 @@ script:
     - sudo Rscript -e "install.packages(c('glmnet', 'Matrix'), repos='http://cloud.r-project.org');"
     - pip install -r doc-requirements.txt # installs rpy2 among other things
 
+    # Change into an innocuous directory and find tests from installation
+    - mkdir for_testing
+    - cd for_testing
+
     # Doctests only on platforms that have compatible fp output
     - if [ `uname` == "Darwin" ] ||
       [ "${TRAVIS_PYTHON_VERSION:0:1}" == "3" ]; then

From f1eed057acdc0dca244a974412e69529ce56b692 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:31:27 -0700
Subject: [PATCH 070/617] BF: travis still not installing rpy2

---
 dev-requirements.txt | 1 -
 doc-requirements.txt | 1 -
 2 files changed, 2 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 1f2dea29b..b9a60175e 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,5 +1,4 @@
 # Requirements for developing regreg
 # Check these dependencies against regreg/info.py
 -r requirements.txt
-cython>=0.18
 nose
diff --git a/doc-requirements.txt b/doc-requirements.txt
index 84e1679eb..1b77f35d6 100644
--- a/doc-requirements.txt
+++ b/doc-requirements.txt
@@ -7,4 +7,3 @@ matplotlib
 texext
 rpy2
 nb2plots
-sklearn

From 18e89c63b122dea677a08965616e2ac0d6af4480 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:32:41 -0700
Subject: [PATCH 071/617] BF: snr to signal

---
 selection/sampling/tests/test_kfstep.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/sampling/tests/test_kfstep.py b/selection/sampling/tests/test_kfstep.py
index fcce6734a..4670c7900 100644
--- a/selection/sampling/tests/test_kfstep.py
+++ b/selection/sampling/tests/test_kfstep.py
@@ -62,7 +62,7 @@ def _projection(state):
 
 def test_kfstep(k=4, s=3, n=100, p=10, Langevin_steps=10000, burning=2000):
 
-    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0, snr=10)
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, random_signs=True, s=s, sigma=1.,rho=0, signal=10)
     epsilon = 0.
 
     randomization = laplace(loc=0, scale=1.)

From 06f5043f6a56d781ad188fbd664239fcc9d95f25 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:36:39 -0700
Subject: [PATCH 072/617] trying to fix imports so exception is not raised
 without rpy2

---
 selection/randomized/cv_glmnet.py | 17 ++++++++++++-----
 selection/randomized/cv_view.py   |  9 +++++----
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/selection/randomized/cv_glmnet.py b/selection/randomized/cv_glmnet.py
index 612ea397e..f56a1d976 100644
--- a/selection/randomized/cv_glmnet.py
+++ b/selection/randomized/cv_glmnet.py
@@ -5,6 +5,7 @@
 will raise an ImportError. So, this should not be in any api import.
 """
 
+import warnings
 import numpy as np
 import regreg.api as rr
 
@@ -16,19 +17,25 @@
     from rpy2 import robjects
     import rpy2.robjects.numpy2ri
     rpy2.robjects.numpy2ri.activate()
+    have_rpy2 = True
 except ImportError:
-    raise ImportError('rpy2 seems not to be installed')
+    warnings.warn('rpy2 seems not to be installed -- CV_glmnet class will not work')
+    hav_rpy2 = False
 
 class CV_glmnet(object):
 
     def __init__(self, loss, loss_label):
         self.loss = loss
-        if loss_label == "gaussian":
-            self.family = robjects.StrVector('g')
-        elif loss_label == "logistic":
-            self.family = robjects.StrVector('b')
+        if have_rpy2:
+            if loss_label == "gaussian":
+                self.family = robjects.StrVector('g')
+            elif loss_label == "logistic":
+                self.family = robjects.StrVector('b')
+            importr('glmnet')
 
     def using_glmnet(self, loss=None):
+        if not have_rpy2:
+            raise ImportError("""rpy2 failed to load""")
         robjects.r('''
             glmnet_cv = function(X,y, family, lam_seq=NA){
             y = as.matrix(y)
diff --git a/selection/randomized/cv_view.py b/selection/randomized/cv_view.py
index 6c4e8d3bc..05d9f71c1 100644
--- a/selection/randomized/cv_view.py
+++ b/selection/randomized/cv_view.py
@@ -1,11 +1,12 @@
 import functools
 import numpy as np
 import regreg.api as rr
+
 from .query import query
-from selection.randomized.cv import CV
-from selection.randomized.cv_glmnet import CV_glmnet
-from selection.randomized.glm import bootstrap_cov
-from selection.api import randomization
+from .cv import CV
+from .cv_glmnet import CV_glmnet
+from .glm import bootstrap_cov
+from .randomization import randomization
 
 class CV_view(query):
 

From 2c98f75dcfc343d73922db98174fe98cab2ce06d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:44:55 -0700
Subject: [PATCH 073/617] using python-rpy2 apt package

---
 .travis.yml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 7ec6ae4ab..486572d24 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,11 +9,17 @@ addons:
     packages:
         - libblas-dev
         - liblapack-dev
-        - r-base-dev
 env:
     global:
         # Maximal dependencies
         - DEPENDS="cython numpy scipy matplotlib"
+matrix:
+  include:
+    - python: 3.5
+      sudo: true
+      dist: trusty
+      env:
+        - DOC_BUILD=1
 before_install:
   - source travis-tools/utils.sh
   - travis_before_install
@@ -35,7 +41,7 @@ script:
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
     - 'echo "backend : agg" > matplotlibrc'
     - sudo apt-get update
-    - sudo apt-get install -y r-base r-base-dev
+    - sudo apt-get install -y r-base r-base-dev python-rpy2
     - sudo Rscript -e "install.packages(c('glmnet', 'Matrix'), repos='http://cloud.r-project.org');"
     - pip install -r doc-requirements.txt # installs rpy2 among other things
 

From 86a3e51c6f7da0a178d749d0871844a6899abaf1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:48:46 -0700
Subject: [PATCH 074/617] BF: hav_rpy->have_rpy; using trusty

---
 .travis.yml                       | 2 +-
 selection/randomized/cv_glmnet.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 486572d24..72fa30f42 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,5 @@
 language: python
+dist: trusty
 python:
   - 2.7
   - 3.3
@@ -17,7 +18,6 @@ matrix:
   include:
     - python: 3.5
       sudo: true
-      dist: trusty
       env:
         - DOC_BUILD=1
 before_install:
diff --git a/selection/randomized/cv_glmnet.py b/selection/randomized/cv_glmnet.py
index f56a1d976..86206a2d4 100644
--- a/selection/randomized/cv_glmnet.py
+++ b/selection/randomized/cv_glmnet.py
@@ -20,7 +20,8 @@
     have_rpy2 = True
 except ImportError:
     warnings.warn('rpy2 seems not to be installed -- CV_glmnet class will not work')
-    hav_rpy2 = False
+    have_rpy2 = False
+    pass
 
 class CV_glmnet(object):
 

From 83f9a873feb8467259215e725d5aa2356ae8f670 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 14:55:46 -0700
Subject: [PATCH 075/617] removing rpy2 as doc requirement

---
 doc-requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc-requirements.txt b/doc-requirements.txt
index 1b77f35d6..a006abf8f 100644
--- a/doc-requirements.txt
+++ b/doc-requirements.txt
@@ -5,5 +5,4 @@ sphinx>=1.4
 numpydoc
 matplotlib
 texext
-rpy2
 nb2plots

From 59a6eb639027d3667ef4da8094f560f9a3e559c8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 15:12:05 -0700
Subject: [PATCH 076/617] BF: shapes of identity in selector

---
 selection/randomized/M_estimator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 95d873732..bc7660f39 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -536,8 +536,8 @@ def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
         X_restricted = X[:,active]
         loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
     else:
-        I_restricted = ra.selector(active, X.input_shape[0], ra.identity(X.input_shape))
-        loss_restricted = rr.affine_smooth(Mest_loss, I_restricted)
+        I_restricted = ra.selector(active, X.input_shape[0], ra.identity((active.sum(),)))
+        loss_restricted = rr.affine_smooth(Mest_loss, I_restricted.T)
     beta_E = loss_restricted.solve(**solve_args)
     
     return beta_E

From 020e29c86db8d0c311db42e40ba9bf305c64530c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 15:39:54 -0700
Subject: [PATCH 077/617] making CV methods use python when glmnet not
 available

---
 .../test_cv_corrected_nonrandomized_lasso.py  | 36 ++++++------
 .../randomized/tests/test_cv_lee_et_al.py     | 34 +++++------
 selection/randomized/tests/test_naive.py      | 38 +++++++------
 selection/randomized/tests/test_power.py      | 57 ++++++++++---------
 selection/randomized/tests/test_sqrt_lasso.py | 41 ++++---------
 5 files changed, 96 insertions(+), 110 deletions(-)

diff --git a/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py b/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
index c62abb08c..dfe1c5ec1 100644
--- a/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
+++ b/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
@@ -2,19 +2,20 @@
 from scipy.stats import norm as ndist
 import pandas as pd
 import regreg.api as rr
-import selection.api as sel
-from selection.tests.instance import (gaussian_instance, logistic_instance)
-from selection.randomized.glm import (pairs_bootstrap_glm,
-                                      glm_nonparametric_bootstrap)
-from selection.algorithms.lasso import (glm_sandwich_estimator,
+
+from ...tests.instance import (gaussian_instance, logistic_instance)
+import selection.tests.reports as reports
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+
+from ...algorithms.lasso import (glm_sandwich_estimator,
                                         lasso)
-from selection.constraints.affine import (constraints,
+from ..glm import (pairs_bootstrap_glm,
+                                      glm_nonparametric_bootstrap)
+from ...constraints.affine import (constraints,
                                           stack)
-from selection.randomized.cv_view import CV_view
-import selection.tests.reports as reports
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
-from selection.randomized.tests.test_cv_lee_et_al import pivot, equal_tailed_interval
+from ..cv_view import CV_view, have_glmnet
+from .test_cv_lee_et_al import pivot, equal_tailed_interval
 
 @register_report(['pvalue', 'cover', 'ci_length_clt',
                   'naive_pvalues', 'covered_naive', 'ci_length_naive',
@@ -24,13 +25,14 @@
 def test_cv_corrected_nonrandomized_lasso(n=300,
                                           p=100,
                                           s=3,
-                                          signal = 3.5,
-                                          rho = 0.,
-                                          sigma = 1.,
-                                          K = 5,
+                                          signal=3.5,
+                                          rho=0.,
+                                          sigma=1.,
+                                          K=5,
                                           loss="gaussian",
-                                          X = None,
+                                          X=None,
                                           check_screen=True,
+                                          glmnet=True,
                                           intervals=False):
 
     print (n, p, s, rho)
@@ -58,7 +60,7 @@ def test_cv_corrected_nonrandomized_lasso(n=300,
     cv = CV_view(glm_loss, loss_label=loss, lasso_randomization=None, epsilon=None,
                  scale1=0.01, scale2=0.01)
     # views.append(cv)
-    cv.solve(glmnet=True)
+    cv.solve(glmnet=glmnet and have_glmnet)
     lam_CV_randomized = cv.lam_CVR
     print("minimizer of CVR", lam_CV_randomized)
 
diff --git a/selection/randomized/tests/test_cv_lee_et_al.py b/selection/randomized/tests/test_cv_lee_et_al.py
index 8d2899872..9fb7c0e61 100644
--- a/selection/randomized/tests/test_cv_lee_et_al.py
+++ b/selection/randomized/tests/test_cv_lee_et_al.py
@@ -1,21 +1,22 @@
 import numpy as np
 import regreg.api as rr
 import pandas as pd
-import selection.api as sel
-from selection.tests.instance import gaussian_instance
-from selection.algorithms.lasso import lasso
-import selection.tests.reports as reports
-
-from selection.tests.flags import SET_SEED
-from selection.tests.decorators import (wait_for_return_value, 
-                                        set_seed_iftrue, 
-                                        set_sampling_params_iftrue, 
-                                        register_report)
-from statsmodels.sandbox.stats.multicomp import multipletests
-from selection.randomized.cv_view import CV_view
 from scipy.stats import norm as ndist
 from scipy.optimize import bisect
-from selection.randomized.query import (naive_pvalues, naive_confidence_intervals)
+from statsmodels.sandbox.stats.multicomp import multipletests
+
+from ...tests.instance import gaussian_instance
+from ...algorithms.lasso import lasso
+
+import selection.tests.reports as reports
+from ...tests.flags import SET_SEED
+from ...tests.decorators import (wait_for_return_value, 
+                                 set_seed_iftrue, 
+                                 set_sampling_params_iftrue, 
+                                 register_report)
+
+from ..cv_view import (CV_view, have_glmnet)
+from ..query import (naive_pvalues, naive_confidence_intervals)
 
 
 def restricted_gaussian(Z, interval=[-5.,5.]):
@@ -63,8 +64,9 @@ def test_lee_et_al(n=300,
                    sigma = 1.,
                    cross_validation=True,
                    condition_on_CVR=False,
-                   lam_frac = 0.6,
-                   X = None,
+                   lam_frac=0.6,
+                   glmnet=True,
+                   X=None,
                    check_screen=True,
                    intervals=False):
 
@@ -83,7 +85,7 @@ def test_lee_et_al(n=300,
         cv = CV_view(rr.glm.gaussian(X,y), loss_label="gaussian", lasso_randomization=None, epsilon=None,
                      scale1=None, scale2=None)
         # views.append(cv)
-        cv.solve(glmnet=True)
+        cv.solve(glmnet=glmnet and have_glmnet)
         lam = cv.lam_CVR
         print("minimizer of CVR", lam)
 
diff --git a/selection/randomized/tests/test_naive.py b/selection/randomized/tests/test_naive.py
index cd2353af5..31b7309c3 100644
--- a/selection/randomized/tests/test_naive.py
+++ b/selection/randomized/tests/test_naive.py
@@ -1,17 +1,18 @@
 import numpy as np
 import regreg.api as rr
 import pandas as pd
-import selection.api as sel
-from selection.tests.instance import gaussian_instance
-from selection.algorithms.lasso import lasso
-import selection.tests.reports as reports
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
-from statsmodels.sandbox.stats.multicomp import multipletests
-from selection.randomized.cv_view import CV_view
 from scipy.stats import norm as ndist
 from scipy.optimize import bisect
-from selection.randomized.query import (naive_pvalues, naive_confidence_intervals)
+
+from statsmodels.sandbox.stats.multicomp import multipletests
+
+from ...tests.instance import gaussian_instance
+from ...algorithms.lasso import lasso
+import selection.tests.reports as reports
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+from ..cv_view import CV_view, have_glmnet
+from ..query import (naive_pvalues, naive_confidence_intervals)
 
 def compute_projection_parameters(n, p, s, signal, rho, sigma, active):
     multiple = 10**2
@@ -34,16 +35,17 @@ def compute_projection_parameters(n, p, s, signal, rho, sigma, active):
 def test_naive(n=300,
                p=100,
                s=10,
-               signal = 3.5,
-               rho = 0.,
-               sigma = 1.,
+               signal=3.5,
+               rho=0.,
+               sigma=1.,
                cross_validation=True,
                condition_on_CVR=False,
-               lam_frac = 1.,
-               X = None,
-               check_screen = False,
-               check_projection_param = False,
-               check_selected_param = True,
+               lam_frac=1.,
+               X=None,
+               glmnet=True,
+               check_screen=False,
+               check_projection_param=False,
+               check_selected_param=True,
                intervals = False):
 
     print(n, p, s)
@@ -62,7 +64,7 @@ def test_naive(n=300,
         cv = CV_view(rr.glm.gaussian(X,y), loss_label="gaussian", lasso_randomization=None, epsilon=None,
                      scale1=None, scale2=None)
 
-        cv.solve(glmnet=True)
+        cv.solve(glmnet=glmnet and have_glmnet)
         lam = cv.lam_CVR
 
         if condition_on_CVR:
diff --git a/selection/randomized/tests/test_power.py b/selection/randomized/tests/test_power.py
index d39f67be3..af39aeeef 100644
--- a/selection/randomized/tests/test_power.py
+++ b/selection/randomized/tests/test_power.py
@@ -1,25 +1,25 @@
 from __future__ import print_function
 import numpy as np
+from statsmodels.sandbox.stats.multicomp import multipletests
 
 import regreg.api as rr
-import selection.tests.reports as reports
-
-
-from selection.tests.flags import SET_SEED, SMALL_SAMPLES
-from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.tests.decorators import (wait_for_return_value,
-                                        set_seed_iftrue,
-                                        set_sampling_params_iftrue,
-                                        register_report)
-import selection.tests.reports as reports
-
-from selection.api import (randomization,
-                           glm_group_lasso,
-                           glm_group_lasso_parametric,
-                           multiple_queries,
-                           glm_target)
-from statsmodels.sandbox.stats.multicomp import multipletests
-from selection.randomized.cv_view import CV_view
+
+from ...tests.flags import SET_SEED, SMALL_SAMPLES
+from ...tests.instance import logistic_instance, gaussian_instance
+from ...tests.decorators import (wait_for_return_value,
+                                 set_seed_iftrue,
+                                 set_sampling_params_iftrue,
+                                 register_report)
+from ...tests.reports import (reports,
+                              collect_multiple_runs,
+                              pivot_plot_simple)
+
+from ..api import (randomization,
+                   glm_group_lasso,
+                   glm_group_lasso_parametric,
+                   multiple_queries,
+                   glm_target)
+from ..cv_view import CV_view, have_glmnet
 
 
 @register_report(['pvalue', 'active_var'])
@@ -42,7 +42,8 @@ def test_power(s=30,
                loss='gaussian',
                scalings=False,
                subgrad =True,
-               parametric=True):
+               parametric=True,
+               glmnet=True):
 
     print(n,p,s)
     if loss=="gaussian":
@@ -67,7 +68,7 @@ def test_power(s=30,
         cv = CV_view(glm_loss, loss_label=loss, lasso_randomization=randomizer, epsilon=epsilon,
                      scale1=0.01, scale2=0.01)
         #views.append(cv)
-        cv.solve(glmnet=True)
+        cv.solve(glmnet=glmnet and have_glmnet)
         lam = cv.lam_CVR
         print("minimizer of CVR", lam)
 
@@ -157,14 +158,14 @@ def simple_rejections(pvalues, active_var, s, alpha=0.05):
 
 def report(niter=50, **kwargs):
     np.random.seed(500)
-    condition_report = reports.reports['test_power']
-    runs = reports.collect_multiple_runs(condition_report['test'],
-                                         condition_report['columns'],
-                                         niter,
-                                         reports.summarize_all,
-                                         **kwargs)
-
-    fig = reports.pivot_plot_simple(runs)
+    condition_report = reports['test_power']
+    runs = collect_multiple_runs(condition_report['test'],
+                                 condition_report['columns'],
+                                 niter,
+                                 reports.summarize_all,
+                                 **kwargs)
+
+    fig = pivot_plot_simple(runs)
     fig.savefig('marginalized_subgrad_pivots.pdf')
 
 
diff --git a/selection/randomized/tests/test_sqrt_lasso.py b/selection/randomized/tests/test_sqrt_lasso.py
index f523fa2aa..08af3fdda 100644
--- a/selection/randomized/tests/test_sqrt_lasso.py
+++ b/selection/randomized/tests/test_sqrt_lasso.py
@@ -1,22 +1,18 @@
 import numpy as np
 
 import regreg.api as rr
-from selection.api import (randomization,
-                           glm_group_lasso,
-                           multiple_queries,
-                           glm_target)
-from selection.tests.instance import (gaussian_instance,
+from ..api import (randomization,
+                   glm_group_lasso,
+                   multiple_queries,
+                   glm_target)
+from ...tests.instance import (gaussian_instance,
                                       logistic_instance)
-from selection.algorithms.sqrt_lasso import (sqlasso_objective,
-                                             choose_lambda)
-from selection.randomized.query import naive_confidence_intervals
-from selection.randomized.query import naive_pvalues
-
-import selection.tests.reports as reports
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
-from selection.randomized.cv_view import CV_view
+from ...algorithms.sqrt_lasso import (sqlasso_objective,
+                                      choose_lambda)
+from ..query import naive_confidence_intervals, naive_pvalues
 
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
 
 def choose_lambda_with_randomization(X, randomization, quantile=0.90, ndraw=10000):
     X = rr.astransform(X)
@@ -162,20 +158,3 @@ def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
         return pivots_truth, sel_covered, sel_length, naive_pvals, naive_covered, naive_length, active_var, BH_desicions, active_var
 
 
-def report(niter=10, **kwargs):
-
-    kwargs = {'s': 30, 'n': 3000, 'p': 1000, 'signal': 3.5, 'bootstrap': False}
-    intervals_report = reports.reports['test_cv']
-    CV_runs = reports.collect_multiple_runs(intervals_report['test'],
-                                             intervals_report['columns'],
-                                             niter,
-                                             reports.summarize_all,
-                                             **kwargs)
-
-    fig = reports.pivot_plot_plus_naive(CV_runs)
-    fig.suptitle("CV pivots")
-    fig.savefig('cv_pivots.pdf')
-
-
-if __name__ == '__main__':
-    report()

From be17b4dec2856f3ca0028d410a7f8affba86026e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 15:47:45 -0700
Subject: [PATCH 078/617] adding repository that regreg uses

---
 .travis.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 72fa30f42..a6e17a8ba 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,6 +40,8 @@ script:
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
     - 'echo "backend : agg" > matplotlibrc'
+    - sudo apt-get install software-properties-common
+    - sudo add-apt-repository -y ppa:marutter/rrutter
     - sudo apt-get update
     - sudo apt-get install -y r-base r-base-dev python-rpy2
     - sudo Rscript -e "install.packages(c('glmnet', 'Matrix'), repos='http://cloud.r-project.org');"

From c393252edfbe99e003b74f86aae224a2b5f8e937 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 15:48:13 -0700
Subject: [PATCH 079/617] seeing if using new repository makes rpy2 work

---
 doc-requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc-requirements.txt b/doc-requirements.txt
index a006abf8f..25571f74f 100644
--- a/doc-requirements.txt
+++ b/doc-requirements.txt
@@ -6,3 +6,4 @@ numpydoc
 matplotlib
 texext
 nb2plots
+rpy2

From f9bcb53cda7cfc3d419d4174f87d07d33de31909 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 15:49:28 -0700
Subject: [PATCH 080/617] BF: forgot to add changes to cv_view, cv_glmnet

---
 selection/randomized/cv_glmnet.py | 12 ++++++------
 selection/randomized/cv_view.py   |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/selection/randomized/cv_glmnet.py b/selection/randomized/cv_glmnet.py
index 86206a2d4..7d961f678 100644
--- a/selection/randomized/cv_glmnet.py
+++ b/selection/randomized/cv_glmnet.py
@@ -17,26 +17,26 @@
     from rpy2 import robjects
     import rpy2.robjects.numpy2ri
     rpy2.robjects.numpy2ri.activate()
-    have_rpy2 = True
+    importr('glmnet')
+    have_glmnet = True
 except ImportError:
     warnings.warn('rpy2 seems not to be installed -- CV_glmnet class will not work')
-    have_rpy2 = False
+    have_glmnet = False
     pass
 
 class CV_glmnet(object):
 
     def __init__(self, loss, loss_label):
         self.loss = loss
-        if have_rpy2:
+        if have_glmnet:
             if loss_label == "gaussian":
                 self.family = robjects.StrVector('g')
             elif loss_label == "logistic":
                 self.family = robjects.StrVector('b')
-            importr('glmnet')
 
     def using_glmnet(self, loss=None):
-        if not have_rpy2:
-            raise ImportError("""rpy2 failed to load""")
+        if not have_glmnet:
+            raise ImportError("""glmnet failed to load with rpy2""")
         robjects.r('''
             glmnet_cv = function(X,y, family, lam_seq=NA){
             y = as.matrix(y)
diff --git a/selection/randomized/cv_view.py b/selection/randomized/cv_view.py
index 05d9f71c1..bbdcd2ea6 100644
--- a/selection/randomized/cv_view.py
+++ b/selection/randomized/cv_view.py
@@ -4,7 +4,7 @@
 
 from .query import query
 from .cv import CV
-from .cv_glmnet import CV_glmnet
+from .cv_glmnet import CV_glmnet, have_glmnet
 from .glm import bootstrap_cov
 from .randomization import randomization
 

From 3fbddd63ee4ccfdc6f6a62cfb8ab00f9a0ff2265 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 16:20:07 -0700
Subject: [PATCH 081/617] installing selectiveInference R code

---
 .travis.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a6e17a8ba..93d15f1af 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,6 +30,7 @@ before_install:
   - pip install -r requirements.txt
   - pip install -e .
   - cd ..
+
 install:
   # Install selection
   - pip install -r requirements.txt
@@ -43,8 +44,11 @@ script:
     - sudo apt-get install software-properties-common
     - sudo add-apt-repository -y ppa:marutter/rrutter
     - sudo apt-get update
-    - sudo apt-get install -y r-base r-base-dev python-rpy2
-    - sudo Rscript -e "install.packages(c('glmnet', 'Matrix'), repos='http://cloud.r-project.org');"
+    - sudo apt-get install -y r-base r-base-dev 
+    # - sudo Rscript -e "install.packages(c('glmnet', 'Matrix', 'lars'), repos='http://cloud.r-project.org');"
+    - git clone https://github.com/selective-inference/R-software.git
+    - cd R-software
+    - R CMD install selectiveInference
     - pip install -r doc-requirements.txt # installs rpy2 among other things
 
     # Change into an innocuous directory and find tests from installation

From bd7d6529e0d34748014aa1d1641ad7ee0fef28d1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 16:22:18 -0700
Subject: [PATCH 082/617] no mpl figs

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 93d15f1af..0b4f602a2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,7 +40,7 @@ install:
 script:
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
-    - 'echo "backend : agg" > matplotlibrc'
+    - echo "backend : agg" > matplotlibrc
     - sudo apt-get install software-properties-common
     - sudo add-apt-repository -y ppa:marutter/rrutter
     - sudo apt-get update

From d9dd453695a53141180165735c66b11fe84d64e5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 17:07:49 -0700
Subject: [PATCH 083/617] getting selectiveInference installed always

---
 .travis.yml | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 0b4f602a2..10922927b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,25 +30,27 @@ before_install:
   - pip install -r requirements.txt
   - pip install -e .
   - cd ..
+  - sudo apt-get install software-properties-common
+  - sudo add-apt-repository -y ppa:marutter/rrutter
+  - sudo apt-get update
+  - sudo apt-get install -y r-base r-base-dev 
+  # - sudo Rscript -e "install.packages(c('glmnet', 'Matrix', 'lars'), repos='http://cloud.r-project.org');"
+  - git clone https://github.com/selective-inference/R-software.git
+  - cd R-software
+  - sudo R CMD install selectiveInference
+  - cd ..
 
 install:
   # Install selection
   - pip install -r requirements.txt
   - pip install -e .
   - travis_install $INSTALL_TYPE
+
 # command to run tests, e.g. python setup.py test
 script:
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
     - echo "backend : agg" > matplotlibrc
-    - sudo apt-get install software-properties-common
-    - sudo add-apt-repository -y ppa:marutter/rrutter
-    - sudo apt-get update
-    - sudo apt-get install -y r-base r-base-dev 
-    # - sudo Rscript -e "install.packages(c('glmnet', 'Matrix', 'lars'), repos='http://cloud.r-project.org');"
-    - git clone https://github.com/selective-inference/R-software.git
-    - cd R-software
-    - R CMD install selectiveInference
     - pip install -r doc-requirements.txt # installs rpy2 among other things
 
     # Change into an innocuous directory and find tests from installation

From 90768fdf68880e4df9d7ed802f628013ad3027b8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 17:08:37 -0700
Subject: [PATCH 084/617] using INSTALL

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 10922927b..92b0055ae 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,7 +37,7 @@ before_install:
   # - sudo Rscript -e "install.packages(c('glmnet', 'Matrix', 'lars'), repos='http://cloud.r-project.org');"
   - git clone https://github.com/selective-inference/R-software.git
   - cd R-software
-  - sudo R CMD install selectiveInference
+  - sudo R CMD INSTALL selectiveInference
   - cd ..
 
 install:

From 5e6706c034ec0aaa3b5660596396c04c091697f7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 17:14:39 -0700
Subject: [PATCH 085/617] made one travis test just to compare to R

---
 .travis.yml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 92b0055ae..ac74a6421 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,8 +18,9 @@ matrix:
   include:
     - python: 3.5
       sudo: true
+      dist: trusty
       env:
-        - DOC_BUILD=1
+        - R_TESTS=1
 before_install:
   - source travis-tools/utils.sh
   - travis_before_install
@@ -66,6 +67,11 @@ script:
       cp ../.coveragerc .;
       COVER_ARGS="--with-coverage --cover-package selection";
       fi
-    - env USE_SMALL_SAMPLES=1 SET_SEED=1 nosetests $DOCTEST_ARGS --verbose $COVER_ARGS selection
+    - |
+      if [ "$R_TESTS" ]; then
+        nosetests ../selection/algorithms/tests/test_compareR.py
+      else 
+        env USE_SMALL_SAMPLES=1 SET_SEED=1 nosetests $DOCTEST_ARGS --verbose $COVER_ARGS selection
+      fi
 after_success:
     - if [ "${COVERAGE}" == "1" ]; then coveralls; fi

From c3afac12558d1236d3e840140d190c3db8c8e77a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 18:24:26 -0700
Subject: [PATCH 086/617] using devtools to install package

---
 .travis.yml | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index ac74a6421..b40f3e4bf 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,11 +35,8 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev 
-  # - sudo Rscript -e "install.packages(c('glmnet', 'Matrix', 'lars'), repos='http://cloud.r-project.org');"
-  - git clone https://github.com/selective-inference/R-software.git
-  - cd R-software
-  - sudo R CMD INSTALL selectiveInference
-  - cd ..
+  - sudo Rscript -e "install.packages(c('devtools'), repos='http://cloud.r-project.org');"
+  - sudo Rscript -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:
   # Install selection

From a50d6f8eb02e5aaecef2dd9fb7c580659f813589 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 18:31:37 -0700
Subject: [PATCH 087/617] forgot library

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index b40f3e4bf..e44a3332b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -36,7 +36,7 @@ before_install:
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev 
   - sudo Rscript -e "install.packages(c('devtools'), repos='http://cloud.r-project.org');"
-  - sudo Rscript -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
+  - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:
   # Install selection

From eef320e1db0ad18c362d09faf523b8ffe9b54128 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 18:35:11 -0700
Subject: [PATCH 088/617] trying again with devtools

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index e44a3332b..a380bf9de 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,7 +35,7 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev 
-  - sudo Rscript -e "install.packages(c('devtools'), repos='http://cloud.r-project.org');"
+  - sudo Rscript -e "install.packages('devtools', repos='http://cloud.r-project.org');"
   - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:

From 6a69323270a4df207e0afcc6b403b09bab07c112 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 18:35:39 -0700
Subject: [PATCH 089/617] trying again with devtools

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a380bf9de..3e24df554 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,8 +35,8 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev 
-  - sudo Rscript -e "install.packages('devtools', repos='http://cloud.r-project.org');"
-  - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
+  - Rscript -e "install.packages('devtools', repos='http://cloud.r-project.org');"
+  - Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:
   # Install selection

From 168fadbcb02d455076be2ccea1cec5ff521ecfee Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 18:36:21 -0700
Subject: [PATCH 090/617] using sudo

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 3e24df554..a380bf9de 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,8 +35,8 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev 
-  - Rscript -e "install.packages('devtools', repos='http://cloud.r-project.org');"
-  - Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
+  - sudo Rscript -e "install.packages('devtools', repos='http://cloud.r-project.org');"
+  - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:
   # Install selection

From bcad9d157f9052c751bbf9e19489076e3deb826e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 19:43:25 -0700
Subject: [PATCH 091/617] quotes around echo?

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index a380bf9de..885e6007a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -46,9 +46,9 @@ install:
 
 # command to run tests, e.g. python setup.py test
 script:
+    - 'echo "backend : agg" > matplotlibrc'
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
-    - echo "backend : agg" > matplotlibrc
     - pip install -r doc-requirements.txt # installs rpy2 among other things
 
     # Change into an innocuous directory and find tests from installation

From e225475c0e4b0dffaecc469f6f4ea58285113113 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 19:56:39 -0700
Subject: [PATCH 092/617] move matplotlibrc to testing directory

---
 .travis.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 885e6007a..3f56de9bd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -46,7 +46,7 @@ install:
 
 # command to run tests, e.g. python setup.py test
 script:
-    - 'echo "backend : agg" > matplotlibrc'
+
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
     - pip install -r doc-requirements.txt # installs rpy2 among other things
@@ -54,6 +54,7 @@ script:
     # Change into an innocuous directory and find tests from installation
     - mkdir for_testing
     - cd for_testing
+    - 'echo "backend : agg" > matplotlibrc'
 
     # Doctests only on platforms that have compatible fp output
     - if [ `uname` == "Darwin" ] ||

From f86a9a24cf582228c97f54ed7352df71e38019b4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 20:15:43 -0700
Subject: [PATCH 093/617] a different repo?

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 3f56de9bd..07a0c4c1a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,7 +35,7 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev 
-  - sudo Rscript -e "install.packages('devtools', repos='http://cloud.r-project.org');"
+  - sudo Rscript -e "install.packages('devtools', repos='http://cran.r-project.org');"
   - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:

From 3c768f1ea87c3d20fd578f819d545f47ed2006e4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 20:17:27 -0700
Subject: [PATCH 094/617] not using python3.3

---
 .travis.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 07a0c4c1a..dbddfab7e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,7 +2,8 @@ language: python
 dist: trusty
 python:
   - 2.7
-  - 3.3
+  - 3.4
+  - 3.5
 notifications:
   email: false
 addons:

From 106bba0cd11d8bb972c4b0fbdc708fadf35557de Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 20:39:35 -0700
Subject: [PATCH 095/617] using apt for devtools

---
 .travis.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index dbddfab7e..1223cea89 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,8 +35,7 @@ before_install:
   - sudo apt-get install software-properties-common
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
-  - sudo apt-get install -y r-base r-base-dev 
-  - sudo Rscript -e "install.packages('devtools', repos='http://cran.r-project.org');"
+  - sudo apt-get install -y r-base r-base-dev r-cran-devtools
   - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:

From 77fc7015304bbec08b1dfdfbdbe54a51967a1ffc Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 20:46:57 -0700
Subject: [PATCH 096/617] larger cran repo

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 1223cea89..4dda471dc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -33,6 +33,7 @@ before_install:
   - pip install -e .
   - cd ..
   - sudo apt-get install software-properties-common
+  - sudo add-apt-repository -y ppa:marutter/c2d4u
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev r-cran-devtools

From a53eb8418e26117f5bba018326e675a7e8e848ad Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 20:50:46 -0700
Subject: [PATCH 097/617] no need to install devtools with rscript

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4dda471dc..5bdab4df7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,7 +37,7 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev r-cran-devtools
-  - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
+  - sudo Rscript -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:
   # Install selection

From 03f15481bb153fa96daaf9cb7bf96be7f35976ac Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 20:54:22 -0700
Subject: [PATCH 098/617] need library though

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 5bdab4df7..4dda471dc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,7 +37,7 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev r-cran-devtools
-  - sudo Rscript -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
+  - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:
   # Install selection

From a940bf9902db7c721e34ce359ddace44312fed97 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 20:57:54 -0700
Subject: [PATCH 099/617] a separate line?

---
 .travis.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4dda471dc..665b9bfd8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,7 +37,8 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev r-cran-devtools
-  - sudo Rscript -e "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')"
+  - sudo Rscript -e "library(devtools)"
+  - sudo Rscript -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 install:
   # Install selection

From 929f1814afaeb3b7e411f4db532ad28fc9e06f64 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 21:05:32 -0700
Subject: [PATCH 100/617] maybe a script?

---
 .travis.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 665b9bfd8..534595c4c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,8 +37,9 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev r-cran-devtools
-  - sudo Rscript -e "library(devtools)"
-  - sudo Rscript -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
+  - 'echo "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')" > install.R'
+  - sudo Rscript install.R
+
 
 install:
   # Install selection

From e3ab381ded35288640607f579cbe16bb019050f7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 21:06:46 -0700
Subject: [PATCH 101/617] two -e calls?

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 534595c4c..f6318f579 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,7 +38,7 @@ before_install:
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev r-cran-devtools
   - 'echo "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')" > install.R'
-  - sudo Rscript install.R
+  - sudo Rscript -e "library(devtools)" -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 
 install:

From d0d5e6111b79e250ac6695d299b52b3480aa4356 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 21:07:38 -0700
Subject: [PATCH 102/617] deleting echo

---
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index f6318f579..ed02a6596 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,7 +37,6 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev r-cran-devtools
-  - 'echo "library(devtools); install_github('selective-inference/R-software', subdir='selectiveInference')" > install.R'
   - sudo Rscript -e "library(devtools)" -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 

From e4a1ee9fe9d44681232d1af00749f9026fdc19fd Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 21:12:49 -0700
Subject: [PATCH 103/617] removing sudo line?

---
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index ed02a6596..18b056e6c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,7 +18,6 @@ env:
 matrix:
   include:
     - python: 3.5
-      sudo: true
       dist: trusty
       env:
         - R_TESTS=1

From 51585e723e8f2eb9b011297baaa4efebdfb925dd Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 21:19:46 -0700
Subject: [PATCH 104/617] removing dist line?

---
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 18b056e6c..ff909ab2b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,7 +18,6 @@ env:
 matrix:
   include:
     - python: 3.5
-      dist: trusty
       env:
         - R_TESTS=1
 before_install:

From 5efdb32c5cc53c3ec938a37d9863e7f408c46d19 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 21:22:40 -0700
Subject: [PATCH 105/617] also python2.7

---
 .travis.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index ff909ab2b..6fa49190c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,6 +20,10 @@ matrix:
     - python: 3.5
       env:
         - R_TESTS=1
+  include:
+    - python: 2.7
+      env:
+        - R_TESTS=1
 before_install:
   - source travis-tools/utils.sh
   - travis_before_install

From e68970bd60d24906fd5a6ff705599ea08de20c61 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 14 Aug 2017 21:40:03 -0700
Subject: [PATCH 106/617] unnecessary include

---
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 6fa49190c..129539f65 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,6 @@ matrix:
     - python: 3.5
       env:
         - R_TESTS=1
-  include:
     - python: 2.7
       env:
         - R_TESTS=1

From 2982eb74eb73ca93b0fa60054380fcb68084cdaf Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 00:01:35 -0700
Subject: [PATCH 107/617] DOC: adding some docstrings to glm, fixing absolute
 to relative imports, renaming standard_CI

---
 selection/randomized/M_estimator.py           |   2 +-
 selection/randomized/glm.py                   | 262 +++++++++++++++---
 selection/randomized/tests/test_cvglmnet.py   |  27 ++
 .../tests/test_multiple_queries_CI.py         |  10 +-
 .../randomized/tests/test_multiple_splits.py  |   9 +-
 .../randomized/tests/test_split_compare.py    |  12 +-
 selection/randomized/tests/test_sqrt_lasso.py |   2 +-
 7 files changed, 262 insertions(+), 62 deletions(-)
 create mode 100644 selection/randomized/tests/test_cvglmnet.py

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index bc7660f39..c662774a2 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -536,7 +536,7 @@ def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
         X_restricted = X[:,active]
         loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
     else:
-        I_restricted = ra.selector(active, X.input_shape[0], ra.identity((active.sum(),)))
+        I_restricted = ra.selector(active, ra.astransform(X).input_shape[0], ra.identity((active.sum(),)))
         loss_restricted = rr.affine_smooth(Mest_loss, I_restricted.T)
     beta_E = loss_restricted.solve(**solve_args)
     
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 06e5798cc..5828962f2 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -1,13 +1,14 @@
 import functools # for bootstrap partial mapping
 
 import numpy as np
+from scipy.stats import norm as ndist
+
 from regreg.api import glm
 
 from .M_estimator import restricted_Mest, M_estimator, M_estimator_split
 from .greedy_step import greedy_score_step
 from .threshold_score import threshold_score
 
-from regreg.api import glm
 
 def pairs_bootstrap_glm(glm_loss,
                         active, 
@@ -16,7 +17,44 @@ def pairs_bootstrap_glm(glm_loss,
                         scaling=1.,
                         solve_args={'min_its':50, 'tol':1.e-10}):
     """
-    pairs bootstrap of (beta_hat_active, -grad_inactive(beta_hat_active))
+    Construct a non-parametric bootstrap sampler that 
+    samples the estimates ($\bar{\beta}_E^*$) of a generalized 
+    linear model (GLM) restricted to `active`
+    as well as, optionally, the inactive coordinates of the score of the 
+    GLM evaluated at the estimates ($\nabla \ell(\bar{\beta}_E)[-E]$) where
+    $\bar{\beta}_E$ is padded with zeros where necessary.
+    
+    Parameters
+    ----------
+
+    glm_loss : regreg.smooth.glm.glm
+        The loss of the generalized linear model.
+
+    active : np.bool
+        Boolean indexing array
+
+    beta_full : np.float (optional)
+        Solution to the restricted problem, zero except where active is nonzero.
+
+    inactive : np.bool (optional)
+        Boolean indexing array
+
+    scaling : float
+        Scaling to keep entries of roughly constant order. Active entries
+        are multiplied by sqrt(scaling) inactive ones are divided
+        by sqrt(scaling).
+
+    solve_args : dict
+        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        beta_full is None.
+
+    Returns
+    -------
+
+    bootstrap_sampler : callable
+        A callable object that takes a sample of indices and returns
+        the corresponding bootstrap sample.
+
     """
     X, Y = glm_loss.data
 
@@ -47,7 +85,7 @@ def pairs_bootstrap_glm(glm_loss,
 
     nactive = active.sum()
     if inactive is not None:
-        X_full = np.hstack([X_active,X_inactive])
+        X_full = np.hstack([X_active, X_inactive])
         beta_overall = np.zeros(X_full.shape[1])
         beta_overall[:nactive] = beta_active
     else:
@@ -81,13 +119,103 @@ def _boot_score(X_full, Y, ntotal, _bootQinv, _bootI, nactive, _sqrt_scaling, be
 
     return functools.partial(_boot_score, X_full, Y, ntotal, _bootQinv, _bootI, nactive, _sqrt_scaling, beta_overall), observed
 
+def pairs_inactive_score_glm(glm_loss, 
+                             active, 
+                             beta_active, 
+                             scaling=1.,
+                             solve_args={'min_its':50, 'tol':1.e-10}):
+
+    """
+    Construct a non-parametric bootstrap sampler that 
+    samples the inactive coordinates of the score of the 
+    GLM evaluated at the estimates ($\nabla \ell(\bar{\beta}_E)[-E]$) where
+    $\bar{\beta}_E$ is padded with zeros where necessary.
+    
+    Parameters
+    ----------
+
+    glm_loss : regreg.smooth.glm.glm
+        The loss of the generalized linear model.
+
+    active : np.bool
+        Boolean indexing array
+
+    beta_active : np.float
+        Solution to the restricted problem.
+
+    scaling : float
+        Scaling to keep entries of roughly constant order. Active entries
+        are multiplied by sqrt(scaling) inactive ones are divided
+        by sqrt(scaling).
+
+    solve_args : dict
+        Arguments forwarded to `pairs_bootstrap_glm` (which is always
+        called with an explicit `beta_full` here).
+
+    Returns
+    -------
+
+    bootstrap_sampler : callable
+        A callable object that takes a sample of indices and returns
+        the corresponding bootstrap sample.
+
+    """
+
+    inactive = ~active
+    beta_full = np.zeros(glm_loss.shape)
+    beta_full[active] = beta_active
+
+    _full_boot_score = pairs_bootstrap_glm(glm_loss, 
+                                           active, 
+                                           beta_full=beta_full,
+                                           inactive=inactive,
+                                           scaling=scaling,
+                                           solve_args=solve_args)[0]
+    nactive = active.sum()
+
+    def _boot_score(indices):
+        return _full_boot_score(indices)[nactive:]
+
+    return _boot_score
+
+
 def pairs_bootstrap_score(glm_loss,
                           active, 
                           beta_active=None, 
                           solve_args={'min_its':50, 'tol':1.e-10}):
     """
-    pairs bootstrap of (beta_hat_active, -grad_inactive(beta_hat_active))
+    Construct a non-parametric bootstrap sampler that 
+    samples the score ($\nabla \ell(\bar{\beta}_E)$) of a generalized
+    linear model (GLM) restricted to `active`
+    as well as, optionally, the inactive coordinates of the score of the 
+    GLM evaluated at the estimates ($\nabla \ell(\bar{\beta}_E)[-E]$) where
+    $\bar{\beta}_E$ is padded with zeros where necessary.
+    
+    Parameters
+    ----------
+
+    glm_loss : regreg.smooth.glm.glm
+        The loss of the generalized linear model.
+
+    active : np.bool
+        Boolean indexing array
+
+    beta_active : np.float (optional)
+        Solution to the restricted problem. 
+
+    solve_args : dict
+        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        beta_active is None.
+
+    Returns
+    -------
+
+    bootstrap_sampler : callable
+        A callable object that takes a sample of indices and returns
+        the corresponding bootstrap sample.
+
     """
+
     X, Y = glm_loss.data
 
     if beta_active is None:
@@ -112,7 +240,39 @@ def set_alpha_matrix(glm_loss,
                      inactive=None,
                      scaling=1.,
                      solve_args={'min_its': 50, 'tol': 1.e-10}):
+    """
+    DESCRIBE WHAT THIS DOES
 
+    Parameters
+    ----------
+
+    glm_loss : regreg.smooth.glm.glm
+        The loss of the generalized linear model.
+
+    active : np.bool
+        Boolean indexing array
+
+    beta_full : np.float (optional)
+        Solution to the restricted problem, zero except where active is nonzero.
+
+    inactive : np.bool (optional)
+        Boolean indexing array
+
+    scaling : float
+        Scaling to keep entries of roughly constant order. Active entries
+        are multiplied by sqrt(scaling) inactive ones are divided
+        by sqrt(scaling).
+
+    solve_args : dict
+        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        beta_full is None.
+
+    Returns
+    -------
+
+    ???????
+
+    """
     X, Y = glm_loss.data
 
     if beta_full is None:
@@ -153,6 +313,40 @@ def _parametric_cov_glm(glm_loss,
                         beta_full=None,
                         inactive=None,
                         solve_args={'min_its': 50, 'tol': 1.e-10}):
+    """
+    Compute parametric covariance of
+    the estimates ($\bar{\beta}_E^*$) of a generalized 
+    linear model (GLM) restricted to `active`
+    as well as, optionally, the inactive coordinates of the score of the 
+    GLM evaluated at the estimates ($\nabla \ell(\bar{\beta}_E)[-E]$) where
+    $\bar{\beta}_E$ is padded with zeros where necessary.
+
+    Parameters
+    ----------
+
+    glm_loss : regreg.smooth.glm.glm
+        The loss of the generalized linear model.
+
+    active : np.bool
+        Boolean indexing array
+
+    beta_full : np.float (optional)
+        Solution to the restricted problem, zero except where active is nonzero.
+
+    inactive : np.bool (optional)
+        Boolean indexing array
+
+    solve_args : dict
+        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        beta_full is None.
+
+    Returns
+    -------
+
+    Sigma : np.float
+        Covariance matrix.
+
+    """
     X, Y = glm_loss.data
     n, p = X.shape
 
@@ -172,46 +366,24 @@ def _parametric_cov_glm(glm_loss,
         X_inactive = X[:, inactive]
         ntotal += inactive.sum()
 
-    _bootW = np.diag(glm_loss.saturated_loss.hessian(X_active.dot(beta_active)))
-    _bootQ = X_active.T.dot(_bootW.dot(X_active))
-    _bootQinv = np.linalg.inv(_bootQ)
+    _W = np.diag(glm_loss.saturated_loss.hessian(X_active.dot(beta_active)))
+    _Q = X_active.T.dot(_W.dot(X_active))
+    _Qinv = np.linalg.inv(_Q)
     if inactive is not None:
-        _bootC = X_inactive.T.dot(_bootW.dot(X_active))
-        _bootI = _bootC.dot(_bootQinv)
+        _C = X_inactive.T.dot(_W.dot(X_active))
+        _I = _C.dot(_Qinv)
 
     nactive = active.sum()
 
     mat = np.zeros((p, n))
-    mat[:nactive, :] = _bootQinv.dot(X_active.T)
-    if ntotal>nactive:
-        mat1 = np.dot(np.dot(_bootW, X_active), np.dot(_bootQinv, X_active.T))
+    mat[:nactive, :] = _Qinv.dot(X_active.T)
+    if ntotal > nactive:
+        mat1 = np.dot(np.dot(_W, X_active), np.dot(_Qinv, X_active.T))
         mat[nactive:, :] = X[:, inactive].T.dot(np.identity(n) - mat1)
 
-    Sigma_full = np.dot(mat, np.dot(_bootW, mat.T))
+    Sigma_full = np.dot(mat, np.dot(_W, mat.T))
     return Sigma_full
 
-def pairs_inactive_score_glm(glm_loss, active, beta_active, scaling=1.):
-
-    """
-    Bootstrap inactive score at \bar{\beta}_E
-
-    Will be used with forward stepwise.
-    """
-    inactive = ~active
-    beta_full = np.zeros(glm_loss.shape)
-    beta_full[active] = beta_active
-
-    _full_boot_score = pairs_bootstrap_glm(glm_loss, 
-                                           active, 
-                                           beta_full=beta_full,
-                                           inactive=inactive,
-                                           scaling=scaling)[0]
-    nactive = active.sum()
-    def _boot_score(indices):
-        return _full_boot_score(indices)[nactive:]
-
-    return _boot_score
-
 def target(loss, 
            active, 
            queries,
@@ -324,6 +496,8 @@ def _target(indices):
 
     return target_sampler, target_observed
 
+#### Subclasses of different randomized views
+
 class glm_group_lasso(M_estimator):
 
     def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
@@ -492,8 +666,11 @@ def _boot_score(Y_star):
 
     return _boot_score, observed
 
-def parametric_cov(glm_loss, target_with_linear_func, cross_terms=(),
+def parametric_cov(glm_loss, 
+                   target_with_linear_func, 
+                   cross_terms=(),
                    solve_args={'min_its':50, 'tol':1.e-10}):
+
     # cross_terms are different active sets
 
     target, linear_func = target_with_linear_func
@@ -530,16 +707,16 @@ def _WQ(active):
 
 def glm_parametric_covariance(glm_loss, solve_args={'min_its':50, 'tol':1.e-10}):
     """
-    The m out of n bootstrap.
+    A constructor for parametric covariance
     """
     return functools.partial(parametric_cov, glm_loss, solve_args=solve_args)
 
 
-def standard_ci(glm_loss, X, y , active, leftout_indices, alpha=0.1):
-
-    import regreg.api as rr
-
-    loss = glm_loss(X[leftout_indices, ], y[leftout_indices])
+def standard_split_ci(glm_loss, X, y, active, leftout_indices, alpha=0.1):
+    """
+    Data splitting confidence intervals via bootstrap.
+    """
+    loss = glm_loss(X[leftout_indices,], y[leftout_indices])
     boot_target, target_observed = pairs_bootstrap_glm(loss, active)
     nactive = np.sum(active)
     size= np.sum(leftout_indices)
@@ -548,7 +725,6 @@ def standard_ci(glm_loss, X, y , active, leftout_indices, alpha=0.1):
     sampler = lambda: np.random.choice(size, size=(size,), replace=True)
     target_cov = bootstrap_cov(sampler, boot_target_restricted)
 
-    from scipy.stats import norm as ndist
     quantile = - ndist.ppf(alpha / float(2))
     LU = np.zeros((2, target_observed.shape[0]))
     for j in range(observed.shape[0]):
diff --git a/selection/randomized/tests/test_cvglmnet.py b/selection/randomized/tests/test_cvglmnet.py
new file mode 100644
index 000000000..9fe8b2ffd
--- /dev/null
+++ b/selection/randomized/tests/test_cvglmnet.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+import regreg.api as rr
+
+from ..cv_glmnet import CV_glmnet
+from ...tests.instance import gaussian_instance
+
+def test_cv_glmnet():
+    np.random.seed(2)
+    n, p = 3000, 1000
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=30, rho=0., sigma=1)
+    loss = rr.glm.gaussian(X,y)
+    CV_glmnet_compute = CV_glmnet(loss, 'gaussian')
+    lam_CV, lam_1SD, lam_seq, CV_err, SD = CV_glmnet_compute.using_glmnet()
+    print("CV error curve (nonrandomized):", CV_err)
+    lam_grid_size = CV_glmnet_compute.lam_seq.shape[0]
+    lam_CVR, SD, CVR, CV1, lam_seq = CV_glmnet_compute.choose_lambda_CVR(scale1=0.1, scale2=0.1)
+    print("nonrandomized index:", list(lam_seq).index(lam_CV)) # index of the minimizer
+    print("lam for nonrandomized CV plus sigma rule:",lam_CV,lam_1SD)
+    print("lam_CVR:",lam_CVR)
+    print("randomized index:", list(lam_seq).index(lam_CVR))
+    import matplotlib.pyplot as plt
+    plt.plot(np.log(lam_seq), CV_err)
+    plt.plot(np.log(lam_seq), CVR)
+    plt.show()
+
+
diff --git a/selection/randomized/tests/test_multiple_queries_CI.py b/selection/randomized/tests/test_multiple_queries_CI.py
index 44a56a6b2..b421aefbf 100644
--- a/selection/randomized/tests/test_multiple_queries_CI.py
+++ b/selection/randomized/tests/test_multiple_queries_CI.py
@@ -5,16 +5,14 @@
 
 import selection.tests.reports as reports
 
-
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from selection.api import (randomization,
                            glm_group_lasso,
                            multiple_queries,
                            glm_target)
-from selection.tests.instance import logistic_instance
-from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-from selection.randomized.glm import standard_ci
-from selection.randomized.query import naive_confidence_intervals
+from ...tests.instance import logistic_instance
+from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+from ..query import naive_confidence_intervals
 
 @register_report(['pivots_clt', 'pivots_boot',
                   'covered_clt', 'ci_length_clt',
diff --git a/selection/randomized/tests/test_multiple_splits.py b/selection/randomized/tests/test_multiple_splits.py
index 7125192bf..da199bd1d 100644
--- a/selection/randomized/tests/test_multiple_splits.py
+++ b/selection/randomized/tests/test_multiple_splits.py
@@ -6,15 +6,14 @@
 import selection.tests.reports as reports
 
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from selection.api import (randomization,
                            split_glm_group_lasso,
                            multiple_queries,
                            glm_target)
-from selection.tests.instance import logistic_instance
-from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-from selection.randomized.glm import standard_ci
-from selection.randomized.query import naive_confidence_intervals
+from ...tests.instance import logistic_instance
+from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+from ..query import naive_confidence_intervals
 
 @register_report(['pivots_clt', 'pivots_boot',
                   'covered_clt', 'ci_length_clt',
diff --git a/selection/randomized/tests/test_split_compare.py b/selection/randomized/tests/test_split_compare.py
index 900a9bc8c..fabadd0cd 100644
--- a/selection/randomized/tests/test_split_compare.py
+++ b/selection/randomized/tests/test_split_compare.py
@@ -6,15 +6,15 @@
 import selection.tests.reports as reports
 
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from selection.api import (randomization, 
                            split_glm_group_lasso, 
                            multiple_queries, 
                            glm_target)
-from selection.tests.instance import logistic_instance
-from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-from selection.randomized.glm import standard_ci
-from selection.randomized.query import naive_confidence_intervals
+from ...tests.instance import logistic_instance
+from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+from ..glm import standard_split_ci
+from ..query import naive_confidence_intervals
 
 @register_report(['pivots_clt', 'pivots_boot', 
                   'covered_clt', 'ci_length_clt', 
@@ -125,7 +125,7 @@ def test_split_compare(s=3,
         LU_naive = naive_confidence_intervals(target_sampler, target_observed)
 
         if X.shape[0] - leftout_indices.sum() > nactive:
-            LU_split = standard_ci(rr.glm.logistic, X, y, active_union, leftout_indices)
+            LU_split = standard_split_ci(rr.glm.logistic, X, y, active_union, leftout_indices)
         else:
             LU_split = np.ones((nactive, 2)) * np.nan
 
diff --git a/selection/randomized/tests/test_sqrt_lasso.py b/selection/randomized/tests/test_sqrt_lasso.py
index 08af3fdda..59299d8cc 100644
--- a/selection/randomized/tests/test_sqrt_lasso.py
+++ b/selection/randomized/tests/test_sqrt_lasso.py
@@ -93,7 +93,7 @@ def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
             M_est1.decompose_subgradient(conditioning_groups=np.zeros(p, dtype=bool),
                                          marginalizing_groups=np.ones(p, bool))
 
-        target_sampler, target_observed = glm_target(glm_loss,
+        target_sampler, target_observed = glm_target(loss,
                                                      active_union,
                                                      mv,
                                                      bootstrap=bootstrap)

From 89bea117ce46058e7220de5d898717fab9d0d497 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 00:04:07 -0700
Subject: [PATCH 108/617] removing duplicate test

---
 selection/randomized/tests/test_cvglmnet.py | 27 ---------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 selection/randomized/tests/test_cvglmnet.py

diff --git a/selection/randomized/tests/test_cvglmnet.py b/selection/randomized/tests/test_cvglmnet.py
deleted file mode 100644
index 9fe8b2ffd..000000000
--- a/selection/randomized/tests/test_cvglmnet.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import numpy as np
-
-import regreg.api as rr
-
-from ..cv_glmnet import CV_glmnet
-from ...tests.instance import gaussian_instance
-
-def test_cv_glmnet():
-    np.random.seed(2)
-    n, p = 3000, 1000
-    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=30, rho=0., sigma=1)
-    loss = rr.glm.gaussian(X,y)
-    CV_glmnet_compute = CV_glmnet(loss, 'gaussian')
-    lam_CV, lam_1SD, lam_seq, CV_err, SD = CV_glmnet_compute.using_glmnet()
-    print("CV error curve (nonrandomized):", CV_err)
-    lam_grid_size = CV_glmnet_compute.lam_seq.shape[0]
-    lam_CVR, SD, CVR, CV1, lam_seq = CV_glmnet_compute.choose_lambda_CVR(scale1=0.1, scale2=0.1)
-    print("nonrandomized index:", list(lam_seq).index(lam_CV)) # index of the minimizer
-    print("lam for nonrandomized CV plus sigma rule:",lam_CV,lam_1SD)
-    print("lam_CVR:",lam_CVR)
-    print("randomized index:", list(lam_seq).index(lam_CVR))
-    import matplotlib.pyplot as plt
-    plt.plot(np.log(lam_seq), CV_err)
-    plt.plot(np.log(lam_seq), CVR)
-    plt.show()
-
-

From 7bd40b4e3f43e11aa4be751a026ab16ca8bfd47b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 07:52:08 -0700
Subject: [PATCH 109/617] added hessian to sqrt lasso

---
 selection/algorithms/sqrt_lasso.py | 38 ++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/selection/algorithms/sqrt_lasso.py b/selection/algorithms/sqrt_lasso.py
index 94621d225..c979d3d42 100644
--- a/selection/algorithms/sqrt_lasso.py
+++ b/selection/algorithms/sqrt_lasso.py
@@ -3,6 +3,7 @@
 """
 
 import numpy as np
+from scipy import sparse
 from scipy.stats import norm as ndist, chi as chidist
 from scipy.interpolate import interp1d
 
@@ -37,9 +38,8 @@ def __init__(self, X, Y,
                  initial=None,
                  offset=None):
 
-        X = rr.astransform(X)
         rr.smooth_atom.__init__(self,
-                                X.input_shape,
+                                rr.astransform(X).input_shape,
                                 coef=1.,
                                 offset=offset,
                                 quadratic=quadratic,
@@ -75,6 +75,40 @@ def smooth_objective(self, x, mode='both', check_feasibility=False):
         else:
             raise ValueError("mode incorrectly specified")
 
+    def hessian(self, beta):
+        """
+
+        Compute the Hessian of the loss $ \nabla^2 \ell(X\beta)$.
+
+        Parameters
+        ----------
+
+        beta : ndarray
+            Parameters.
+
+        Returns
+        -------
+
+        hess : ndarray
+            Hessian of the loss at $\beta$, defined everywhere 
+            the residual is not 0.
+
+        """
+
+        f, g = self._sqerror.smooth_objective(beta, mode='both')
+
+        if self._is_transform:
+            raise ValueError('refusing to form Hessian for arbitrary affine_transform, use an ndarray or scipy.sparse')
+
+        if not hasattr(self, "_H"):
+            X = self.data[0]            
+            if not sparse.issparse(X): # assuming it is an ndarray
+                self._H = X.T.dot(X)
+            else:
+                self._H = X.T * X
+
+        return self._H / f - np.multiply.outer(g, g) / f**3
+
 def solve_sqrt_lasso(X, Y, weights=None, initial=None, quadratic=None, solve_args={}):
     """
 

From 61e0734a8d8ae15c260eb5433d2bd948f3c09f31 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 08:25:20 -0700
Subject: [PATCH 110/617] WIP: work on reduced optimization tests

---
 .../tests => sandbox/bayesian}/__init__.py    |   0
 .../bayesian}/carved_bayesian.py              |   0
 .../bayesian}/crime_data_attempt.py           |   0
 .../bayesian}/crime_data_set.py               |   0
 .../bayesian}/dual_bayesian.py                |   0
 .../bayesian}/dual_lasso_test.py              |   0
 sandbox/bayesian/hiv_inference.py             | 242 ++++++++++++++++++
 .../bayesian}/lasso_selection.py              |   0
 .../bayesian}/logistic_bayesian.py            |   0
 .../tests => sandbox/bayesian}/mixed_model.py |   0
 .../bayesian}/ms_lasso_2stage.py              |   0
 .../random_reduced_lasso_bayesian_model.py    |   0
 .../bayesian}/random_reduced_lasso_test.py    |   0
 .../bayesian}/random_reduced_logistic_test.py |   0
 .../tests => sandbox/bayesian}/read_file.py   |   0
 .../reduced_forward_stepwise_test.py          |   0
 .../bayesian}/reduced_lasso_bayesian_model.py |   0
 .../bayesian}/reduced_marginal_screening.py   |   0
 selection/randomized/tests/__init__.py        |   4 +-
 selection/randomized/tests/test_cv.py         |  23 +-
 .../tests/check_carved.py                     |  27 --
 .../tests/hiv_inference.py                    | 241 -----------------
 .../reduced_optimization/tests/test_carved.py |  31 +++
 ...epwise_bayesian.py => test_fs_bayesian.py} |   0
 24 files changed, 287 insertions(+), 281 deletions(-)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/__init__.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/carved_bayesian.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/crime_data_attempt.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/crime_data_set.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/dual_bayesian.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/dual_lasso_test.py (100%)
 create mode 100644 sandbox/bayesian/hiv_inference.py
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/lasso_selection.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/logistic_bayesian.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/mixed_model.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/ms_lasso_2stage.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/random_reduced_lasso_bayesian_model.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/random_reduced_lasso_test.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/random_reduced_logistic_test.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/read_file.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/reduced_forward_stepwise_test.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/reduced_lasso_bayesian_model.py (100%)
 rename {selection/reduced_optimization/tests => sandbox/bayesian}/reduced_marginal_screening.py (100%)
 delete mode 100644 selection/reduced_optimization/tests/check_carved.py
 delete mode 100644 selection/reduced_optimization/tests/hiv_inference.py
 create mode 100644 selection/reduced_optimization/tests/test_carved.py
 rename selection/reduced_optimization/tests/{forward_stepwise_bayesian.py => test_fs_bayesian.py} (100%)

diff --git a/selection/reduced_optimization/tests/__init__.py b/sandbox/bayesian/__init__.py
similarity index 100%
rename from selection/reduced_optimization/tests/__init__.py
rename to sandbox/bayesian/__init__.py
diff --git a/selection/reduced_optimization/tests/carved_bayesian.py b/sandbox/bayesian/carved_bayesian.py
similarity index 100%
rename from selection/reduced_optimization/tests/carved_bayesian.py
rename to sandbox/bayesian/carved_bayesian.py
diff --git a/selection/reduced_optimization/tests/crime_data_attempt.py b/sandbox/bayesian/crime_data_attempt.py
similarity index 100%
rename from selection/reduced_optimization/tests/crime_data_attempt.py
rename to sandbox/bayesian/crime_data_attempt.py
diff --git a/selection/reduced_optimization/tests/crime_data_set.py b/sandbox/bayesian/crime_data_set.py
similarity index 100%
rename from selection/reduced_optimization/tests/crime_data_set.py
rename to sandbox/bayesian/crime_data_set.py
diff --git a/selection/reduced_optimization/tests/dual_bayesian.py b/sandbox/bayesian/dual_bayesian.py
similarity index 100%
rename from selection/reduced_optimization/tests/dual_bayesian.py
rename to sandbox/bayesian/dual_bayesian.py
diff --git a/selection/reduced_optimization/tests/dual_lasso_test.py b/sandbox/bayesian/dual_lasso_test.py
similarity index 100%
rename from selection/reduced_optimization/tests/dual_lasso_test.py
rename to sandbox/bayesian/dual_lasso_test.py
diff --git a/sandbox/bayesian/hiv_inference.py b/sandbox/bayesian/hiv_inference.py
new file mode 100644
index 000000000..0ae306a3e
--- /dev/null
+++ b/sandbox/bayesian/hiv_inference.py
@@ -0,0 +1,242 @@
+import os, numpy as np, pandas, statsmodels.api as sm
+import time
+import matplotlib.pyplot as plt
+import regreg.api as rr
+from selection.reduced_optimization.initial_soln import selection
+from selection.randomized.api import randomization
+from selection.reduced_optimization.lasso_reduced import nonnegative_softmax_scaled, neg_log_cube_probability, selection_probability_lasso, \
+    sel_prob_gradient_map_lasso, selective_inf_lasso
+
+
+def main(outdir='/Users/snigdhapanigrahi/Results_reduced_optimization'):
+    """
+    Compare unadjusted and selection-adjusted posterior intervals on the NRTI data.
+
+    Reads the NRTI table (local `NRTI_DATA.txt` if present, otherwise the
+    hivdb.stanford.edu URL), builds a standardized design of mutation
+    indicators with centered log 3TC as the response, calls `selection` with
+    a Gaussian randomization draw, and then forms intervals two ways: from
+    the usual Gaussian posterior on the selected columns, and from samples
+    returned by `selective_inf_lasso(...).posterior_samples()`. Both are
+    printed and drawn as bar charts, once for every selected mutation and
+    once with 'P184V' left out.
+
+    Parameters
+    ----------
+    outdir : str (optional)
+        Directory that receives `credible_hiv_selected_0.pdf` and
+        `credible_hiv_selected_1.pdf`.
+
+    Notes
+    -----
+    The randomization is drawn from the global `np.random` state, which is
+    not seeded here.
+    """
+    if not os.path.exists("NRTI_DATA.txt"):
+        NRTI = pandas.read_table("http://hivdb.stanford.edu/pages/published_analysis/genophenoPNAS2006/DATA/NRTI_DATA.txt", na_values="NA")
+    else:
+        NRTI = pandas.read_table("NRTI_DATA.txt")
+
+    # One indicator column per (position, single-letter code) pair that occurs
+    # in more than 10 rows; '-' and '.' entries are not treated as mutations.
+    NRTI_specific = []
+    NRTI_muts = []
+    for i in range(1,241):
+        d = NRTI['P%d' % i]
+        for mut in np.unique(d):
+            if mut not in ['-','.'] and len(mut) == 1:
+                test = np.equal(d, mut)
+                if test.sum() > 10:
+                    NRTI_specific.append(np.array(test))
+                    NRTI_muts.append("P%d%s" % (i,mut))
+
+    NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts)
+    print("here")
+
+    # Next, standardize the data, keeping only those where Y is not missing
+
+    # builtin float / bool replace the np.float / np.bool aliases, which newer
+    # NumPy releases no longer provide
+    X_NRTI = np.array(NRTI_specific, float)
+    Y = NRTI['3TC'] # shorthand
+    keep = ~np.isnan(Y).astype(bool)
+    X_NRTI = X_NRTI[np.nonzero(keep)]; Y=Y[keep]
+    Y = np.array(np.log(Y), float); Y -= Y.mean()
+    X_NRTI -= X_NRTI.mean(0)[None, :]; X_NRTI /= X_NRTI.std(0)[None,:]
+    X = X_NRTI # shorthand
+    n, p = X.shape
+    X /= np.sqrt(n)
+
+    # residual scale of the full OLS fit, used as the noise level below
+    ols_fit = sm.OLS(Y, X).fit()
+    sigma_3TC = np.linalg.norm(ols_fit.resid) / np.sqrt(n-p-1)
+
+    # Design matrix
+    # Columns are site / amino acid pairs
+
+    #solving the Lasso at theoretical lambda
+    tau = 1.0
+    print(tau**2)
+    random_Z = np.random.normal(loc=0.0, scale= tau, size= p)
+    sel = selection(X, Y, random_Z, sigma=sigma_3TC)
+
+    lam, epsilon, active, betaE, cube, initial_soln = sel
+
+    print("value of tuning parameter",lam)
+    print("nactive", active.sum())
+
+    active_set_0 = [NRTI_muts[i] for i in range(p) if active[i]]
+    print("active variables", active_set_0)
+
+    noise_variance = sigma_3TC**2
+    nactive = betaE.shape[0]
+    active_sign = np.sign(betaE)
+    feasible_point = np.fabs(betaE)
+    lagrange = lam * np.ones(p)
+
+    generative_X = X[:, active]
+    prior_variance = 1000.
+    randomizer = randomization.isotropic_gaussian((p,), 1.)
+
+    # Unadjusted posterior of the selected coefficients under an independent
+    # N(0, prior_variance) prior, ignoring the selection step.
+    Q = np.linalg.inv(prior_variance* (generative_X.dot(generative_X.T)) + noise_variance* np.identity(n))
+    post_mean = prior_variance * ((generative_X.T.dot(Q)).dot(Y))
+    post_var = prior_variance* np.identity(nactive) - ((prior_variance**2)*(generative_X.T.dot(Q).dot(generative_X)))
+    # half-width is 1.65 posterior standard deviations, so take the square
+    # root of the variances on the diagonal
+    post_sd = np.sqrt(post_var.diagonal())
+    unadjusted_intervals = np.vstack([post_mean - 1.65*post_sd, post_mean + 1.65*post_sd])
+    unadjusted_intervals = np.vstack([post_mean, unadjusted_intervals])
+
+    grad_map = sel_prob_gradient_map_lasso(X,
+                                           feasible_point,
+                                           active,
+                                           active_sign,
+                                           lagrange,
+                                           generative_X,
+                                           noise_variance,
+                                           randomizer,
+                                           epsilon)
+
+    inf = selective_inf_lasso(Y, grad_map, prior_variance)
+
+    start = time.time()
+    samples = inf.posterior_samples()
+    print('sampling time', time.time() - start)
+
+    # 5th / 95th percentiles of the samples bound the adjusted intervals
+    adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
+    sel_mean = np.mean(samples, axis=0)
+    adjusted_intervals = np.vstack([sel_mean, adjusted_intervals])
+
+    print("active variables", active_set_0)
+    print("selective mean", sel_mean)
+    print("selective map and intervals", adjusted_intervals)
+    print("usual posterior based map & intervals", unadjusted_intervals)
+
+    # rows 0-2: unadjusted mean / lower / upper; rows 3-5: adjusted
+    intervals = np.vstack([unadjusted_intervals, adjusted_intervals])
+
+    ###################################################################################
+
+    un_mean = intervals[0,:]
+    un_lower_error = list(un_mean-intervals[1,:])
+    un_upper_error = list(intervals[2,:]-un_mean)
+    unStd = [un_lower_error, un_upper_error]
+
+    ad_mean = intervals[3,:]
+    ad_lower_error = list(ad_mean-intervals[4,:])
+    ad_upper_error = list(intervals[5,:]- ad_mean)
+    adStd = [ad_lower_error, ad_upper_error]
+
+    N = len(un_mean)               # number of data entries
+    ind = np.arange(N)              # the x locations for the groups
+    width = 0.35                    # bar width
+
+    print('here')
+
+    fig, ax = plt.subplots()
+
+    rects1 = ax.bar(ind, un_mean,                  # data
+                    width,                          # bar width
+                    color='royalblue',        # bar colour
+                    yerr=unStd,  # data for error bars
+                    error_kw={'ecolor':'darkblue',    # error-bars colour
+                              'linewidth':2})       # error-bar width
+
+    rects2 = ax.bar(ind + width, ad_mean,
+                    width,
+                    color='red',
+                    yerr=adStd,
+                    error_kw={'ecolor':'maroon',
+                              'linewidth':2})
+
+    axes = plt.gca()
+    axes.set_ylim([-8, 70])             # y-axis bounds
+
+    ax.set_ylabel(' ')
+    ax.set_title('selected variables')
+    ax.set_xticks(ind + 1.2* width)
+
+    ax.set_xticklabels(active_set_0, rotation=90)
+
+    ax.legend((rects1[0], rects2[0]), ('Unadjusted', 'Adjusted'), loc='upper left')
+
+    print('here')
+
+    plt.savefig(os.path.join(outdir, 'credible_hiv_selected_0.pdf'), bbox_inches='tight')
+
+    ##################################################
+    # Second figure: the same bars without P184V (when it was selected), drawn
+    # on a tighter y-range.
+    if 'P184V' in active_set_0:
+        index = active_set_0.index('P184V')
+        active_set_0.pop(index)
+        intervals = np.delete(intervals, index, axis=1)
+
+    un_mean = intervals[0,:]
+    un_lower_error = list(un_mean-intervals[1,:])
+    un_upper_error = list(intervals[2,:]-un_mean)
+    unStd = [un_lower_error, un_upper_error]
+    ad_mean = intervals[3,:]
+    ad_lower_error = list(ad_mean-intervals[4,:])
+    ad_upper_error = list(intervals[5,:]- ad_mean)
+    adStd = [ad_lower_error, ad_upper_error]
+
+    N = len(un_mean)               # number of data entries
+    ind = np.arange(N)              # the x locations for the groups
+    width = 0.35                    # bar width
+
+    print('here')
+
+    fig, ax = plt.subplots()
+
+    rects1 = ax.bar(ind, un_mean,                  # data
+                    width,                          # bar width
+                    color='royalblue',        # bar colour
+                    yerr=unStd,  # data for error bars
+                    error_kw={'ecolor':'darkblue',    # error-bars colour
+                              'linewidth':2})       # error-bar width
+
+    rects2 = ax.bar(ind + width, ad_mean,
+                    width,
+                    color='red',
+                    yerr=adStd,
+                    error_kw={'ecolor':'maroon',
+                              'linewidth':2})
+
+    axes = plt.gca()
+    axes.set_ylim([-8, 12])             # y-axis bounds
+
+    ax.set_ylabel(' ')
+    ax.set_title('selected variables')
+    ax.set_xticks(ind + 1.2* width)
+
+    ax.set_xticklabels(active_set_0, rotation=90)
+
+    ax.legend((rects1[0], rects2[0]), ('Unadjusted', 'Adjusted'), loc='upper right')
+
+    print('here')
+
+    plt.savefig(os.path.join(outdir, 'credible_hiv_selected_1.pdf'), bbox_inches='tight')
diff --git a/selection/reduced_optimization/tests/lasso_selection.py b/sandbox/bayesian/lasso_selection.py
similarity index 100%
rename from selection/reduced_optimization/tests/lasso_selection.py
rename to sandbox/bayesian/lasso_selection.py
diff --git a/selection/reduced_optimization/tests/logistic_bayesian.py b/sandbox/bayesian/logistic_bayesian.py
similarity index 100%
rename from selection/reduced_optimization/tests/logistic_bayesian.py
rename to sandbox/bayesian/logistic_bayesian.py
diff --git a/selection/reduced_optimization/tests/mixed_model.py b/sandbox/bayesian/mixed_model.py
similarity index 100%
rename from selection/reduced_optimization/tests/mixed_model.py
rename to sandbox/bayesian/mixed_model.py
diff --git a/selection/reduced_optimization/tests/ms_lasso_2stage.py b/sandbox/bayesian/ms_lasso_2stage.py
similarity index 100%
rename from selection/reduced_optimization/tests/ms_lasso_2stage.py
rename to sandbox/bayesian/ms_lasso_2stage.py
diff --git a/selection/reduced_optimization/tests/random_reduced_lasso_bayesian_model.py b/sandbox/bayesian/random_reduced_lasso_bayesian_model.py
similarity index 100%
rename from selection/reduced_optimization/tests/random_reduced_lasso_bayesian_model.py
rename to sandbox/bayesian/random_reduced_lasso_bayesian_model.py
diff --git a/selection/reduced_optimization/tests/random_reduced_lasso_test.py b/sandbox/bayesian/random_reduced_lasso_test.py
similarity index 100%
rename from selection/reduced_optimization/tests/random_reduced_lasso_test.py
rename to sandbox/bayesian/random_reduced_lasso_test.py
diff --git a/selection/reduced_optimization/tests/random_reduced_logistic_test.py b/sandbox/bayesian/random_reduced_logistic_test.py
similarity index 100%
rename from selection/reduced_optimization/tests/random_reduced_logistic_test.py
rename to sandbox/bayesian/random_reduced_logistic_test.py
diff --git a/selection/reduced_optimization/tests/read_file.py b/sandbox/bayesian/read_file.py
similarity index 100%
rename from selection/reduced_optimization/tests/read_file.py
rename to sandbox/bayesian/read_file.py
diff --git a/selection/reduced_optimization/tests/reduced_forward_stepwise_test.py b/sandbox/bayesian/reduced_forward_stepwise_test.py
similarity index 100%
rename from selection/reduced_optimization/tests/reduced_forward_stepwise_test.py
rename to sandbox/bayesian/reduced_forward_stepwise_test.py
diff --git a/selection/reduced_optimization/tests/reduced_lasso_bayesian_model.py b/sandbox/bayesian/reduced_lasso_bayesian_model.py
similarity index 100%
rename from selection/reduced_optimization/tests/reduced_lasso_bayesian_model.py
rename to sandbox/bayesian/reduced_lasso_bayesian_model.py
diff --git a/selection/reduced_optimization/tests/reduced_marginal_screening.py b/sandbox/bayesian/reduced_marginal_screening.py
similarity index 100%
rename from selection/reduced_optimization/tests/reduced_marginal_screening.py
rename to sandbox/bayesian/reduced_marginal_screening.py
diff --git a/selection/randomized/tests/__init__.py b/selection/randomized/tests/__init__.py
index b7537336e..66ecfa8ef 100644
--- a/selection/randomized/tests/__init__.py
+++ b/selection/randomized/tests/__init__.py
@@ -1,4 +1,4 @@
 import numpy as np
 
-from selection.tests.decorators import wait_for_return_value, set_sampling_params_iftrue
-from selection.tests.instance import logistic_instance, gaussian_instance
+from ...tests.decorators import wait_for_return_value, set_sampling_params_iftrue
+from ...tests.instance import logistic_instance, gaussian_instance
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index f8c959173..39e692c1a 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -1,24 +1,25 @@
 import numpy as np
 import pandas as pd
+from statsmodels.sandbox.stats.multicomp import multipletests
+
 import regreg.api as rr
+
 from selection.api import (randomization,
                            glm_group_lasso,
                            multiple_queries,
                            glm_target)
-from selection.tests.instance import (gaussian_instance,
+from ...tests.instance import (gaussian_instance,
                                       logistic_instance)
 
-from selection.randomized.query import naive_confidence_intervals
-from selection.randomized.query import naive_pvalues
+from ..query import naive_confidence_intervals, naive_pvalues
 
-import selection.tests.reports as reports
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (wait_for_return_value, 
-                                        set_seed_iftrue, 
-                                        set_sampling_params_iftrue, 
-                                        register_report)
-from selection.randomized.cv_view import CV_view
-from statsmodels.sandbox.stats.multicomp import multipletests
+from ...tests import reports  # relative imports only exist in the `from ... import` form
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (wait_for_return_value, 
+                                 set_seed_iftrue, 
+                                 set_sampling_params_iftrue, 
+                                 register_report)
+from ..cv_view import CV_view
 
 if SMALL_SAMPLES:
     nboot = 10
diff --git a/selection/reduced_optimization/tests/check_carved.py b/selection/reduced_optimization/tests/check_carved.py
deleted file mode 100644
index 0f98103d3..000000000
--- a/selection/reduced_optimization/tests/check_carved.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from __future__ import print_function
-import numpy as np
-import time
-import regreg.api as rr
-from selection.reduced_optimization.estimator import M_estimator_approx_carved
-from selection.tests.instance import logistic_instance, gaussian_instance
-
-
-n = 500
-p = 100
-s = 0
-snr = 0.
-
-X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
-lam = 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-n, p = X.shape
-
-loss = rr.glm.gaussian(X, y)
-total_size = loss.saturated_loss.shape[0]
-subsample_size = int(0.8* total_size)
-epsilon = 1. / np.sqrt(n)
-
-W = np.ones(p) * lam
-penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-M_est = M_estimator_approx_carved(loss, epsilon, subsample_size, penalty, 'parametric')
-M_est.solve_approx()
diff --git a/selection/reduced_optimization/tests/hiv_inference.py b/selection/reduced_optimization/tests/hiv_inference.py
deleted file mode 100644
index cdd636ddd..000000000
--- a/selection/reduced_optimization/tests/hiv_inference.py
+++ /dev/null
@@ -1,241 +0,0 @@
-import os, numpy as np, pandas, statsmodels.api as sm
-import time
-import matplotlib.pyplot as plt
-import regreg.api as rr
-from selection.reduced_optimization.initial_soln import selection
-from selection.randomized.api import randomization
-from selection.reduced_optimization.lasso_reduced import nonnegative_softmax_scaled, neg_log_cube_probability, selection_probability_lasso, \
-    sel_prob_gradient_map_lasso, selective_inf_lasso
-
-
-if not os.path.exists("NRTI_DATA.txt"):
-    NRTI = pandas.read_table("http://hivdb.stanford.edu/pages/published_analysis/genophenoPNAS2006/DATA/NRTI_DATA.txt", na_values="NA")
-else:
-    NRTI = pandas.read_table("NRTI_DATA.txt")
-
-NRTI_specific = []
-NRTI_muts = []
-mixtures = np.zeros(NRTI.shape[0])
-for i in range(1,241):
-    d = NRTI['P%d' % i]
-    for mut in np.unique(d):
-        if mut not in ['-','.'] and len(mut) == 1:
-            test = np.equal(d, mut)
-            if test.sum() > 10:
-                NRTI_specific.append(np.array(np.equal(d, mut)))
-                NRTI_muts.append("P%d%s" % (i,mut))
-
-NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts)
-print("here")
-
-# Next, standardize the data, keeping only those where Y is not missing
-
-X_NRTI = np.array(NRTI_specific, np.float)
-Y = NRTI['3TC'] # shorthand
-keep = ~np.isnan(Y).astype(np.bool)
-X_NRTI = X_NRTI[np.nonzero(keep)]; Y=Y[keep]
-Y = np.array(np.log(Y), np.float); Y -= Y.mean()
-X_NRTI -= X_NRTI.mean(0)[None, :]; X_NRTI /= X_NRTI.std(0)[None,:]
-X = X_NRTI # shorthand
-n, p = X.shape
-X /= np.sqrt(n)
-
-ols_fit = sm.OLS(Y, X).fit()
-sigma_3TC = np.linalg.norm(ols_fit.resid) / np.sqrt(n-p-1)
-OLS_3TC = ols_fit.params
-
-# Design matrix
-# Columns are site / amino acid pairs
-
-
-#solving the Lasso at theoretical lambda
-tau = 1.0
-print(tau**2)
-random_Z = np.random.normal(loc=0.0, scale= tau, size= p)
-sel = selection(X, Y, random_Z, sigma=sigma_3TC)
-
-lam, epsilon, active, betaE, cube, initial_soln = sel
-
-print("value of tuning parameter",lam)
-print("nactive", active.sum())
-
-active_set_0 = [NRTI_muts[i] for i in range(p) if active[i]]
-print("active variables", active_set_0)
-active_set = [i for i in range(p) if active[i]]
-
-noise_variance = sigma_3TC**2
-nactive = betaE.shape[0]
-active_sign = np.sign(betaE)
-feasible_point = np.fabs(betaE)
-lagrange = lam * np.ones(p)
-
-generative_X = X[:, active]
-prior_variance = 1000.
-randomizer = randomization.isotropic_gaussian((p,), 1.)
-
-Q = np.linalg.inv(prior_variance* (generative_X.dot(generative_X.T)) + noise_variance* np.identity(n))
-post_mean = prior_variance * ((generative_X.T.dot(Q)).dot(Y))
-post_var = prior_variance* np.identity(nactive) - ((prior_variance**2)*(generative_X.T.dot(Q).dot(generative_X)))
-unadjusted_intervals = np.vstack([post_mean - 1.65*(post_var.diagonal()),post_mean + 1.65*(post_var.diagonal())])
-unadjusted_intervals = np.vstack([post_mean, unadjusted_intervals])
-#print(unadjusted_intervals)
-
-grad_map = sel_prob_gradient_map_lasso(X,
-                                       feasible_point,
-                                       active,
-                                       active_sign,
-                                       lagrange,
-                                       generative_X,
-                                       noise_variance,
-                                       randomizer,
-                                       epsilon)
-
-inf = selective_inf_lasso(Y, grad_map, prior_variance)
-
-#map = inf.map_solve(nstep = 500)[::-1]
-
-toc = time.time()
-samples = inf.posterior_samples()
-tic = time.time()
-print('sampling time', tic - toc)
-
-adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
-sel_mean = np.mean(samples, axis=0)
-adjusted_intervals = np.vstack([sel_mean, adjusted_intervals])
-
-print("active variables", active_set_0)
-print("selective mean", sel_mean)
-#print("selective map", map[1])
-print("selective map and intervals", adjusted_intervals)
-print("usual posterior based map & intervals", unadjusted_intervals)
-
-intervals = np.vstack([unadjusted_intervals, adjusted_intervals])
-
-###################################################################################
-
-un_mean = intervals[0,:]
-un_lower_error = list(un_mean-intervals[1,:])
-un_upper_error = list(intervals[2,:]-un_mean)
-unStd = [un_lower_error, un_upper_error]
-
-ad_mean = intervals[3,:]
-ad_lower_error = list(ad_mean-intervals[4,:])
-ad_upper_error = list(intervals[5,:]- ad_mean)
-adStd = [ad_lower_error, ad_upper_error]
-
-
-N = len(un_mean)               # number of data entries
-ind = np.arange(N)              # the x locations for the groups
-width = 0.35                    # bar width
-
-width_0 = 0.10
-
-print('here')
-
-fig, ax = plt.subplots()
-
-rects1 = ax.bar(ind, un_mean,                  # data
-                width,                          # bar width
-                color='royalblue',        # bar colour
-                yerr=unStd,  # data for error bars
-                error_kw={'ecolor':'darkblue',    # error-bars colour
-                          'linewidth':2})       # error-bar width
-
-rects2 = ax.bar(ind + width, ad_mean,
-                width,
-                color='red',
-                yerr=adStd,
-                error_kw={'ecolor':'maroon',
-                          'linewidth':2})
-
-axes = plt.gca()
-axes.set_ylim([-8, 70])             # y-axis bounds
-
-ax.set_ylabel(' ')
-ax.set_title('selected variables'.format(active_set))
-ax.set_xticks(ind + 1.2* width)
-
-ax.set_xticklabels(active_set_0, rotation=90)
-
-
-#ax.set_xticklabels(('Coef1', 'Coef2', 'Coef3', 'Coef4', 'Coef5', 'Coef6'))
-
-ax.legend((rects1[0], rects2[0]), ('Unadjusted', 'Adjusted'), loc='upper left')
-
-print('here')
-
-#def autolabel(rects):
-#    for rect in rects:
-#        height = rect.get_height()
-#        ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
-#                '%d' % int(height),
-#                ha='center',            # vertical alignment
-#                va='bottom'             # horizontal alignment
-#                )
-
-#autolabel(rects1)
-#autolabel(rects2)
-
-#plt.show()                              # render the plot
-
-plt.savefig('/Users/snigdhapanigrahi/Results_reduced_optimization/credible_hiv_selected_0.pdf', bbox_inches='tight')
-
-##################################################
-ind = np.zeros(len(active_set), np.bool)
-
-index = active_set_0.index('P184V')
-ind[index] = 1
-
-active_set_0.pop(index)
-
-active_set.pop(index)
-
-intervals = intervals[:, ~ind]
-
-
-un_mean = intervals[0,:]
-un_lower_error = list(un_mean-intervals[1,:])
-un_upper_error = list(intervals[2,:]-un_mean)
-unStd = [un_lower_error, un_upper_error]
-ad_mean = intervals[3,:]
-ad_lower_error = list(ad_mean-intervals[4,:])
-ad_upper_error = list(intervals[5,:]- ad_mean)
-adStd = [ad_lower_error, ad_upper_error]
-
-
-N = len(un_mean)               # number of data entries
-ind = np.arange(N)              # the x locations for the groups
-width = 0.35                    # bar width
-
-print('here')
-
-fig, ax = plt.subplots()
-
-rects1 = ax.bar(ind, un_mean,                  # data
-                width,                          # bar width
-                color='royalblue',        # bar colour
-                yerr=unStd,  # data for error bars
-                error_kw={'ecolor':'darkblue',    # error-bars colour
-                          'linewidth':2})       # error-bar width
-
-rects2 = ax.bar(ind + width, ad_mean,
-                width,
-                color='red',
-                yerr=adStd,
-                error_kw={'ecolor':'maroon',
-                          'linewidth':2})
-
-axes = plt.gca()
-axes.set_ylim([-8, 12])             # y-axis bounds
-
-ax.set_ylabel(' ')
-ax.set_title('selected variables'.format(active_set))
-ax.set_xticks(ind + 1.2* width)
-
-ax.set_xticklabels(active_set_0, rotation=90)
-
-ax.legend((rects1[0], rects2[0]), ('Unadjusted', 'Adjusted'), loc='upper right')
-
-print('here')
-
-plt.savefig('/Users/snigdhapanigrahi/Results_reduced_optimization/credible_hiv_selected_1.pdf', bbox_inches='tight')
diff --git a/selection/reduced_optimization/tests/test_carved.py b/selection/reduced_optimization/tests/test_carved.py
new file mode 100644
index 000000000..cca8675f9
--- /dev/null
+++ b/selection/reduced_optimization/tests/test_carved.py
@@ -0,0 +1,31 @@
+import numpy as np
+import regreg.api as rr
+
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_seed_iftrue, 
+                                 set_sampling_params_iftrue)
+
+from ..estimator import M_estimator_approx_carved
+from ...tests.instance import logistic_instance, gaussian_instance
+
+@set_seed_iftrue(SET_SEED)
+def test_carved():
+    """Smoke test: build a carved M-estimator on a simulated instance and solve it."""
+    # s=0, signal=0.: presumably a pure-noise instance -- confirm against gaussian_instance
+    X, y, _, _, sigma = gaussian_instance(n=500, p=100, s=0, sigma=1., rho=0, signal=0.)
+    n, p = X.shape
+
+    # penalty level: mean over 2000 noise draws of max |X^T z|, scaled by sigma
+    noise = np.random.standard_normal((n, 2000))
+    lam = 1. * np.mean(np.fabs(np.dot(X.T, noise)).max(0)) * sigma
+
+    loss = rr.glm.gaussian(X, y)
+    # subsample size handed to the estimator: 80% of the saturated loss's length
+    subsample_size = int(0.8 * loss.saturated_loss.shape[0])
+    epsilon = 1. / np.sqrt(n)
+
+    groups = np.arange(p)
+    weights = dict(zip(groups, np.ones(p) * lam))
+    penalty = rr.group_lasso(groups, weights=weights, lagrange=1.)
+    estimator = M_estimator_approx_carved(loss, epsilon, subsample_size, penalty, 'parametric')
+    estimator.solve_approx()
diff --git a/selection/reduced_optimization/tests/forward_stepwise_bayesian.py b/selection/reduced_optimization/tests/test_fs_bayesian.py
similarity index 100%
rename from selection/reduced_optimization/tests/forward_stepwise_bayesian.py
rename to selection/reduced_optimization/tests/test_fs_bayesian.py

From 5cb2f4e44e9ac96c80f58892e150195cf9cda587 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 10:26:43 -0700
Subject: [PATCH 111/617] WIP: more work on bayesian tests

---
 .../tests/test_carved_bayesian.py             | 93 ++++++++++---------
 1 file changed, 48 insertions(+), 45 deletions(-)
 rename sandbox/bayesian/carved_bayesian.py => selection/reduced_optimization/tests/test_carved_bayesian.py (76%)

diff --git a/sandbox/bayesian/carved_bayesian.py b/selection/reduced_optimization/tests/test_carved_bayesian.py
similarity index 76%
rename from sandbox/bayesian/carved_bayesian.py
rename to selection/reduced_optimization/tests/test_carved_bayesian.py
index 5f86aed66..c17dc0428 100644
--- a/sandbox/bayesian/carved_bayesian.py
+++ b/selection/reduced_optimization/tests/test_carved_bayesian.py
@@ -1,18 +1,24 @@
 from __future__ import print_function
+import sys
+import os
+
 import numpy as np
-import time
 import regreg.api as rr
-from selection.reduced_optimization.initial_soln import selection
-from selection.tests.instance import logistic_instance, gaussian_instance
 
-from selection.reduced_optimization.par_carved_reduced import selection_probability_carved, sel_inf_carved
-
-#from selection.reduced_optimization.estimator import M_estimator_approx_carved
-from selection.randomized.M_estimator import M_estimator, M_estimator_split
-from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
-
-import sys
-import os
+from selection.api import randomization
+from ..initial_soln import selection, instance
+from ..lasso_reduced import (nonnegative_softmax_scaled, 
+                             neg_log_cube_probability, 
+                             selection_probability_lasso, 
+                             sel_prob_gradient_map_lasso, 
+                             selective_inf_lasso)
+from ..par_carved_reduced import selection_probability_carved, sel_inf_carved
+from ...randomized.M_estimator import M_estimator, M_estimator_split
+from ...randomized.glm import pairs_bootstrap_glm, bootstrap_cov
+
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                                        set_seed_iftrue)
 
 def generate_data_random(n, p, sigma=1., rho=0., scale =True, center=True):
 
@@ -100,7 +106,9 @@ def carved_lasso_trial(X,
                        beta,
                        sigma,
                        lam,
-                       estimation='parametric'):
+                       estimation='parametric',
+                       ndraw=1000,
+                       burnin=100):
     n, p = X.shape
 
     loss = rr.glm.gaussian(X, y)
@@ -120,7 +128,7 @@ def carved_lasso_trial(X,
 
     if nactive >= 1:
         prior_variance = 1000.
-        noise_variance = sigma ** 2
+        noise_variance = sigma**2
         projection_active = X[:, active].dot(np.linalg.inv(X[:, active].T.dot(X[:, active])))
         M_1 = prior_variance * (X.dot(X.T)) + noise_variance * np.identity(n)
         M_2 = prior_variance * ((X.dot(X.T)).dot(projection_active))
@@ -134,7 +142,7 @@ def carved_lasso_trial(X,
         unadjusted_intervals = np.vstack([post_mean - 1.65 * (np.sqrt(post_var.diagonal())),
                                           post_mean + 1.65 * (np.sqrt(post_var.diagonal()))])
         grad_lasso = sel_inf_carved(M_est, prior_variance)
-        samples = grad_lasso.posterior_samples()
+        samples = grad_lasso.posterior_samples(langevin_steps=ndraw, burnin=burnin)
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
         selective_mean = np.mean(samples, axis=0)
@@ -165,16 +173,11 @@ def carved_lasso_trial(X,
     else:
         return np.vstack([0.,0.,0.,0.,0.,0.])
 
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+def test_carved_bayesian(ndraw=1000,
+                         burnin=100):
 
-if __name__ == "__main__":
-
-    # # read from command line
-    # seedn = int(sys.argv[1])
-    # outdir = sys.argv[2]
-    #
-    # outfile = os.path.join(outdir, "list_result_" + str(seedn) + ".txt")
-
-    ### set parameters
     n = 1000
     p = 100
     s = 0
@@ -188,28 +191,28 @@ def carved_lasso_trial(X,
     ad_risk = 0.
     unad_risk = 0.
 
-    for i in range(niter):
-        np.random.seed(i)
-        X, y, beta, sigma = generate_data_random(n=n, p=p)
-        lam = 0.8 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-        lasso = carved_lasso_trial(X,
-                                   y,
-                                   beta,
-                                   sigma,
-                                   lam)
-
-        ad_cov += lasso[0, 0]
-        unad_cov += lasso[1, 0]
-        ad_len += lasso[2, 0]
-        unad_len += lasso[3, 0]
-        ad_risk += lasso[4, 0]
-        unad_risk += lasso[5, 0]
-
-        print("\n")
-        print("iteration completed", i)
-        print("adjusted and unadjusted coverage", ad_cov, unad_cov)
-        print("adjusted and unadjusted lengths", ad_len, unad_len)
-        print("adjusted and unadjusted risks", ad_risk, unad_risk)
+    X, y, beta, sigma = generate_data_random(n=n, p=p)
+    lam = 0.8 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    lasso = carved_lasso_trial(X,
+                               y,
+                               beta,
+                               sigma,
+                               lam,
+                               ndraw=ndraw,
+                               burnin=burnin)
+
+    ad_cov += lasso[0, 0]
+    unad_cov += lasso[1, 0]
+    ad_len += lasso[2, 0]
+    unad_len += lasso[3, 0]
+    ad_risk += lasso[4, 0]
+    unad_risk += lasso[5, 0]
+
+    print("\n")
+    print("iteration completed")  # single trial now; the removed loop's index `i` is no longer bound
+    print("adjusted and unadjusted coverage", ad_cov, unad_cov)
+    print("adjusted and unadjusted lengths", ad_len, unad_len)
+    print("adjusted and unadjusted risks", ad_risk, unad_risk)
 
     print("adjusted and unadjusted coverage", ad_cov, unad_cov)
     print("adjusted and unadjusted lengths", ad_len, unad_len)

From 1d8f1351b501031d8301a668d9e6d03e9e80c784 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 12:08:40 -0700
Subject: [PATCH 112/617] WIP: created step and threshold convenience classes

---
 selection/randomized/convenience.py           | 1136 ++++++++++++++++-
 .../randomized/tests/test_convenience.py      |   85 ++
 selection/randomized/tests/test_cv.py         |    2 +-
 .../randomized/tests/test_greedy_step.py      |   31 +-
 .../randomized/tests/test_multiple_queries.py |   33 +-
 .../randomized/tests/test_threshold_score.py  |   24 +-
 6 files changed, 1235 insertions(+), 76 deletions(-)
 create mode 100644 selection/randomized/tests/test_convenience.py

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index c3fd4004f..6a95e408f 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -7,7 +7,10 @@
 import numpy as np
 import regreg.api as rr
 
-from .glm import target as glm_target, glm_group_lasso
+from .glm import (target as glm_target, 
+                  glm_group_lasso,
+                  glm_greedy_step,
+                  glm_threshold_score)
 from .randomization import randomization
 from .query import multiple_queries
 
@@ -37,7 +40,7 @@ def __init__(self,
                  covariance_estimator=None):
         r"""
 
-        Create a new post-selection dor the LASSO problem
+        Create a new post-selection object for the LASSO problem
 
         Parameters
         ----------
@@ -55,7 +58,7 @@ def __init__(self,
         randomizer_scale : float
             Scale for IID components of randomization.
 
-        randomizer : str
+        randomizer : str (optional)
             One of ['laplace', 'logistic', 'gaussian']
 
         covariance_estimator : callable (optional)
@@ -96,7 +99,7 @@ def __init__(self,
         self.penalty = rr.group_lasso(np.arange(p),
                                       weights=dict(zip(np.arange(p), self.feature_weights)), lagrange=1.)
 
-    def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, marginalize_subgrad=True,
+    def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, 
             views=[]):
         """
         Fit the randomized lasso using `regreg`.
@@ -107,9 +110,6 @@ def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, marginalize_subgrad=True,
         solve_args : keyword args
              Passed to `regreg.problems.simple_problem.solve`.
 
-        marginalize_subgrad : bool 
-             If True, marginalize over inactive coordinates of the subgradient.
-
         views : list
              Other views of the data, e.g. cross-validation.
 
@@ -128,14 +128,40 @@ def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, marginalize_subgrad=True,
         views = copy(views); views.append(self._view)
         self._queries = multiple_queries(views)
         self._queries.solve()
-
-        if marginalize_subgrad == True:
-            self._view.decompose_subgradient(conditioning_groups=np.zeros(p, np.bool),
-                                            marginalizing_groups=np.ones(p, np.bool))
-        
+   
         self.signs = np.sign(self._view.initial_soln)
         return self.signs
 
+    def decompose_subgradient(self,
+                              conditioning_groups=None,
+                              marginalizing_groups=None):
+        """
+        Condition on / marginalize over parts of the subgradient by
+        delegating to the fitted view's `decompose_subgradient`.
+        Raises ValueError if `fit` has not been run yet.
+
+        Parameters
+        ----------
+
+        conditioning_groups : np.bool
+             Which groups' subgradients we should condition on.
+
+        marginalizing_groups : np.bool
+             Which groups' subgradients we should marginalize over.
+
+        Returns
+        -------
+
+        None
+
+        """
+
+        if not hasattr(self, "_view"):  # `_view` only exists once `fit` has been called
+            raise ValueError("fit method should be run first")
+
+        self._view.decompose_subgradient(conditioning_groups=conditioning_groups,
+                                         marginalizing_groups=marginalizing_groups)
+
     def summary(self, selected_features, 
                 null_value=None,
                 level=0.9,
@@ -278,7 +304,7 @@ def gaussian(X,
         Returns
         -------
 
-        L : `selection.randomized.lasso.lasso`
+        L : `selection.randomized.convenience.lasso`
         
         Notes
         -----
@@ -299,11 +325,15 @@ def gaussian(X,
         n, p = X.shape
 
         mean_diag = np.mean((X**2).sum(0))
-        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
-        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+        if ridge_term is None:
+            ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
 
         return lasso(loglike, np.asarray(feature_weights) / sigma**2,
-                     ridge_term, randomizer_scale, randomizer=randomizer)
+                     ridge_term, randomizer_scale, randomizer=randomizer,
+                     covariance_estimator=covariance_estimator) # XXX: do we use the covariance_estimator?
 
     @staticmethod
     def logistic(X, 
@@ -311,7 +341,10 @@ def logistic(X,
                  feature_weights, 
                  trials=None, 
                  covariance_estimator=None,
-                 quadratic=None):
+                 quadratic=None,
+                 ridge_term=None,
+                 randomizer='gaussian',
+                 randomizer_scale=None):
         r"""
         Logistic LASSO with feature weights.
 
@@ -366,7 +399,7 @@ def logistic(X,
         Returns
         -------
 
-        L : `selection.randomized.lasso.lasso`
+        L : `selection.randomized.convenience.lasso`
         
         Notes
         -----
@@ -380,15 +413,23 @@ def logistic(X,
         the unpenalized estimator.
 
         """
+        n, p = X.shape
+
         loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
 
         mean_diag = np.mean((X**2).sum(0))
-        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
-        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
 
-        return lasso(loglike, feature_weights, ridge_term, 
+        if ridge_term is None:
+            ridge_term = mean_diag / np.sqrt(n)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 
+
+        return lasso(loglike, feature_weights, 
+                     ridge_term, 
                      randomizer_scale,
-                     covariance_estimator=covariance_estimator)
+                     covariance_estimator=covariance_estimator,
+                     randomizer=randomizer)
 
     @staticmethod
     def coxph(X, 
@@ -396,7 +437,10 @@ def coxph(X,
               status, 
               feature_weights, 
               covariance_estimator=None,
-              quadratic=None):
+              quadratic=None,
+              ridge_term=None,
+              randomizer='gaussian',
+              randomizer_scale=None):
         r"""
         Cox proportional hazards LASSO with feature weights.
 
@@ -450,7 +494,7 @@ def coxph(X,
         Returns
         -------
 
-        L : `selection.randomized.lasso.lasso`
+        L : `selection.randomized.convenience.lasso`
         
         Notes
         -----
@@ -469,11 +513,18 @@ def coxph(X,
         # scale for randomization seems kind of meaningless here...
 
         mean_diag = np.mean((X**2).sum(0))
-        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
-        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
 
-        return lasso(loglike, feature_weights, ridge_term,
-                     randomizer_scale, randomizer=randomizer,
+        if ridge_term is None:
+            ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        return lasso(loglike, 
+                     feature_weights, 
+                     ridge_term,
+                     randomizer_scale, 
+                     randomizer=randomizer,
                      covariance_estimator=covariance_estimator)
 
     @staticmethod
@@ -481,7 +532,10 @@ def poisson(X,
                 counts, 
                 feature_weights, 
                 covariance_estimator=None,
-                quadratic=None):
+                quadratic=None,
+                ridge_term=None,
+                randomizer_scale=None,
+                randomizer='gaussian'):
         r"""
         Poisson log-linear LASSO with feature weights.
 
@@ -530,7 +584,7 @@ def poisson(X,
         Returns
         -------
 
-        L : `selection.randomized.lasso.lasso`
+        L : `selection.randomized.convenience.lasso`
         
         Notes
         -----
@@ -544,16 +598,24 @@ def poisson(X,
         the unpenalized estimator.
 
         """
+        n, p = X.shape
         loglike = rr.glm.poisson(X, counts, quadratic=quadratic)
 
         # scale for randomizer seems kind of meaningless here...
 
         mean_diag = np.mean((X**2).sum(0))
-        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
-        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
 
-        return lasso(loglike, feature_weights, ridge_term,
-                     randomizer_scale, randomizer=randomizer,
+        if ridge_term is None:
+            ridge_term = np.std(counts)**2 * mean_diag / np.sqrt(n)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts)
+
+        return lasso(loglike, 
+                     feature_weights, 
+                     ridge_term,
+                     randomizer_scale, 
+                     randomizer=randomizer,
                      covariance_estimator=covariance_estimator)
 
     @staticmethod
@@ -563,7 +625,9 @@ def sqrt_lasso(X,
                    quadratic=None,
                    covariance='parametric',
                    sigma_estimate='truncated',
-                   solve_args={'min_its':200}):
+                   solve_args={'min_its':200},
+                   randomizer_scale=None,
+                   randomizer='gaussian'):
         r"""
         Use sqrt-LASSO to choose variables.
 
@@ -621,7 +685,7 @@ def sqrt_lasso(X,
         Returns
         -------
 
-        L : `selection.randomized.lasso.lasso`
+        L : `selection.randomized.convenience.lasso`
         
         Notes
         -----
@@ -746,3 +810,1003 @@ def sqrt_lasso(X,
 
         return L
 
+class step(lasso):
+
+    r"""
+    A class for maximizing some coordinates of the
+    randomized score of a GLM. The problem we are
+    solving is
+
+    .. math::
+
+        \text{minimize}_{\eta} (\nabla \ell(\bar{\beta}_E) - \omega)^T\eta
+
+    subject to $\|\eta_g\|_2/w_g \leq 1$ where $w_g$ are group weights.
+    The set of variables $E$ are variables we have partially maximized over
+    and $\bar{\beta}_E$ should be viewed as padded out with zeros
+    over all variables in $E^c$.
+
+    """
+
+
+    def __init__(self, 
+                 loglike, 
+                 feature_weights,
+                 inactive,
+                 randomizer_scale,
+                 active=None,
+                 randomizer='gaussian',
+                 covariance_estimator=None):
+        r"""
+
+        Create a new post-selection object for the stepwise problem
+
+        Parameters
+        ----------
+
+        loglike : `regreg.smooth.glm.glm`
+            A (negative) log-likelihood as implemented in `regreg`.
+
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is broadcast to all features.
+
+        inactive : np.bool
+            Which groups of variables are candidates
+            for inclusion in this step.
+
+        randomizer_scale : float
+            Scale for IID components of randomization.
+
+        active : np.bool (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+
+        randomizer : str (optional)
+            One of ['laplace', 'logistic', 'gaussian']
+
+        covariance_estimator : callable (optional)
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+
+        self.active = active
+        self.inactive = inactive
+
+        self.loglike = loglike
+        self.nfeature = p = loglike.shape[0]
+
+        if np.asarray(feature_weights).shape == ():  # scalar weight: expand to one entry per feature
+            feature_weights = np.ones(loglike.shape) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+
+        self.covariance_estimator = covariance_estimator
+
+        nrandom = inactive.sum()  # the randomization has one coordinate per entry counted in `inactive`
+        if randomizer == 'laplace':
+            self.randomizer = randomization.laplace((nrandom,), scale=randomizer_scale)
+        elif randomizer == 'gaussian':
+            self.randomizer = randomization.isotropic_gaussian((nrandom,),randomizer_scale)
+        elif randomizer == 'logistic':  # NOTE(review): any other value leaves `self.randomizer` unset
+            self.randomizer = randomization.logistic((nrandom,), scale=randomizer_scale)
+
+        self.penalty = rr.group_lasso(np.arange(p),
+                                      weights=dict(zip(np.arange(p), self.feature_weights)), lagrange=1.)
+
+    def fit(self, 
+            views=[]):
+        """
+        Find the maximizing group.
+
+        Builds a `glm_greedy_step` view, solves it, then solves
+        it together with any extra `views` via `multiple_queries`.
+
+        Parameters
+        ----------
+
+        views : list
+             Other views of the data, e.g. cross-validation.
+
+        Returns
+        -------
+
+        maximizing_group
+             The 'maximizing_group' entry of the view's selection variable.
+             
+        """
+
+        p = self.nfeature
+        self._view = glm_greedy_step(self.loglike, 
+                                     self.penalty, 
+                                     self.active,
+                                     self.inactive,
+                                     self.randomizer)
+        self._view.solve()
+
+        views = copy(views); views.append(self._view)  # copy first so the caller's list (and the shared default) is not mutated
+        self._queries = multiple_queries(views)
+        self._queries.solve()
+   
+        self.maximizing_group = self._view.selection_variable['maximizing_group']
+        return self.maximizing_group
+
+    def decompose_subgradient(self,
+                              conditioning_groups=None,
+                              marginalizing_groups=None):
+        """
+
+        Not implemented for the stepwise problem: this override
+        always raises NotImplementedError.
+
+        Parameters
+        ----------
+
+        conditioning_groups : np.bool
+             Unused.
+
+        marginalizing_groups : np.bool
+             Unused.
+
+        Returns
+        -------
+
+        None
+
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def gaussian(X, 
+                 Y, 
+                 feature_weights, 
+                 inactive=None,
+                 active=None,
+                 covariance_estimator=None,
+                 randomizer_scale=None,
+                 randomizer='gaussian'):
+        r"""
+        Take a step with a Gaussian loglikelihood.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        inactive : boolean ndarray (optional)
+            Which groups of variables are candidates
+            for inclusion in this step. Defaults to ~active.
+
+        active : boolean ndarray (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+            Defaults to `np.zeros(p, bool)`.
+
+        covariance_estimator : callable (optional)
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomizer (data-driven default if None).
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.step`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of some of the
+        rows and columns of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        loglike = rr.glm.gaussian(X, Y)
+        n, p = X.shape
+
+        if active is None:
+            active = np.zeros(p, bool)  # builtin `bool`: the `np.bool` alias was removed in NumPy 1.24
+        if inactive is None:
+            inactive = ~active
+
+        if randomizer_scale is None:
+            mean_diag = np.mean((X**2).sum(0))
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        return step(loglike, 
+                    feature_weights,
+                    inactive, 
+                    randomizer_scale, 
+                    active=active,
+                    randomizer=randomizer,
+                    covariance_estimator=covariance_estimator)  # XXX: do we use the covariance_estimator?
+
+    @staticmethod
+    def logistic(X, 
+                 successes, 
+                 feature_weights, 
+                 active=None,
+                 inactive=None,
+                 trials=None, 
+                 covariance_estimator=None,
+                 randomizer_scale=None,
+                 randomizer='gaussian'):
+        r"""
+        Take a step with a logistic loglikelihood.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        successes : ndarray
+            Shape (n,) -- response vector. An integer number of successes.
+            For data that is proportions, multiply the proportions
+            by the number of trials first.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        inactive : boolean ndarray (optional)
+            Which groups of variables are candidates
+            for inclusion in this step. Defaults to ~active.
+
+        active : boolean ndarray (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+            Defaults to `np.zeros(p, bool)`.
+
+        trials : ndarray (optional)
+            Number of trials per response; passed on to `rr.glm.logistic`.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomizer (data-driven default if None).
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.step`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        n, p = X.shape
+        loglike = rr.glm.logistic(X, successes, trials=trials)
+
+        if active is None:
+            active = np.zeros(p, bool)  # builtin `bool`: the `np.bool` alias was removed in NumPy 1.24
+        if inactive is None:
+            inactive = ~active
+
+        if randomizer_scale is None:
+            mean_diag = np.mean((X**2).sum(0))
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 
+
+        return step(loglike, 
+                    feature_weights, 
+                    inactive,
+                    randomizer_scale,
+                    active=active,
+                    randomizer=randomizer,  # previously dropped, so the caller's choice was ignored
+                    covariance_estimator=covariance_estimator)
+
+    @staticmethod
+    def coxph(X, 
+              times, 
+              status, 
+              feature_weights, 
+              inactive=None,
+              active=None,
+              covariance_estimator=None,
+              randomizer_scale=None,
+              randomizer='gaussian'):
+        r"""
+        Take a step with a Cox partial loglikelihood.
+
+        Uses Efron's tie breaking method.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        times : ndarray
+            Shape (n,) -- the survival times.
+
+        status : ndarray
+            Shape (n,) -- the censoring status.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        inactive : boolean ndarray (optional)
+            Which groups of variables are candidates
+            for inclusion in this step. Defaults to ~active.
+
+        active : boolean ndarray (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+            Defaults to `np.zeros(p, bool)`.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomizer. Defaults to `1 / sqrt(n)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.step`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        n, p = X.shape
+        loglike = coxph_obj(X, times, status)  # NOTE(review): `coxph_obj` must be in module scope -- import not visible here
+
+        if active is None:
+            active = np.zeros(p, bool)  # builtin `bool`: the `np.bool` alias was removed in NumPy 1.24
+        if inactive is None:
+            inactive = ~active
+
+        if randomizer_scale is None:
+            randomizer_scale = 1. / np.sqrt(n)
+
+        return step(loglike, 
+                    feature_weights, 
+                    inactive,
+                    randomizer_scale,
+                    active=active,
+                    randomizer=randomizer,
+                    covariance_estimator=covariance_estimator)
+
+    @staticmethod
+    def poisson(X, 
+                counts, 
+                feature_weights, 
+                inactive=None,
+                active=None,
+                covariance_estimator=None,
+                randomizer_scale=None,
+                randomizer='gaussian'):
+        r"""
+        Take a step with a Poisson loglikelihood.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        counts : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        inactive : boolean ndarray (optional)
+            Which groups of variables are candidates
+            for inclusion in this step. Defaults to ~active.
+
+        active : boolean ndarray (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+            Defaults to `np.zeros(p, bool)`.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomizer (data-driven default if None).
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.step`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        n, p = X.shape
+        loglike = rr.glm.poisson(X, counts)
+
+        # scale for randomizer seems kind of meaningless here...
+
+        if active is None:
+            active = np.zeros(p, bool)  # builtin `bool`: the `np.bool` alias was removed in NumPy 1.24
+        if inactive is None:
+            inactive = ~active
+
+        mean_diag = np.mean((X**2).sum(0))
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts)
+
+        return step(loglike, 
+                    feature_weights, 
+                    inactive,
+                    randomizer_scale, 
+                    active=active,
+                    randomizer=randomizer,
+                    covariance_estimator=covariance_estimator)
+
+class threshold(lasso):
+
+    r"""
+    A class for thresholding some coordinates of the
+    randomized score of a GLM. The problem we are
+    solving is
+
+    .. math::
+
+        \text{minimize}_{\eta: |\eta_i| \leq \tau_i} \frac{1}{2}\|\nabla \ell(\bar{\beta}_E) + \omega - \eta\|^2_2
+
+    The set of variables $E$ are variables we have partially maximized over
+    and $\bar{\beta}_E$ should be viewed as padded out with zeros
+    over all variables in $E^c$.
+
+    """
+
+    def __init__(self, 
+                 loglike, 
+                 threshold_value,
+                 inactive,
+                 randomizer_scale,
+                 active=None,
+                 randomizer='gaussian',
+                 covariance_estimator=None):
+        r"""
+
+        Create a new post-selection object for the thresholding problem
+
+        Parameters
+        ----------
+
+        loglike : `regreg.smooth.glm.glm`
+            A (negative) log-likelihood as implemented in `regreg`.
+
+        threshold_value : [float, np.ndarray]
+            Thresholding for each feature. If a float,
+            it is broadcast to all features.
+
+        inactive : np.bool
+            Which groups of variables are candidates
+            for thresholding.
+
+        randomizer_scale : float
+            Scale for IID components of randomization.
+
+        active : np.bool (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+
+        randomizer : str (optional)
+            One of ['laplace', 'logistic', 'gaussian']
+
+        covariance_estimator : callable (optional)
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+
+        self.active = active
+        self.inactive = inactive
+
+        self.loglike = loglike
+        self.nfeature = p = self.loglike.shape[0]
+
+        if np.asarray(threshold_value).shape == ():  # scalar threshold: expand to one entry per feature
+            threshold_value = np.ones(loglike.shape) * threshold_value  # was bound to a dead local, so never stored
+        self.threshold_value = np.asarray(threshold_value)
+
+        self.covariance_estimator = covariance_estimator
+
+        if randomizer == 'laplace':
+            self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
+        elif randomizer == 'gaussian':
+            self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
+        elif randomizer == 'logistic':  # NOTE(review): any other value leaves `self.randomizer` unset
+            self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
+
+    def fit(self, 
+            views=[]):
+        """
+        Threshold the randomized score and find the boundary set.
+
+        Builds a `glm_threshold_score` view, solves it, then solves
+        it together with any extra `views` via `multiple_queries`.
+
+        Parameters
+        ----------
+
+        views : list
+             Other views of the data, e.g. cross-validation.
+
+        Returns
+        -------
+
+        boundary
+             The 'boundary_set' entry of the view's selection variable.
+             
+        """
+
+        p = self.nfeature
+        self._view = glm_threshold_score(self.loglike, 
+                                         self.threshold_value,
+                                         self.randomizer,
+                                         self.active,
+                                         self.inactive)
+        self._view.solve()
+
+        views = copy(views); views.append(self._view)  # copy first so the caller's list (and the shared default) is not mutated
+        self._queries = multiple_queries(views)
+        self._queries.solve()
+   
+        self.boundary = self._view.selection_variable['boundary_set']
+        return self.boundary
+
+    def decompose_subgradient(self,
+                              conditioning_groups=None,
+                              marginalizing_groups=None):
+        """
+
+        Not implemented for the thresholding problem: this override
+        always raises NotImplementedError.
+
+        Parameters
+        ----------
+
+        conditioning_groups : np.bool
+             Unused.
+
+        marginalizing_groups : np.bool
+             Unused.
+
+        Returns
+        -------
+
+        None
+
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def gaussian(X, 
+                 Y, 
+                 threshold_value, 
+                 inactive=None,
+                 active=None,
+                 covariance_estimator=None,
+                 randomizer_scale=None,
+                 randomizer='gaussian'):
+        r"""
+        Threshold the randomized score of a Gaussian loglikelihood.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        threshold_value : [float, sequence]
+            Thresholds applied to the coordinates of the
+            randomized score. A sequence gives one threshold
+            per feature; a float is meant as a common
+            threshold for all features.
+
+        inactive : boolean ndarray (optional)
+            Which groups of variables are candidates
+            for thresholding. Defaults to ~active.
+
+        active : boolean ndarray (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+            Defaults to `np.zeros(p, bool)`.
+
+        covariance_estimator : callable (optional)
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomizer (data-driven default if None).
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.threshold`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of some of the
+        rows and columns of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+
+        loglike = rr.glm.gaussian(X, Y)
+        n, p = X.shape
+
+        if active is None:
+            active = np.zeros(p, bool)  # builtin `bool`: the `np.bool` alias was removed in NumPy 1.24
+        if inactive is None:
+            inactive = ~active
+
+        if randomizer_scale is None:
+            mean_diag = np.mean((X**2).sum(0))
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        return threshold(loglike, 
+                         threshold_value,
+                         inactive, 
+                         randomizer_scale, 
+                         active=active,
+                         randomizer=randomizer,
+                         covariance_estimator=covariance_estimator)  # XXX: do we use the covariance_estimator?
+
+    @staticmethod
+    def logistic(X, 
+                 successes, 
+                 threshold_value, 
+                 active=None,
+                 inactive=None,
+                 trials=None, 
+                 covariance_estimator=None,
+                 randomizer_scale=None,
+                 randomizer='gaussian'):
+        r"""
+        Construct a `threshold` instance using a logistic loglikelihood.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        successes : ndarray
+            Shape (n,) -- response vector. An integer number of successes.
+            For proportions, multiply them by the number of trials first.
+
+        threshold_value : [float, sequence]
+            Penalty weights. An intercept, or other unpenalized features
+            are handled by setting those entries of `threshold_value` to 0.
+            If a float, then all parameters are penalized equally.
+
+        inactive : np.bool (optional)
+            Which groups of variables are candidates
+            for inclusion in this step. Defaults to ~active.
+
+        active : np.bool (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+            Defaults to `np.zeros(p, np.bool)`.
+
+        trials : ndarray (optional)
+            Number of trials per response, defaults to
+            ones the same shape as `successes`.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomizer. If None, defaults to
+            `0.5 * np.sqrt(np.mean((X**2).sum(0)))`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.threshold`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        n, p = X.shape
+        loglike = rr.glm.logistic(X, successes, trials=trials)
+
+        if active is None:
+            active = np.zeros(p, np.bool)
+        if inactive is None:
+            inactive = ~active
+
+        if randomizer_scale is None:
+            mean_diag = np.mean((X**2).sum(0))
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 
+
+        return threshold(loglike, 
+                         threshold_value,
+                         inactive,
+                         randomizer_scale,
+                         active=active,
+                         randomizer=randomizer,
+                         covariance_estimator=covariance_estimator)
+
+    @staticmethod
+    def coxph(X, 
+              times, 
+              status, 
+              threshold_value,
+              inactive=None,
+              active=None,
+              covariance_estimator=None,
+              randomizer_scale=None,
+              randomizer='gaussian'):
+        r"""
+        Construct a `threshold` instance using a Cox partial loglikelihood.
+
+        Uses Efron's tie breaking method.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        times : ndarray
+            Shape (n,) -- the survival times.
+
+        status : ndarray
+            Shape (n,) -- the censoring status.
+
+        threshold_value : [float, sequence]
+            Penalty weights. An intercept, or other unpenalized features
+            are handled by setting those entries of `threshold_value` to 0.
+            If a float, then all parameters are penalized equally.
+
+        inactive : np.bool (optional)
+            Which groups of variables are candidates
+            for inclusion in this step. Defaults to ~active.
+
+        active : np.bool (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+            Defaults to `np.zeros(p, np.bool)`.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomizer. If None, defaults to
+            `1. / np.sqrt(n)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.threshold`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        n, p = X.shape
+        loglike = coxph_obj(X, times, status)
+
+        if active is None:
+            active = np.zeros(p, np.bool)
+        if inactive is None:
+            inactive = ~active
+
+        if randomizer_scale is None:
+            randomizer_scale = 1. / np.sqrt(n)
+
+        return threshold(loglike, 
+                         threshold_value,
+                         inactive,
+                         randomizer_scale,
+                         active=active,
+                         randomizer=randomizer,
+                         covariance_estimator=covariance_estimator)
+
+    @staticmethod
+    def poisson(X, 
+                counts, 
+                threshold_value,
+                inactive=None,
+                active=None,
+                covariance_estimator=None,
+                randomizer_scale=None,
+                randomizer='gaussian'):
+        r"""
+        Construct a `threshold` instance using a Poisson loglikelihood.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        counts : ndarray
+            Shape (n,) -- the response.
+
+        threshold_value : [float, sequence]
+            Penalty weights. An intercept, or other unpenalized features
+            are handled by setting those entries of `threshold_value` to 0.
+            If a float, then all parameters are penalized equally.
+
+        inactive : np.bool (optional)
+            Which groups of variables are candidates
+            for inclusion in this step. Defaults to ~active.
+
+        active : np.bool (optional)
+            Which groups of variables make up $E$, the
+            set of variables we partially minimize over.
+            Defaults to `np.zeros(p, np.bool)`.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomizer. If None, defaults to
+            `0.5 * np.std(counts) * np.sqrt(np.mean((X**2).sum(0)))`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.threshold`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        n, p = X.shape
+        loglike = rr.glm.poisson(X, counts)
+
+        # scale for randomizer seems kind of meaningless here...
+
+        if active is None:
+            active = np.zeros(p, np.bool)
+        if inactive is None:
+            inactive = ~active
+
+        mean_diag = np.mean((X**2).sum(0))
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts)
+
+        return threshold(loglike, 
+                         threshold_value,
+                         inactive,
+                         randomizer_scale, 
+                         active=active,
+                         randomizer=randomizer,
+                         covariance_estimator=covariance_estimator)
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
new file mode 100644
index 000000000..902af2709
--- /dev/null
+++ b/selection/randomized/tests/test_convenience.py
@@ -0,0 +1,85 @@
+from itertools import product
+import numpy as np
+import nose.tools as nt
+
+from ..convenience import lasso, step, threshold
+from ...tests.instance import (gaussian_instance,
+                               logistic_instance,
+                               poisson_instance)
+
+def test_lasso_constructors():
+
+    cls = lasso
+    for const_info, rand in product(zip([gaussian_instance,
+                                         logistic_instance,
+                                         poisson_instance],
+                                        [cls.gaussian,
+                                         cls.logistic,
+                                         cls.poisson]),
+                              ['gaussian', 'logistic', 'laplace']):
+
+        inst, const = const_info
+        X, Y = inst()[:2]
+        W = np.ones(X.shape[1])
+        conv = const(X, Y, W, randomizer=rand)
+        conv.fit()
+
+def test_step_constructors():
+
+    cls = step
+    for const_info, rand in product(zip([gaussian_instance,
+                                         logistic_instance,
+                                         poisson_instance],
+                                        [cls.gaussian,
+                                         cls.logistic,
+                                         cls.poisson]),
+                              ['gaussian', 'logistic', 'laplace']):
+
+        inst, const = const_info
+        X, Y = inst()[:2]
+        W = np.ones(X.shape[1])
+        conv = const(X, Y, W)
+        conv.fit()
+
+        n, p = X.shape
+        active = np.zeros(p, np.bool)
+        active[:int(p/2)] = True
+
+        conv = const(X, Y, W, active=active)
+        conv.fit()
+
+        conv = const(X, Y, W, inactive=~active)
+        conv.fit()
+        
+        conv = const(X, Y, W, inactive=~active, active=active)
+        conv.fit()
+        
+
+def test_threshold_constructors():
+
+    cls = threshold
+    for const_info, rand in product(zip([gaussian_instance,
+                                         logistic_instance,
+                                         poisson_instance],
+                                        [cls.gaussian,
+                                         cls.logistic,
+                                         cls.poisson]),
+                              ['gaussian', 'logistic', 'laplace']):
+
+        inst, const = const_info
+        X, Y = inst()[:2]
+        W = np.ones(X.shape[1])
+
+        n, p = X.shape
+        active = np.zeros(p, np.bool)
+        active[:int(p/2)] = True
+
+        conv = const(X, Y, W, active=active)
+        conv.fit()
+
+        conv = const(X, Y, W, inactive=~active)
+        conv.fit()
+        
+        conv = const(X, Y, W, inactive=~active, active=active)
+        conv.fit()
+        
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 39e692c1a..2ec6e88c8 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -27,7 +27,7 @@
     nboot = -1
 
 @register_report(['truth', 'cover', 'ci_length_clt', 'naive_pvalues', 'naive_cover', 'ci_length_naive',
-                    'active', 'BH_decisions', 'active_var'])
+                  'active', 'BH_decisions', 'active_var'])
 @set_seed_iftrue(SET_SEED)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
diff --git a/selection/randomized/tests/test_greedy_step.py b/selection/randomized/tests/test_greedy_step.py
index 8ea220f74..b19d61b3b 100644
--- a/selection/randomized/tests/test_greedy_step.py
+++ b/selection/randomized/tests/test_greedy_step.py
@@ -7,23 +7,23 @@
 
 import regreg.api as rr
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (wait_for_return_value, 
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (wait_for_return_value, 
                                         set_seed_iftrue, 
                                         set_sampling_params_iftrue, 
                                         register_report)
-from selection.tests.instance import logistic_instance
-import selection.tests.reports as reports
-
-from selection.randomized.api import (randomization, 
-                                      multiple_queries, 
-                                      pairs_bootstrap_glm, 
-                                      glm_group_lasso, 
-                                      glm_greedy_step, 
-                                      pairs_inactive_score_glm)
-from selection.randomized.glm import bootstrap_cov
-from selection.distributions.discrete_family import discrete_family
-from selection.sampling.langevin import projected_langevin
+from ...tests.instance import logistic_instance
+from ...tests import reports
+
+from ..api import (randomization, 
+                   multiple_queries, 
+                   pairs_bootstrap_glm, 
+                   glm_group_lasso, 
+                   glm_greedy_step, 
+                   pairs_inactive_score_glm)
+from ..glm import bootstrap_cov
+from ...distributions.discrete_family import discrete_family
+from ...sampling.langevin import projected_langevin
 
 @register_report(['pvalue', 'active'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
@@ -59,7 +59,8 @@ def test_overall_null_two_queries(ndraw=10000, burnin=2000, nsim=None): # nsim n
     inactive = ~active
     inactive_randomizer = randomization.laplace((inactive.sum(),), scale=0.5)
 
-    step = glm_greedy_step(loss, penalty,
+    step = glm_greedy_step(loss, 
+                           penalty,
                            active,
                            inactive,
                            inactive_randomizer)
diff --git a/selection/randomized/tests/test_multiple_queries.py b/selection/randomized/tests/test_multiple_queries.py
index 963413c01..adc0677cf 100644
--- a/selection/randomized/tests/test_multiple_queries.py
+++ b/selection/randomized/tests/test_multiple_queries.py
@@ -2,19 +2,28 @@
 import numpy as np
 import pandas as pd
 import regreg.api as rr
-import selection.tests.reports as reports
 
-
-from selection.tests.flags import SET_SEED, SMALL_SAMPLES
-from selection.tests.instance import logistic_instance
-from selection.tests.decorators import (wait_for_return_value, 
-                                        set_seed_iftrue, 
-                                        set_sampling_params_iftrue,
-                                        register_report)
-import selection.tests.reports as reports
-
-from selection.api import randomization, glm_group_lasso, pairs_bootstrap_glm, multiple_queries, discrete_family, projected_langevin, glm_group_lasso_parametric, glm_target
-from selection.randomized.glm import glm_parametric_covariance, glm_nonparametric_bootstrap, restricted_Mest, set_alpha_matrix
+from ...tests import reports
+from ...tests.flags import SET_SEED, SMALL_SAMPLES
+from ...tests.instance import logistic_instance
+from ...tests.decorators import (wait_for_return_value, 
+                                 set_seed_iftrue, 
+                                 set_sampling_params_iftrue,
+                                 register_report)
+
+
+from ...api import (randomization, 
+                    glm_group_lasso, 
+                    pairs_bootstrap_glm, 
+                    multiple_queries, 
+                    discrete_family, 
+                    projected_langevin, 
+                    glm_group_lasso_parametric, 
+                    glm_target)
+from ..glm import (glm_parametric_covariance, 
+                   glm_nonparametric_bootstrap, 
+                   restricted_Mest, 
+                   set_alpha_matrix)
 
 @register_report(['truth', 'active'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
diff --git a/selection/randomized/tests/test_threshold_score.py b/selection/randomized/tests/test_threshold_score.py
index 715462d9f..c7e6f742b 100644
--- a/selection/randomized/tests/test_threshold_score.py
+++ b/selection/randomized/tests/test_threshold_score.py
@@ -3,24 +3,24 @@
 
 import regreg.api as rr
 
-from selection.tests.decorators import (wait_for_return_value, 
+from ...tests.decorators import (wait_for_return_value, 
                                         set_seed_iftrue, 
                                         set_sampling_params_iftrue,
                                         register_report)
-import selection.tests.reports as reports
-from selection.tests.flags import SET_SEED, SMALL_SAMPLES
+from ...tests import reports
+from ...tests.flags import SET_SEED, SMALL_SAMPLES
+from ...tests.instance import logistic_instance
 
-from selection.randomized.api import (randomization, 
-                                      multiple_queries, 
-                                      pairs_bootstrap_glm, 
-                                      glm_nonparametric_bootstrap,
-                                      glm_threshold_score)
+from ...distributions.discrete_family import discrete_family
+from ...sampling.langevin import projected_langevin
 
-from selection.randomized.glm import bootstrap_cov
-from selection.distributions.discrete_family import discrete_family
-from selection.sampling.langevin import projected_langevin
+from ..api import (randomization, 
+                   multiple_queries, 
+                   pairs_bootstrap_glm, 
+                   glm_nonparametric_bootstrap,
+                   glm_threshold_score)
+from ..glm import bootstrap_cov
 
-from selection.tests.instance import logistic_instance
 
 
 @register_report(['pvalue', 'active'])

From 7d57e3c917100a689a0ac599f0a31e9ad01b13f8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 12:34:08 -0700
Subject: [PATCH 113/617] WIP: convenience classes are not all sampling
 properly

---
 selection/randomized/M_estimator.py           |  2 +-
 selection/randomized/convenience.py           |  9 ++-
 selection/randomized/greedy_step.py           |  9 ++-
 .../randomized/tests/test_convenience.py      | 79 +++++++++++++++----
 selection/randomized/tests/test_cv.py         | 10 +--
 selection/randomized/threshold_score.py       | 22 ++----
 6 files changed, 88 insertions(+), 43 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index c662774a2..086fcb117 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -367,7 +367,7 @@ def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
         conditioning_groups and marginalizing_groups should be disjoint
         """
 
-        if (conditioning_groups * marginalizing_groups).sum() > 0:
+        if marginalizing_groups is not None and (conditioning_groups * marginalizing_groups).sum() > 0:
             raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
 
         if not self._setup:
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 6a95e408f..ec141a823 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -1391,16 +1391,17 @@ def __init__(self,
 
         if np.asarray(threshold_value).shape == ():
             threshold = np.ones(loglike.shape) * threshold_value
-        self.threshold_value = np.asarray(threshold_value)
+        self.threshold_value = np.asarray(threshold_value)[self.inactive]
 
         self.covariance_estimator = covariance_estimator
 
+        nrandom = inactive.sum()
         if randomizer == 'laplace':
-            self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
+            self.randomizer = randomization.laplace((nrandom,), scale=randomizer_scale)
         elif randomizer == 'gaussian':
-            self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
+            self.randomizer = randomization.isotropic_gaussian((nrandom,),randomizer_scale)
         elif randomizer == 'logistic':
-            self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
+            self.randomizer = randomization.logistic((nrandom,), scale=randomizer_scale)
 
     def fit(self, 
             views=[]):
diff --git a/selection/randomized/greedy_step.py b/selection/randomized/greedy_step.py
index b1a8fa582..1f9909691 100644
--- a/selection/randomized/greedy_step.py
+++ b/selection/randomized/greedy_step.py
@@ -35,12 +35,13 @@ def __init__(self,
                               beta_active)
          
         self.active = np.zeros(self.loss.shape, np.bool)
+        self.inactive = np.zeros(self.loss.shape, np.bool)
         for i, g in enumerate(np.unique(self.penalty.groups)):
             if self.active_groups[i]:
                 self.active[self.penalty.groups == g] = True
-
-        self.inactive = ~self.active
-
+            elif self.inactive_groups[i]:
+                self.inactive[self.penalty.groups == g] = True
+                
         # we form a dual group lasso object
         # to compute the max score
 
@@ -76,6 +77,8 @@ def solve(self, nboot=2000):
         self.observed_score_state = - self.loss.smooth_objective(beta_full, 'grad')[inactive]
         self._randomZ = self.randomization.sample()
 
+        self.num_opt_var = self._randomZ.shape[0]
+
         # find the randomized maximizer
 
         randomized_score = self.observed_score_state - self._randomZ
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index 902af2709..bc280cba2 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -6,8 +6,11 @@
 from ...tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue 
 
-def test_lasso_constructors():
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+def test_lasso_constructors(ndraw=1000, burnin=200):
 
     cls = lasso
     for const_info, rand in product(zip([gaussian_instance,
@@ -20,11 +23,34 @@ def test_lasso_constructors():
 
         inst, const = const_info
         X, Y = inst()[:2]
+        n, p = X.shape
+
         W = np.ones(X.shape[1])
         conv = const(X, Y, W, randomizer=rand)
-        conv.fit()
+        signs = conv.fit()
+
+        marginalizing_groups = np.zeros(p, np.bool)
+        marginalizing_groups[:int(p/2)] = True
+        
+        conditioning_groups = ~marginalizing_groups
+        conditioning_groups[-int(p/4):] = False
+
+        selected_features = np.zeros(p, np.bool)
+        selected_features[:3] = True
 
-def test_step_constructors():
+        conv.summary(selected_features,
+                     ndraw=ndraw,
+                     burnin=burnin)
+
+        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+                                   conditioning_groups=conditioning_groups)
+
+        conv.summary(selected_features,
+                     ndraw=ndraw,
+                     burnin=burnin)
+
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+def test_step_constructors(ndraw=1000, burnin=200):
 
     cls = step
     for const_info, rand in product(zip([gaussian_instance,
@@ -45,17 +71,27 @@ def test_step_constructors():
         active = np.zeros(p, np.bool)
         active[:int(p/2)] = True
 
-        conv = const(X, Y, W, active=active)
-        conv.fit()
+        inactive = ~active
+        inactive[-int(p/4):] = False
 
-        conv = const(X, Y, W, inactive=~active)
-        conv.fit()
+        conv1 = const(X, Y, W, active=active)
+        conv1.fit()
+
+        conv2 = const(X, Y, W, inactive=inactive)
+        conv2.fit()
         
-        conv = const(X, Y, W, inactive=~active, active=active)
-        conv.fit()
+        conv3 = const(X, Y, W, inactive=inactive, active=active)
+        conv3.fit()
         
+        selected_features = np.zeros(p, np.bool)
+        selected_features[:3] = True
+
+        conv3.summary(selected_features,
+                      ndraw=ndraw,
+                      burnin=burnin)
 
-def test_threshold_constructors():
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+def test_threshold_constructors(ndraw=1000, burnin=200):
 
     cls = threshold
     for const_info, rand in product(zip([gaussian_instance,
@@ -74,12 +110,23 @@ def test_threshold_constructors():
         active = np.zeros(p, np.bool)
         active[:int(p/2)] = True
 
-        conv = const(X, Y, W, active=active)
-        conv.fit()
+        inactive = ~active
+        inactive[-int(p/4):] = False
 
-        conv = const(X, Y, W, inactive=~active)
-        conv.fit()
+        conv1 = const(X, Y, W, active=active)
+        conv1.fit()
+
+        conv2 = const(X, Y, W, inactive=inactive)
+        conv2.fit()
         
-        conv = const(X, Y, W, inactive=~active, active=active)
-        conv.fit()
+        conv3 = const(X, Y, W, inactive=inactive, active=active)
+        conv3.fit()
         
+        selected_features = np.zeros(p, np.bool)
+        selected_features[:3] = True
+
+        conv3.summary(selected_features,
+                      ndraw=ndraw,
+                      burnin=burnin)
+
+
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 2ec6e88c8..613975784 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -4,12 +4,12 @@
 
 import regreg.api as rr
 
-from selection.api import (randomization,
-                           glm_group_lasso,
-                           multiple_queries,
-                           glm_target)
+from ...api import (randomization,
+                    glm_group_lasso,
+                    multiple_queries,
+                    glm_target)
 from ...tests.instance import (gaussian_instance,
-                                      logistic_instance)
+                               logistic_instance)
 
 from ..query import naive_confidence_intervals, naive_pvalues
 
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index 74dbbe90c..e8b095b7b 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -5,7 +5,14 @@
 from .M_estimator import restricted_Mest
 
 class threshold_score(query):
-    def __init__(self, loss, threshold, randomization, active, inactive, beta_active=None,
+
+    def __init__(self, 
+                 loss, 
+                 threshold, 
+                 randomization, 
+                 active, 
+                 inactive, 
+                 beta_active=None,
                  solve_args={'min_its': 50, 'tol': 1.e-10}):
         """
         penalty is a group_lasso object that assigns weights to groups
@@ -18,7 +25,6 @@ def __init__(self, loss, threshold, randomization, active, inactive, beta_active
         active_bool = np.zeros(loss.shape, np.bool)
         active_bool[active] = 1
         active = active_bool
-        inactive = ~active
 
         if np.array(threshold).shape in [(), (1,)]:
             threshold = np.ones(inactive.sum()) * threshold
@@ -74,9 +80,6 @@ def solve(self, nboot=2000):
 
         self.boundary = np.fabs(randomized_score) > threshold
 
-        #self.positive_boundary  = (randomized_score > threshold)
-        #self.negative_boundary = (-randomized_score < threshold)
-
         self.interior = ~self.boundary
 
         self.observed_score_state = inactive_score
@@ -85,7 +88,6 @@ def solve(self, nboot=2000):
 
         self._solved = True
 
-        #self.num_opt_var = self.boundary.shape[0]
         self.nboot = nboot
         self.ndim = self.loss.shape[0]
 
@@ -103,18 +105,10 @@ def construct_weights(self, full_state):
         weights[self.boundary] = ((self.randomization._density(threshold[self.boundary] - full_state[self.boundary]) - self.randomization._density(-threshold[self.boundary] - full_state[self.boundary])) /
                                   (1 - self.randomization._cdf(threshold[self.boundary] - full_state[self.boundary]) + self.randomization._cdf(-threshold[self.boundary] - full_state[self.boundary])))
 
-        #weights[self.positive_boundary] = self.randomization._density(threshold[self.positive_boundary] - full_state[self.positive_boundary])  / \
-        #                          (1 - self.randomization._cdf(threshold[self.positive_boundary] - full_state[self.positive_boundary]))
-
-
-        #weights[self.negative_boundary] = - self.randomization._density(-threshold[self.negative_boundary] - full_state[self.negative_boundary]) / \
-        #                                   (self.randomization._cdf(-threshold[self.negative_boundary] - full_state[self.negative_boundary]))
-
 
         weights[~self.boundary] = ((-self.randomization._density(threshold[~self.boundary] - full_state[~self.boundary]) + self.randomization._density(-threshold[~self.boundary] - full_state[~self.boundary])) /
                                    (self.randomization._cdf(threshold[~self.boundary] - full_state[~self.boundary]) - self.randomization._cdf(-threshold[~self.boundary] - full_state[~self.boundary])))
 
-        #return -weights
         return weights ## tested
 
     def setup_sampler(self):

From 59313312d01b287556f80e93161da34bb3344e95 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 13:30:00 -0700
Subject: [PATCH 114/617] poisson instance

---
 selection/tests/instance.py | 76 +++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/selection/tests/instance.py b/selection/tests/instance.py
index f6c56ae5d..a27ebf08a 100644
--- a/selection/tests/instance.py
+++ b/selection/tests/instance.py
@@ -194,6 +194,82 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
     Y = np.random.binomial(1, pi)
     return X, Y, beta, np.nonzero(active)[0]
 
+def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=14,
+                     random_signs=False, scale=True, center=True,
+                     equicorrelated=True):
+    """
+    A testing instance for Poisson regression.
+    Counts are Poisson with mean `exp(X.dot(beta))`.
+
+    Parameters
+    ----------
+    n : int
+        Sample size
+
+    p : int
+        Number of features
+
+    s : int
+        True sparsity
+
+    rho : float
+        Equicorrelation value (must be in interval [0,1])
+
+    signal : float or (float, float)
+        Sizes for the coefficients. If a tuple -- then coefficients
+        are equally spaced between these values using np.linspace.
+
+    random_signs : bool
+        If true, assign random signs to coefficients.
+        Else they are all positive.
+
+    scale : bool
+        If true, divide each column of X by its standard deviation.
+
+    center : bool
+        If true, subtract its mean from each column of X.
+
+    equicorrelated : bool
+        Passed to `_design` (presumably picks the correlation structure).
+
+    Returns
+    -------
+    X : np.float((n,p))
+        Design matrix, always divided by sqrt(n) at the end.
+
+    Y : np.int(n)
+        Response vector of Poisson counts.
+
+    beta : np.float(p)
+        True coefficients.
+
+    active : np.int(s)
+        Non-zero pattern.
+    """
+    X = _design(n, p, rho, equicorrelated)
+
+    if center:
+        X -= X.mean(0)[None,:]
+    if scale:
+        X /= X.std(0)[None,:]
+    X /= np.sqrt(n)
+
+    beta = np.zeros(p)
+    signal = np.atleast_1d(signal)
+    if signal.shape == (1,):
+        beta[:s] = signal[0]
+    else:
+        beta[:s] = np.linspace(signal[0], signal[1], s)
+    if random_signs:
+        beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
+    # scatter the non-zero coefficients over random coordinates
+    np.random.shuffle(beta)
+
+    # log link: the Poisson mean is exp of the linear predictor
+    Y = np.random.poisson(np.exp(np.dot(X, beta)))
+    # no np.bool mask needed (that alias is absent in NumPy 1.24-1.26)
+    return X, Y, beta, np.nonzero(beta)[0]
+
 def HIV_NRTI(drug='3TC', 
              standardize=True, 
              datafile=None,

From 51cca5043e7773f522b6ac011f567017dc8ecadd Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 15:46:25 -0700
Subject: [PATCH 115/617] WIP: working on convenience samplers

---
 selection/randomized/convenience.py           | 148 +++++++++---------
 selection/randomized/glm.py                   |  15 +-
 selection/randomized/greedy_step.py           |  28 ++--
 selection/randomized/query.py                 |   2 +-
 .../randomized/tests/test_convenience.py      |  16 +-
 selection/randomized/threshold_score.py       |  57 +++++--
 6 files changed, 154 insertions(+), 112 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index ec141a823..68f3972b0 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -832,7 +832,7 @@ class step(lasso):
     def __init__(self, 
                  loglike, 
                  feature_weights,
-                 inactive,
+                 candidate,
                  randomizer_scale,
                  active=None,
                  randomizer='gaussian',
@@ -851,7 +851,7 @@ def __init__(self,
             Feature weights for L-1 penalty. If a float,
             it is brodcast to all features.
 
-        inactive : np.bool
+        candidate : np.bool
             Which groups of variables are candidates
             for inclusion in this step.
 
@@ -873,17 +873,17 @@ def __init__(self,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
         """
 
         self.active = active
-        self.inactive = inactive
+        self.candidate = candidate
 
         self.loglike = loglike
         self.nfeature = p = loglike.shape[0]
@@ -894,7 +894,7 @@ def __init__(self,
 
         self.covariance_estimator = covariance_estimator
 
-        nrandom = inactive.sum()
+        nrandom = candidate.sum()
         if randomizer == 'laplace':
             self.randomizer = randomization.laplace((nrandom,), scale=randomizer_scale)
         elif randomizer == 'gaussian':
@@ -931,7 +931,7 @@ def fit(self,
         self._view = glm_greedy_step(self.loglike, 
                                      self.penalty, 
                                      self.active,
-                                     self.inactive,
+                                     self.candidate,
                                      self.randomizer)
         self._view.solve()
 
@@ -947,7 +947,7 @@ def decompose_subgradient(self,
                               marginalizing_groups=None):
         """
 
-        Marginalize over some if inactive part of subgradient
+        Marginalize over some of the candidate part of subgradient
         if applicable.
 
         Parameters
@@ -971,7 +971,7 @@ def decompose_subgradient(self,
     def gaussian(X, 
                  Y, 
                  feature_weights, 
-                 inactive=None,
+                 candidate=None,
                  active=None,
                  covariance_estimator=None,
                  randomizer_scale=None,
@@ -994,7 +994,7 @@ def gaussian(X,
             `feature_weights` to 0. If `feature_weights` is 
             a float, then all parameters are penalized equally.
 
-        inactive : np.bool (optional)
+        candidate : np.bool (optional)
             Which groups of variables are candidates
             for inclusion in this step. Defaults to ~active.
 
@@ -1022,11 +1022,11 @@ def gaussian(X,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of some of the
         rows and columns of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
@@ -1036,8 +1036,8 @@ def gaussian(X,
 
         if active is None:
             active = np.zeros(p, np.bool)
-        if inactive is None:
-            inactive = ~active
+        if candidate is None:
+            candidate = ~active
 
         if randomizer_scale is None:
             mean_diag = np.mean((X**2).sum(0))
@@ -1045,7 +1045,7 @@ def gaussian(X,
 
         return step(loglike, 
                     feature_weights,
-                    inactive, 
+                    candidate, 
                     randomizer_scale, 
                     active=active,
                     randomizer=randomizer,
@@ -1056,7 +1056,7 @@ def logistic(X,
                  successes, 
                  feature_weights, 
                  active=None,
-                 inactive=None,
+                 candidate=None,
                  trials=None, 
                  covariance_estimator=None,
                  randomizer_scale=None,
@@ -1081,7 +1081,7 @@ def logistic(X,
             `feature_weights` to 0. If `feature_weights` is 
             a float, then all parameters are penalized equally.
 
-        inactive : np.bool (optional)
+        candidate : np.bool (optional)
             Which groups of variables are candidates
             for inclusion in this step. Defaults to ~active.
 
@@ -1113,10 +1113,10 @@ def logistic(X,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
@@ -1126,8 +1126,8 @@ def logistic(X,
 
         if active is None:
             active = np.zeros(p, np.bool)
-        if inactive is None:
-            inactive = ~active
+        if candidate is None:
+            candidate = ~active
 
         if randomizer_scale is None:
             mean_diag = np.mean((X**2).sum(0))
@@ -1135,7 +1135,7 @@ def logistic(X,
 
         return step(loglike, 
                     feature_weights, 
-                    inactive,
+                    candidate,
                     randomizer_scale,
                     active=active,
                     covariance_estimator=covariance_estimator)
@@ -1145,7 +1145,7 @@ def coxph(X,
               times, 
               status, 
               feature_weights, 
-              inactive=None,
+              candidate=None,
               active=None,
               covariance_estimator=None,
               randomizer_scale=None,
@@ -1173,7 +1173,7 @@ def coxph(X,
             `feature_weights` to 0. If `feature_weights` is 
             a float, then all parameters are penalized equally.
 
-        inactive : np.bool (optional)
+        candidate : np.bool (optional)
             Which groups of variables are candidates
             for inclusion in this step. Defaults to ~active.
 
@@ -1201,10 +1201,10 @@ def coxph(X,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
@@ -1214,15 +1214,15 @@ def coxph(X,
 
         if active is None:
             active = np.zeros(p, np.bool)
-        if inactive is None:
-            inactive = ~active
+        if candidate is None:
+            candidate = ~active
 
         if randomizer_scale is None:
             randomizer_scale = 1. / np.sqrt(n)
 
         return step(loglike, 
                     feature_weights, 
-                    inactive,
+                    candidate,
                     randomizer_scale,
                     active=active,
                     randomizer=randomizer,
@@ -1232,7 +1232,7 @@ def coxph(X,
     def poisson(X, 
                 counts, 
                 feature_weights, 
-                inactive=None,
+                candidate=None,
                 active=None,
                 covariance_estimator=None,
                 randomizer_scale=None,
@@ -1255,7 +1255,7 @@ def poisson(X,
             `feature_weights` to 0. If `feature_weights` is 
             a float, then all parameters are penalized equally.
 
-        inactive : np.bool (optional)
+        candidate : np.bool (optional)
             Which groups of variables are candidates
             for inclusion in this step. Defaults to ~active.
 
@@ -1283,10 +1283,10 @@ def poisson(X,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
@@ -1298,8 +1298,8 @@ def poisson(X,
 
         if active is None:
             active = np.zeros(p, np.bool)
-        if inactive is None:
-            inactive = ~active
+        if candidate is None:
+            candidate = ~active
 
         mean_diag = np.mean((X**2).sum(0))
         if randomizer_scale is None:
@@ -1307,7 +1307,7 @@ def poisson(X,
 
         return step(loglike, 
                     feature_weights, 
-                    inactive,
+                    candidate,
                     randomizer_scale, 
                     active=active,
                     randomizer=randomizer,
@@ -1333,7 +1333,7 @@ class threshold(lasso):
     def __init__(self, 
                  loglike, 
                  threshold_value,
-                 inactive,
+                 candidate,
                  randomizer_scale,
                  active=None,
                  randomizer='gaussian',
@@ -1348,11 +1348,11 @@ def __init__(self,
         loglike : `regreg.smooth.glm.glm`
             A (negative) log-likelihood as implemented in `regreg`.
 
-        threshold_value : np.ndarray
+        threshold_value : [float, sequence]
             Thresholding for each feature. If 1d defaults
             it is treated as a multiple of np.ones.
 
-        inactive : np.bool
+        candidate : np.bool
             Which groups of variables are candidates
             for thresholding.
 
@@ -1374,28 +1374,28 @@ def __init__(self,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
         """
 
         self.active = active
-        self.inactive = inactive
+        self.candidate = candidate
 
         self.loglike = loglike
         self.nfeature = p = self.loglike.shape[0]
 
         if np.asarray(threshold_value).shape == ():
             threshold = np.ones(loglike.shape) * threshold_value
-        self.threshold_value = np.asarray(threshold_value)[self.inactive]
+        self.threshold_value = np.asarray(threshold_value)[self.candidate]
 
         self.covariance_estimator = covariance_estimator
 
-        nrandom = inactive.sum()
+        nrandom = candidate.sum()
         if randomizer == 'laplace':
             self.randomizer = randomization.laplace((nrandom,), scale=randomizer_scale)
         elif randomizer == 'gaussian':
@@ -1430,7 +1430,7 @@ def fit(self,
                                          self.threshold_value,
                                          self.randomizer,
                                          self.active,
-                                         self.inactive)
+                                         self.candidate)
         self._view.solve()
 
         views = copy(views); views.append(self._view)
@@ -1445,7 +1445,7 @@ def decompose_subgradient(self,
                               marginalizing_groups=None):
         """
 
-        Marginalize over some if inactive part of subgradient
+        Marginalize over some of the candidate part of subgradient
         if applicable.
 
         Parameters
@@ -1469,7 +1469,7 @@ def decompose_subgradient(self,
     def gaussian(X, 
                  Y, 
                  threshold_value, 
-                 inactive=None,
+                 candidate=None,
                  active=None,
                  covariance_estimator=None,
                  randomizer_scale=None,
@@ -1492,7 +1492,7 @@ def gaussian(X,
             `threshold` to 0. If `threshold` is 
             a float, then all parameters are penalized equally.
 
-        inactive : np.bool (optional)
+        candidate : np.bool (optional)
             Which groups of variables are candidates
             for inclusion in this step. Defaults to ~active.
 
@@ -1520,11 +1520,11 @@ def gaussian(X,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of some of the
         rows and columns of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
@@ -1535,8 +1535,8 @@ def gaussian(X,
 
         if active is None:
             active = np.zeros(p, np.bool)
-        if inactive is None:
-            inactive = ~active
+        if candidate is None:
+            candidate = ~active
 
         if randomizer_scale is None:
             mean_diag = np.mean((X**2).sum(0))
@@ -1544,7 +1544,7 @@ def gaussian(X,
 
         return threshold(loglike, 
                          threshold_value,
-                         inactive, 
+                         candidate, 
                          randomizer_scale, 
                          active=active,
                          randomizer=randomizer,
@@ -1555,7 +1555,7 @@ def logistic(X,
                  successes, 
                  threshold_value, 
                  active=None,
-                 inactive=None,
+                 candidate=None,
                  trials=None, 
                  covariance_estimator=None,
                  randomizer_scale=None,
@@ -1580,7 +1580,7 @@ def logistic(X,
             `threshold` to 0. If `threshold` is 
             a float, then all parameters are penalized equally.
 
-        inactive : np.bool (optional)
+        candidate : np.bool (optional)
             Which groups of variables are candidates
             for inclusion in this step. Defaults to ~active.
 
@@ -1612,10 +1612,10 @@ def logistic(X,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
@@ -1625,8 +1625,8 @@ def logistic(X,
 
         if active is None:
             active = np.zeros(p, np.bool)
-        if inactive is None:
-            inactive = ~active
+        if candidate is None:
+            candidate = ~active
 
         if randomizer_scale is None:
             mean_diag = np.mean((X**2).sum(0))
@@ -1634,7 +1634,7 @@ def logistic(X,
 
         return threshold(loglike, 
                          threshold_value,
-                         inactive,
+                         candidate,
                          randomizer_scale,
                          active=active,
                          covariance_estimator=covariance_estimator)
@@ -1644,7 +1644,7 @@ def coxph(X,
               times, 
               status, 
               threshold_value,
-              inactive=None,
+              candidate=None,
               active=None,
               covariance_estimator=None,
               randomizer_scale=None,
@@ -1672,7 +1672,7 @@ def coxph(X,
             `threshold` to 0. If `threshold` is 
             a float, then all parameters are penalized equally.
 
-        inactive : np.bool (optional)
+        candidate : np.bool (optional)
             Which groups of variables are candidates
             for inclusion in this step. Defaults to ~active.
 
@@ -1700,10 +1700,10 @@ def coxph(X,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
@@ -1713,15 +1713,15 @@ def coxph(X,
 
         if active is None:
             active = np.zeros(p, np.bool)
-        if inactive is None:
-            inactive = ~active
+        if candidate is None:
+            candidate = ~active
 
         if randomizer_scale is None:
             randomizer_scale = 1. / np.sqrt(n)
 
         return threshold(loglike, 
                          threshold_value,
-                         inactive,
+                         candidate,
                          randomizer_scale,
                          active=active,
                          randomizer=randomizer,
@@ -1731,7 +1731,7 @@ def coxph(X,
     def poisson(X, 
                 counts, 
                 threshold_value,
-                inactive=None,
+                candidate=None,
                 active=None,
                 covariance_estimator=None,
                 randomizer_scale=None,
@@ -1754,7 +1754,7 @@ def poisson(X,
             `threshold` to 0. If `threshold` is 
             a float, then all parameters are penalized equally.
 
-        inactive : np.bool (optional)
+        candidate : np.bool (optional)
             Which groups of variables are candidates
             for inclusion in this step. Defaults to ~active.
 
@@ -1782,10 +1782,10 @@ def poisson(X,
         -----
 
         If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
+        take arguments (beta, active, candidate)
         and return an estimate of the covariance of
         $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
+        the unpenalized estimator and the candidate
         coordinates of the gradient of the likelihood at
         the unpenalized estimator.
 
@@ -1797,8 +1797,8 @@ def poisson(X,
 
         if active is None:
             active = np.zeros(p, np.bool)
-        if inactive is None:
-            inactive = ~active
+        if candidate is None:
+            candidate = ~active
 
         mean_diag = np.mean((X**2).sum(0))
         if randomizer_scale is None:
@@ -1806,7 +1806,7 @@ def poisson(X,
 
         return threshold(loglike, 
                          threshold_value,
-                         inactive,
+                         candidate,
                          randomizer_scale, 
                          active=active,
                          randomizer=randomizer,
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 5828962f2..b4a59870c 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -123,6 +123,7 @@ def pairs_inactive_score_glm(glm_loss,
                              active, 
                              beta_active, 
                              scaling=1.,
+                             inactive=None,
                              solve_args={'min_its':50, 'tol':1.e-10}):
 
     """
@@ -148,6 +149,10 @@ def pairs_inactive_score_glm(glm_loss,
         are multiplied by sqrt(scaling) inactive ones are divided
         by sqrt(scaling).
 
+    inactive : np.bool (optional)
+        Which coordinates to return. If None, defaults
+        to ~active.
+
     solve_args : dict
         Arguments passed to solver of restricted problem (`restricted_Mest`) if 
         beta_full is None.
@@ -161,7 +166,9 @@ def pairs_inactive_score_glm(glm_loss,
 
     """
 
-    inactive = ~active
+    if inactive is None:
+        inactive = ~active
+
     beta_full = np.zeros(glm_loss.shape)
     beta_full[active] = beta_active
 
@@ -541,7 +548,8 @@ def setup_sampler(self):
         greedy_score_step.setup_sampler(self)
         bootstrap_score = pairs_inactive_score_glm(self.loss, 
                                                    self.active,
-                                                   self.beta_active)
+                                                   self.beta_active,
+                                                   inactive=self.candidate)
         return bootstrap_score
 
 class glm_threshold_score(threshold_score):
@@ -550,7 +558,8 @@ def setup_sampler(self):
         threshold_score.setup_sampler(self)
         bootstrap_score = pairs_inactive_score_glm(self.loss, 
                                                    self.active,
-                                                   self.beta_active)
+                                                   self.beta_active,
+                                                   inactive=self.candidate)
         return bootstrap_score
 
 
diff --git a/selection/randomized/greedy_step.py b/selection/randomized/greedy_step.py
index 1f9909691..e134f3b6c 100644
--- a/selection/randomized/greedy_step.py
+++ b/selection/randomized/greedy_step.py
@@ -10,7 +10,7 @@ def __init__(self,
                  loss, 
                  penalty, 
                  active_groups, 
-                 inactive_groups, 
+                 candidate_groups, 
                  randomization, 
                  solve_args={'min_its':50, 'tol':1.e-10},
                  beta_active=None):
@@ -23,29 +23,29 @@ def __init__(self,
         (self.loss,
          self.penalty,
          self.active_groups,
-         self.inactive_groups,
+         self.candidate_groups,
          self.randomization,
          self.solve_args,
          self.beta_active) = (loss,
                               penalty,
                               active_groups,
-                              inactive_groups,
+                              candidate_groups,
                               randomization,
                               solve_args,
                               beta_active)
          
         self.active = np.zeros(self.loss.shape, np.bool)
-        self.inactive = np.zeros(self.loss.shape, np.bool)
+        self.candidate = np.zeros(self.loss.shape, np.bool)
         for i, g in enumerate(np.unique(self.penalty.groups)):
             if self.active_groups[i]:
                 self.active[self.penalty.groups == g] = True
-            elif self.inactive_groups[i]:
-                self.inactive[self.penalty.groups == g] = True
+            elif self.candidate_groups[i]:
+                self.candidate[self.penalty.groups == g] = True
                 
         # we form a dual group lasso object
         # to compute the max score
 
-        new_groups = penalty.groups[self.inactive]
+        new_groups = penalty.groups[self.candidate]
         new_weights = dict([(g,penalty.weights[g]) for g in penalty.weights.keys() if g in np.unique(new_groups)])
 
         self.group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, lagrange=1.)
@@ -55,13 +55,13 @@ def solve(self, nboot=2000):
         (loss,
          penalty,
          active,
-         inactive,
+         candidate,
          randomization,
          solve_args,
          beta_active) = (self.loss,
                          self.penalty,
                          self.active,
-                         self.inactive,
+                         self.candidate,
                          self.randomization,
                          self.solve_args,
                          self.beta_active)
@@ -74,7 +74,7 @@ def solve(self, nboot=2000):
             
         # score at unpenalized M-estimator
 
-        self.observed_score_state = - self.loss.smooth_objective(beta_full, 'grad')[inactive]
+        self.observed_score_state = - self.loss.smooth_objective(beta_full, 'grad')[candidate]
         self._randomZ = self.randomization.sample()
 
         self.num_opt_var = self._randomZ.shape[0]
@@ -90,7 +90,7 @@ def solve(self, nboot=2000):
         maximizing_subgrad = self.observed_score_state[self.group_lasso_dual.groups == maximizing_group]
         maximizing_subgrad /= np.linalg.norm(maximizing_subgrad) # this is now a unit vector
         maximizing_subgrad *= self.group_lasso_dual.weights[maximizing_group] # now a vector of length given by weight of maximizing group
-        self.maximizing_subgrad = np.zeros(inactive.sum())
+        self.maximizing_subgrad = np.zeros(candidate.sum())
         self.maximizing_subgrad[self.group_lasso_dual.groups == maximizing_group] = maximizing_subgrad
         self.observed_scaling = np.max(terms) / self.group_lasso_dual.weights[maximizing_group]
 
@@ -101,7 +101,7 @@ def solve(self, nboot=2000):
         for g in losing_groups:
             losing_set += self.group_lasso_dual.groups == g
 
-        # (inactive_subgradients, scaling) are in this epigraph:
+        # (candidate_subgradients, scaling) are in this epigraph:
         losing_weights = dict([(g, self.group_lasso_dual.weights[g]) for g in self.group_lasso_dual.weights.keys() if g in losing_groups])
         self.group_lasso_dual_epigraph = rr.group_lasso_dual_epigraph(self.group_lasso_dual.groups[losing_set], weights=losing_weights)
         
@@ -111,7 +111,7 @@ def solve(self, nboot=2000):
         # which variables are added to the model
 
         winning_variables = self.group_lasso_dual.groups == maximizing_group
-        padding_map = np.identity(self.active.shape[0])[:,self.inactive]
+        padding_map = np.identity(self.active.shape[0])[:,self.candidate]
         self.maximizing_variables = padding_map.dot(winning_variables) > 0
         
         self.selection_variable = {'maximizing_group':maximizing_group, 
@@ -127,7 +127,7 @@ def setup_sampler(self):
         self.observed_opt_state = np.hstack([self.observed_subgradients,
                                              self.observed_scaling])
 
-        p = self.inactive.sum() # shorthand
+        p = self.candidate.sum() # shorthand
         _opt_linear_term = np.zeros((p, 1 + self.observed_subgradients.shape[0]))
         _opt_linear_term[:,:self.observed_subgradients.shape[0]] = self.losing_padding_map
         _opt_linear_term[:,-1] = self.maximizing_subgrad
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 27162b4ad..1214c3ef5 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -236,7 +236,7 @@ def setup_sampler(self, form_covariances):
         curr_randomization_length = 0
         self.randomization_slice = []
         for objective in self.objectives:
-            randomization_length = objective.loss.shape[0]
+            randomization_length = objective.randomization.shape[0]
             self.randomization_slice.append(slice(curr_randomization_length,
                                                   curr_randomization_length + randomization_length))
             curr_randomization_length = curr_randomization_length + randomization_length
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index bc280cba2..9bf0b1ffc 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -71,16 +71,16 @@ def test_step_constructors(ndraw=1000, burnin=200):
         active = np.zeros(p, np.bool)
         active[:int(p/2)] = True
 
-        inactive = ~active
-        inactive[-int(p/4):] = False
+        candidate = ~active
+        candidate[-int(p/4):] = False
 
         conv1 = const(X, Y, W, active=active)
         conv1.fit()
 
-        conv2 = const(X, Y, W, inactive=inactive)
+        conv2 = const(X, Y, W, candidate=candidate)
         conv2.fit()
         
-        conv3 = const(X, Y, W, inactive=inactive, active=active)
+        conv3 = const(X, Y, W, candidate=candidate, active=active)
         conv3.fit()
         
         selected_features = np.zeros(p, np.bool)
@@ -110,16 +110,16 @@ def test_threshold_constructors(ndraw=1000, burnin=200):
         active = np.zeros(p, np.bool)
         active[:int(p/2)] = True
 
-        inactive = ~active
-        inactive[-int(p/4):] = False
+        candidate = ~active
+        candidate[-int(p/4):] = False
 
         conv1 = const(X, Y, W, active=active)
         conv1.fit()
 
-        conv2 = const(X, Y, W, inactive=inactive)
+        conv2 = const(X, Y, W, candidate=candidate)
         conv2.fit()
         
-        conv3 = const(X, Y, W, inactive=inactive, active=active)
+        conv3 = const(X, Y, W, candidate=candidate, active=active)
         conv3.fit()
         
         selected_features = np.zeros(p, np.bool)
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index e8b095b7b..cb54898a0 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -6,41 +6,74 @@
 
 class threshold_score(query):
 
+    """
+
+    Randomly threshold the score of a linear 
+    model.
+
+    """
+
     def __init__(self, 
                  loss, 
                  threshold, 
                  randomization, 
                  active, 
-                 inactive, 
+                 candidate, 
                  beta_active=None,
                  solve_args={'min_its': 50, 'tol': 1.e-10}):
         """
-        penalty is a group_lasso object that assigns weights to groups
+
+        Parameters
+        ----------
+
+        loss : regreg.smooth.smooth_atom
+            Loss whose score (gradient) will be thresholded.
+
+        threshold : [float, sequence]
+            Threshold for each candidate feature. If a scalar
+            it is treated as a multiple of np.ones.
+
+        randomization : selection.randomized.randomization.randomization
+            Instance of a randomizer.
+
+        active : np.bool
+            Loss is first partially minimized over the active coordinates.
+            May be all zeros.
+
+        candidate : np.bool
+            Candidate coordinates for thresholding.
+        
+        beta_active : np.float (optional)
+            If supplied this is taken as solution 
+            of partial minimization.
+
+        solve_args : dict (optional)
+            Arguments passed in solving the partial minimization.
         """
 
         query.__init__(self, randomization)
 
-        # threshold could be a vector size inactive
+        # threshold could be a vector size candidate
 
         active_bool = np.zeros(loss.shape, np.bool)
         active_bool[active] = 1
         active = active_bool
 
         if np.array(threshold).shape in [(), (1,)]:
-            threshold = np.ones(inactive.sum()) * threshold
+            threshold = np.ones(candidate.sum()) * threshold
 
         self.epsilon = 0.  # for randomized loss
 
         (self.loss,
          self.threshold,
          self.active,
-         self.inactive,
+         self.candidate,
          self.beta_active,
          self.randomization,
          self.solve_args) = (loss,
                              threshold,
                              active,
-                             inactive,
+                             candidate,
                              beta_active,
                              randomization,
                              solve_args)
@@ -50,12 +83,12 @@ def solve(self, nboot=2000):
         (loss,
          threshold,
          active,
-         inactive,
+         candidate,
          beta_active,
          randomization) = (self.loss,
                            self.threshold,
                            self.active,
-                           self.inactive,
+                           self.candidate,
                            self.beta_active,
                            self.randomization)
 
@@ -70,11 +103,11 @@ def solve(self, nboot=2000):
         beta_full[active] = beta_active
         self._beta_full = beta_full
 
-        inactive_score = self.loss.smooth_objective(beta_full, 'grad')[inactive]
-        randomized_score = inactive_score + randomization.sample()
+        candidate_score = self.loss.smooth_objective(beta_full, 'grad')[candidate]
+        randomized_score = candidate_score + randomization.sample()
 
         # find the current active group, i.e.
-        # subset of inactive that pass the threshold
+        # subset of candidate that pass the threshold
 
         # TODO: make this test use group LASSO
 
@@ -82,7 +115,7 @@ def solve(self, nboot=2000):
 
         self.interior = ~self.boundary
 
-        self.observed_score_state = inactive_score
+        self.observed_score_state = candidate_score
 
         self.selection_variable = {'boundary_set': self.boundary}
 

From d24bfeeab12c02fdb2e9c01f6741a7af13cc39f8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 15 Aug 2017 17:07:06 -0700
Subject: [PATCH 116/617] using power method for lipschitz; testing intervals
 are formed

---
 selection/randomized/query.py                  | 11 ++++++-----
 selection/randomized/tests/test_convenience.py |  6 ++++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 1214c3ef5..29a017292 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1,13 +1,14 @@
 from itertools import product
 import numpy as np
+
 from scipy.stats import norm as ndist
 from scipy.optimize import bisect
 
+from regreg.affine import power_L
+
 from ..distributions.api import discrete_family, intervals_from_sample
 from ..sampling.langevin import projected_langevin
 
-
-
 class query(object):
 
     def __init__(self, randomization):
@@ -760,10 +761,10 @@ def crude_lipschitz(self):
         lipschitz : float
 
         """
-        lipschitz = np.linalg.svd(self.target_inv_cov)[1].max()
+        lipschitz = power_L(self.target_inv_cov)
         for transform, objective in zip(self.target_transform, self.objectives):
-            lipschitz += np.linalg.svd(transform[0])[1].max()**2 * objective.randomization.lipschitz
-            lipschitz += np.linalg.svd(objective.score_transform[0])[1].max()**2 * objective.randomization.lipschitz
+            lipschitz += power_L(transform[0])**2 * objective.randomization.lipschitz
+            lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
         return lipschitz
 
 
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index 9bf0b1ffc..e2129f70d 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -40,7 +40,8 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
 
         conv.summary(selected_features,
                      ndraw=ndraw,
-                     burnin=burnin)
+                     burnin=burnin,
+                     compute_intervals=True)
 
         conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
                                    conditioning_groups=conditioning_groups)
@@ -88,7 +89,8 @@ def test_step_constructors(ndraw=1000, burnin=200):
 
         conv3.summary(selected_features,
                       ndraw=ndraw,
-                      burnin=burnin)
+                      burnin=burnin,
+                      compute_intervals=True)
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_threshold_constructors(ndraw=1000, burnin=200):

From 1d9e49909c5fd944d03ccc677bb2d37eb8c98e8b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 16 Aug 2017 14:06:43 -0700
Subject: [PATCH 117/617] BF: forgot to also modify stepsize in langevin

---
 selection/randomized/tests/test_convenience.py |  2 ++
 selection/sampling/langevin.py                 |  1 +
 selection/tests/instance.py                    | 10 +++++-----
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index e2129f70d..95b736ba2 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -43,6 +43,8 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
                      burnin=burnin,
                      compute_intervals=True)
 
+        print(`const_info` + ' OK')
+
         conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
                                    conditioning_groups=conditioning_groups)
 
diff --git a/selection/sampling/langevin.py b/selection/sampling/langevin.py
index 67a623b56..a5281652c 100644
--- a/selection/sampling/langevin.py
+++ b/selection/sampling/langevin.py
@@ -39,6 +39,7 @@ def next(self):
             if not np.all(np.isfinite(self.gradient_map(candidate))):
                 nattempt += 1
                 self._sqrt_step *= 0.8
+                self.stepsize = self._sqrt_step**2
                 if nattempt >= 10:
                     raise ValueError('unable to find feasible step')
             else:
diff --git a/selection/tests/instance.py b/selection/tests/instance.py
index a27ebf08a..34487d697 100644
--- a/selection/tests/instance.py
+++ b/selection/tests/instance.py
@@ -194,11 +194,11 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14,
     Y = np.random.binomial(1, pi)
     return X, Y, beta, np.nonzero(active)[0]
 
-def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=14,
-                    random_signs=False, 
-                    scale=True, 
-                    center=True, 
-                    equicorrelated=True):
+def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4,
+                     random_signs=False, 
+                     scale=True, 
+                     center=True, 
+                     equicorrelated=True):
     """
     A testing instance for the LASSO.
     Design is equi-correlated in the population,

From 439414ea634f78e1788e4db5ce948bf8b68c1bee Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 16 Aug 2017 14:27:01 -0700
Subject: [PATCH 118/617] moving CV to algorithms except cv_view

---
 selection/{randomized => algorithms}/cv.py        | 15 ++++-----------
 selection/{randomized => algorithms}/cv_glmnet.py |  8 +++-----
 selection/randomized/cv_view.py                   |  5 +++--
 selection/randomized/tests/test_cv.py             |  2 +-
 .../test_cv_corrected_nonrandomized_lasso.py      | 13 ++++++++-----
 5 files changed, 19 insertions(+), 24 deletions(-)
 rename selection/{randomized => algorithms}/cv.py (95%)
 rename selection/{randomized => algorithms}/cv_glmnet.py (94%)

diff --git a/selection/randomized/cv.py b/selection/algorithms/cv.py
similarity index 95%
rename from selection/randomized/cv.py
rename to selection/algorithms/cv.py
index b3c85d198..7adc2217a 100644
--- a/selection/randomized/cv.py
+++ b/selection/algorithms/cv.py
@@ -1,9 +1,8 @@
-import functools
+import functools, copy
 import numpy as np
 import regreg.api as rr
-import copy
-from selection.randomized.M_estimator import restricted_Mest
-from selection.api import randomization
+
+from ..randomized.randomization import randomization
 
 class CV(object):
 
@@ -67,11 +66,6 @@ def CV_err(self,
                 problem = rr.simple_problem(loss_train, penalty)
             beta_train = problem.solve(**solve_args)
 
-            #active = beta_train!=0
-            #_beta_unpenalized = restricted_Mest(loss_train, active, solve_args=solve_args)
-            #beta_full = np.zeros(p)
-            #beta_full[active] = _beta_unpenalized
-
             _mu = lambda X, beta: loss_test.saturated_loss.mean_function(X.dot(beta))
             resid = y_test - _mu(X_test, beta_train)
             cur = (resid**2).sum() / n_test
@@ -89,7 +83,6 @@ def CV_err(self,
             SD_CV_randomized = np.sqrt((CV_err_squared_randomized - (CV_err_randomized**2/self.K)) / (self.K-1))
             return CV_err, SD_CV, CV_err_randomized, SD_CV_randomized
         else:
-            #print(CV_err, SD_CV)
             return CV_err, SD_CV
 
 
@@ -204,7 +197,7 @@ def _CV1_boot(indices):
 
         return _CVR_boot, _CV1_boot
 
-if __name__ == '__main__':
+def main():
     from selection.tests.instance import gaussian_instance
     np.random.seed(1)
     n, p = 3000, 1000
diff --git a/selection/randomized/cv_glmnet.py b/selection/algorithms/cv_glmnet.py
similarity index 94%
rename from selection/randomized/cv_glmnet.py
rename to selection/algorithms/cv_glmnet.py
index 7d961f678..fa6803dba 100644
--- a/selection/randomized/cv_glmnet.py
+++ b/selection/algorithms/cv_glmnet.py
@@ -7,10 +7,8 @@
 
 import warnings
 import numpy as np
-import regreg.api as rr
 
-from ..tests.instance import gaussian_instance
-from .randomization import randomization
+from ..randomized.randomization import randomization
 
 try:
     from rpy2.robjects.packages import importr
@@ -20,7 +18,7 @@
     importr('glmnet')
     have_glmnet = True
 except ImportError:
-    warnings.warn('rpy2 seems not to be installed -- CV_glmnet class will not work')
+    warnings.warn('rpy2 and / or glmnet seem not to be installed -- CV_glmnet class will not work')
     have_glmnet = False
     pass
 
@@ -106,7 +104,7 @@ def choose_lambda_CVR(self,  scale1 = None, scale2=None, loss=None):
             rv2 = np.asarray(randomization2._sampler(size=(1,)))
         CVR = CV_err+rv1.flatten()+rv2.flatten()
         lam_CVR = self.lam_seq[np.argmin(CVR)] # lam_CVR minimizes CVR
-        #print("randomized index:", list(self.lam_seq).index(lam_CVR))
+
         CV1 = CV_err+rv1.flatten()
         return  lam_CVR, SD, CVR, CV1, self.lam_seq
 
diff --git a/selection/randomized/cv_view.py b/selection/randomized/cv_view.py
index bbdcd2ea6..52d3b28fb 100644
--- a/selection/randomized/cv_view.py
+++ b/selection/randomized/cv_view.py
@@ -2,9 +2,10 @@
 import numpy as np
 import regreg.api as rr
 
+from ..algorithms.cv import CV
+from ..algorithms.cv_glmnet import CV_glmnet, have_glmnet
+
 from .query import query
-from .cv import CV
-from .cv_glmnet import CV_glmnet, have_glmnet
 from .glm import bootstrap_cov
 from .randomization import randomization
 
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 613975784..11369632c 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -13,7 +13,7 @@
 
 from ..query import naive_confidence_intervals, naive_pvalues
 
-import ...tests.reports as reports
+import selection.tests.reports as reports
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from ...tests.decorators import (wait_for_return_value, 
                                  set_seed_iftrue, 
diff --git a/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py b/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
index dfe1c5ec1..882173254 100644
--- a/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
+++ b/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
@@ -25,7 +25,7 @@
 def test_cv_corrected_nonrandomized_lasso(n=300,
                                           p=100,
                                           s=3,
-                                          signal=3.5,
+                                          signal=7.5,
                                           rho=0.,
                                           sigma=1.,
                                           K=5,
@@ -33,7 +33,8 @@ def test_cv_corrected_nonrandomized_lasso(n=300,
                                           X=None,
                                           check_screen=True,
                                           glmnet=True,
-                                          intervals=False):
+                                          intervals=False,
+                                          nsample=2): # number of bootstrap samples
 
     print (n, p, s, rho)
     if X is not None:
@@ -87,14 +88,14 @@ def coef_boot(indices):
         return selected_boot(indices)[:active.sum()]
 
     if (check_screen==False) or (set(truth).issubset(np.nonzero(active)[0])):
-
+        print('blah')
         active_set = np.nonzero(active)[0]
         true_vec = beta[active]
         one_step = L.onestep_estimator
 
         cov_est = glm_nonparametric_bootstrap(n, n)
         # compute covariance of selected parameters with CV error curve
-        cov = cov_est(coef_boot, cross_terms=[CV_boot], nsample=500)
+        cov = cov_est(coef_boot, cross_terms=[CV_boot], nsample=nsample)
 
         # residual is fixed
         # covariance of L.constraints is more accurate than cov[0]
@@ -115,6 +116,8 @@ def coef_boot(indices):
         B = B[keep]
         C = B.dot(A)
 
+        print('huh')
+
         CV_constraints = constraints(C, -B.dot(residual))
 
         full_constraints = stack(CV_constraints, L.constraints)
@@ -217,7 +220,7 @@ def report(niter=100, design="random", **kwargs):
     fig.savefig('cv_corrected_nonrandomized_lasso_pivots.pdf')
 
 
-if __name__ == '__main__':
+def main():
     np.random.seed(500)
     kwargs = {'s': 0, 'n': 500, 'p': 100, 'signal': 3.5, 'sigma': 1, 'rho': 0., 'intervals':False}
     report(niter=1, **kwargs)

From 2a95b2d01ed7e0a0c7468a99634c3110ea092aaf Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 16 Aug 2017 14:40:32 -0700
Subject: [PATCH 119/617] BF: fixed some imports

---
 selection/randomized/tests/test_condition.py        | 10 ++++------
 selection/randomized/tests/test_greedy_step.py      |  2 +-
 selection/randomized/tests/test_multiple_queries.py |  3 +--
 selection/randomized/tests/test_threshold_score.py  |  2 +-
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/selection/randomized/tests/test_condition.py b/selection/randomized/tests/test_condition.py
index b157dddc4..5c5bfe496 100644
--- a/selection/randomized/tests/test_condition.py
+++ b/selection/randomized/tests/test_condition.py
@@ -36,7 +36,7 @@ def test_condition(s=0,
                    lam_frac = 1.4,
                    ndraw=10000, burnin=2000,
                    loss='logistic',
-                   nviews=1,
+                   nviews=4,
                    scalings=True):
 
     if loss=="gaussian":
@@ -48,13 +48,12 @@ def test_condition(s=0,
         loss = rr.glm.logistic(X, y)
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
 
-    #randomizer = randomization.isotropic_gaussian((p,), scale=sigma)
     randomizer = randomization.laplace((p,), scale=0.6)
 
     epsilon = 1. / np.sqrt(n)
 
     W = np.ones(p)*lam
-    #W[0] = 0 # use at least some unpenalized
+    W[0] = 0 # use at least some unpenalized
     penalty = rr.group_lasso(np.arange(p),
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
@@ -79,7 +78,7 @@ def test_condition(s=0,
             return None
 
         if scalings: # try condition on some scalings
-            for i in range(nviews):
+            for i in range(int(nviews)/2):
                 conditioning_groups = np.zeros(p, bool)
                 conditioning_groups[:int(p/2)] = True
                 marginalizing_groups = np.ones(p, bool)
@@ -96,8 +95,7 @@ def test_condition(s=0,
         target_sampler, target_observed = glm_target(loss,
                                                      active_union,
                                                      queries)
-                                                     #reference= beta[active_union])
-        #print(target_sampler.target_cov)
+
         test_stat = lambda x: np.linalg.norm(x - beta[active_union])
         observed_test_value = test_stat(target_observed)
 
diff --git a/selection/randomized/tests/test_greedy_step.py b/selection/randomized/tests/test_greedy_step.py
index b19d61b3b..fc40a8677 100644
--- a/selection/randomized/tests/test_greedy_step.py
+++ b/selection/randomized/tests/test_greedy_step.py
@@ -13,7 +13,7 @@
                                         set_sampling_params_iftrue, 
                                         register_report)
 from ...tests.instance import logistic_instance
-import ...tests.reports as reports
+import selection.tests.reports as reports
 
 from ..api import (randomization, 
                    multiple_queries, 
diff --git a/selection/randomized/tests/test_multiple_queries.py b/selection/randomized/tests/test_multiple_queries.py
index adc0677cf..27d17fdec 100644
--- a/selection/randomized/tests/test_multiple_queries.py
+++ b/selection/randomized/tests/test_multiple_queries.py
@@ -3,14 +3,13 @@
 import pandas as pd
 import regreg.api as rr
 
-import ...tests.reports as reports
 from ...tests.flags import SET_SEED, SMALL_SAMPLES
 from ...tests.instance import logistic_instance
 from ...tests.decorators import (wait_for_return_value, 
                                  set_seed_iftrue, 
                                  set_sampling_params_iftrue,
                                  register_report)
-import ...tests.reports as reports
+import selection.tests.reports as reports
 
 from ...api import (randomization, 
                     glm_group_lasso, 
diff --git a/selection/randomized/tests/test_threshold_score.py b/selection/randomized/tests/test_threshold_score.py
index c7e6f742b..022ad18c8 100644
--- a/selection/randomized/tests/test_threshold_score.py
+++ b/selection/randomized/tests/test_threshold_score.py
@@ -7,7 +7,7 @@
                                         set_seed_iftrue, 
                                         set_sampling_params_iftrue,
                                         register_report)
-import ...tests.reports as reports
+import selection.tests.reports as reports
 from ...tests.flags import SET_SEED, SMALL_SAMPLES
 from ...tests.instance import logistic_instance
 

From 6881db2dd4c31335ace820f945fa11ce93d6a669 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 16 Aug 2017 14:42:07 -0700
Subject: [PATCH 120/617] BF: name of variable in travis script

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 129539f65..1a1cc5f23 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,10 +19,10 @@ matrix:
   include:
     - python: 3.5
       env:
-        - R_TESTS=1
+        - RUN_R_TESTS=1
     - python: 2.7
       env:
-        - R_TESTS=1
+        - RUN_R_TESTS=1
 before_install:
   - source travis-tools/utils.sh
   - travis_before_install

From 1e385f92fd0837b19d237bdadc634443f90f17e5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 20 Aug 2017 16:29:23 -0700
Subject: [PATCH 121/617] checking R coord descent QP solver

---
 selection/algorithms/tests/test_compareR.py | 49 +++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 6adca2484..fe8a50db0 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -1,6 +1,7 @@
 from __future__ import print_function
 
 import numpy as np
+import regreg.api as rr
 import nose.tools as nt
 
 try:
@@ -287,3 +288,51 @@ def test_logistic():
     yield np.testing.assert_allclose, L.summary('onesided')['pval'][1:], R_pvals, tol, tol, False, 'logistic pvalues'
 
 
+
+@np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
+def test_solve_QP(): # check the R coordinate descent LASSO solver
+
+    n, p = 100, 200
+    lam = 10
+    np.random.seed(0)
+
+    X = np.random.standard_normal((n, p))
+    Y = np.random.standard_normal(n)
+    # reference solution: squared-error loss plus l1 penalty, solved with regreg
+    loss = rr.squared_error(X, Y)
+    pen = rr.l1norm(p, lagrange=lam)
+    problem = rr.simple_problem(loss, pen)
+    soln = problem.solve(min_its=500, tol=1.e-12)
+
+    import rpy2.robjects.numpy2ri
+    rpy2.robjects.numpy2ri.activate()
+    # hand the same X, Y and lam to the R session
+    tol = 1.e-5
+    rpy.r.assign('X', X)
+    rpy.r.assign('Y', Y)
+    rpy.r.assign('lam', lam)
+    
+    R_code = """
+
+    library(selectiveInference)
+    p = ncol(X)
+    soln_R = rep(0, p)
+    grad = -t(X) %*% Y
+    ever_active = c(1, rep(0, p-1))
+    nactive = as.integer(1)
+    kkt_tol = 1.e-12
+    objective_tol = 1.e-12
+    maxiter = 500
+    soln_R = selectiveInference:::solve_QP(t(X) %*% X, lam, maxiter, soln_R, -t(X) %*% Y, grad, ever_active, nactive, kkt_tol, objective_tol)$soln
+
+    """ 
+
+    rpy.r(R_code)
+
+    soln_R = np.asarray(rpy.r('soln_R'))
+
+    rpy2.robjects.numpy2ri.deactivate()
+
+    yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
+
+

From 8c300e820d9a1028d48843d8c6fc582d740f7dd4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 24 Aug 2017 17:04:50 -0700
Subject: [PATCH 122/617] cosmetic edit

---
 selection/randomized/query.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 29a017292..7ea29c5ab 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -451,6 +451,7 @@ def __init__(self,
         self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
 
         # added for the reconstruction map in case we marginalize over optimization variables
+
         randomization_length_total = 0
         self.randomization_slice = []
         for i in range(self.nqueries):
@@ -462,7 +463,7 @@ def __init__(self,
 
     def set_reference(self, reference):
         self._reference = np.atleast_1d(reference)
-        self._reference_inv = self.target_inv_cov.dot(self.reference)
+        self._reference_inv = self.target_inv_cov.dot(self.reference).flatten()
 
     def get_reference(self):
         return self._reference
@@ -514,7 +515,7 @@ def gradient(self, state):
             target_grad += target_grad_curr.copy()
 
         target_grad = - target_grad
-        target_grad += self._reference_inv.flatten() - self.target_inv_cov.dot(target_state)
+        target_grad += self._reference_inv - self.target_inv_cov.dot(target_state)
         full_grad[self.target_slice] = target_grad
         full_grad[self.overall_opt_slice] = -opt_grad
 

From 44290d666c21b5fc48f7778773ac41987a856593 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 24 Aug 2017 22:12:02 -0700
Subject: [PATCH 123/617] added some new methods to not sample the data

---
 selection/randomized/query.py                 | 116 ++++++++++++++++++
 .../randomized/tests/test_convenience.py      |  14 ++-
 2 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 7ea29c5ab..df8d33c73 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -61,6 +61,39 @@ def randomization_gradient(self, data_state, data_transform, opt_state):
             opt_grad = None
         return data_grad, opt_grad #- self.grad_log_jacobian(opt_state)
 
+    def randomization_gradient_opt(self, data_state, data_transform, opt_state):
+        """
+        Randomization derivative w.r.t. the optimization variables, data held fixed.
+        """
+
+        # reconstruction of randomization omega
+
+        opt_linear, opt_offset = self.opt_transform
+        data_linear, data_offset = data_transform
+        # only the offset of data_transform is used; data_state and data_linear are ignored
+        data_piece = data_offset
+
+        # value of the randomization omega
+
+        if opt_linear is not None: # opt_linear can be None if we marginalize all of omega!
+            opt_piece = opt_linear.dot(opt_state) + opt_offset
+            full_state = (data_piece + opt_piece)
+        else:
+            full_state = data_piece
+
+        # gradient of negative log density of randomization at omega
+        # we may have marginalized over some optimization variables here
+
+        randomization_derivative = self.construct_weights(full_state)
+
+        # chain rule for the optimization part only; the data slot of the result is None
+
+        if opt_linear is not None:
+            opt_grad = opt_linear.T.dot(randomization_derivative)
+        else:
+            opt_grad = None
+        return None, opt_grad 
+
     def construct_weights(self, full_state):
         return self.randomization.gradient(full_state)
 
@@ -491,6 +524,27 @@ def projection(self, state):
         state[self.overall_opt_slice] = new_opt_state
         return state
 
+    def projection_opt(self, state):
+        '''
+        Projection map used when sampling only the optimization variables.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler; only `state[self.overall_opt_slice]`
+           is read, and that slice is overwritten in place
+           with the per-query projections.
+        Returns
+        -------
+        projected_opt_state : np.float
+        '''
+
+        opt_state = state[self.overall_opt_slice]
+        new_opt_state = np.zeros_like(opt_state)
+        for i in range(self.nqueries):
+            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
+        state[self.overall_opt_slice] = new_opt_state
+        return state[self.overall_opt_slice]
+
     def gradient(self, state):
         '''
         Gradient of log-density at current state.
@@ -521,12 +575,34 @@ def gradient(self, state):
 
         return full_grad
 
+    def gradient_opt(self, state):
+        """
+        Gradient only w.r.t. opt variables; the target block is not returned
+        """
+
+        target_state, opt_state = state[self.target_slice], state[self.overall_opt_slice]
+        target_grad, opt_grad = np.zeros_like(target_state), np.zeros_like(opt_state)
+        full_grad = np.zeros_like(state)
+
+        # randomization_gradient_opt are gradients of a CONVEX function, hence the sign flip below
+
+        for i in range(self.nqueries):
+            target_grad_curr, opt_grad[self.opt_slice[i]] = \
+                self.objectives[i].randomization_gradient_opt(target_state, self.target_transform[i], opt_state[self.opt_slice[i]])
+        # target_grad_curr and target_grad are never used: the target block is set to zero
+        full_grad[self.target_slice] = 0
+        full_grad[self.overall_opt_slice] = -opt_grad
+
+        return full_grad[self.overall_opt_slice]
+
+
     def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
         '''
         Sample `target` from selective density
         using projected Langevin sampler with
         gradient map `self.gradient` and
         projection map `self.projection`.
+
         Parameters
         ----------
         ndraw : int
@@ -566,6 +642,46 @@ def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
                 samples.append(target_langevin.state[keep_slice].copy())
         return np.asarray(samples)
 
+    def sample_opt(self, ndraw, burnin, stepsize=None):
+        '''
+        Sample optimization variables
+        using projected Langevin sampler
+        keeping the data fixed.
+
+        Parameters
+        ----------
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to the reciprocal of
+           `self.crude_lipschitz()`.
+
+        Returns
+        -------
+        samples : np.float
+           Sampler states for the `ndraw` iterations
+           following the first `burnin` iterations.
+        '''
+
+        if stepsize is None:
+            stepsize = 1. / self.crude_lipschitz() # should be lipschitz of randomization
+        # the chain starts at the observed state restricted to overall_opt_slice
+        target_langevin = projected_langevin(self.observed_state.copy()[self.overall_opt_slice],
+                                             self.gradient_opt,
+                                             self.projection_opt,
+                                             stepsize)
+
+        samples = []
+
+        for i in range(ndraw + burnin):
+            target_langevin.next()
+            if (i >= burnin):
+                samples.append(target_langevin.state.copy())
+        return np.asarray(samples)
+
     def hypothesis_test(self,
                         test_stat,
                         observed_value,
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index 95b736ba2..ae08e7608 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -3,6 +3,7 @@
 import nose.tools as nt
 
 from ..convenience import lasso, step, threshold
+from ..glm import target as glm_target
 from ...tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
@@ -25,7 +26,7 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
         X, Y = inst()[:2]
         n, p = X.shape
 
-        W = np.ones(X.shape[1])
+        W = np.ones(X.shape[1]) * 20
         conv = const(X, Y, W, randomizer=rand)
         signs = conv.fit()
 
@@ -43,8 +44,6 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
                      burnin=burnin,
                      compute_intervals=True)
 
-        print(`const_info` + ' OK')
-
         conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
                                    conditioning_groups=conditioning_groups)
 
@@ -52,6 +51,15 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
                      ndraw=ndraw,
                      burnin=burnin)
 
+        target_sampler, target_observed = glm_target(conv.loglike,
+                                                     selected_features,
+                                                     conv._queries,
+                                                     bootstrap=False)
+
+        S = target_sampler.sample_opt(ndraw,
+                                      burnin)
+
+
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_step_constructors(ndraw=1000, burnin=200):
 

From e3a3e9c2fbff7afa7a42e250edc7c3cfed8fbc0c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 24 Aug 2017 22:35:02 -0700
Subject: [PATCH 124/617] BF: variable should be an int

---
 selection/algorithms/lasso.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index b2b883e8e..a80ea0403 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -806,6 +806,7 @@ def summary(self, alternative='twosided', alpha=0.05, UMAU=False,
                                                              'upper_trunc',
                                                              'sd'], 
                                                             np.array(result).T)]))
+        df['variable'] = df['variable'].astype(int)
         return df
 
 

From 6ea3a9a43b6c26f3b2fe07311bd2a49b199ea307 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 25 Aug 2017 11:27:33 -0700
Subject: [PATCH 125/617] new class for optimization sampler

---
 selection/randomized/M_estimator.py           |   7 +-
 selection/randomized/convenience.py           |   2 +
 selection/randomized/query.py                 | 729 +++++++++++++++---
 .../tests/test_optimization_sampler.py        |  55 ++
 4 files changed, 671 insertions(+), 122 deletions(-)
 create mode 100644 selection/randomized/tests/test_optimization_sampler.py

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 086fcb117..1616572be 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -258,7 +258,6 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         new_groups = penalty.groups[inactive]
         new_weights = dict([(g, penalty.weights[g] / _sqrt_scaling) for g in penalty.weights.keys() if g in np.unique(new_groups)])
 
-
         # we form a dual group lasso object
         # to do the projection
 
@@ -422,7 +421,9 @@ def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
         for _i, _s in zip(inactive_moving_idx, subgrad_idx):
             new_linear[_i, _s] = 1.
 
-        observed_opt_state = self.observed_opt_state[:(self._active_groups.sum()+self._unpenalized_groups.sum()+moving_inactive_variables.sum())]
+        observed_opt_state = self.observed_opt_state[:(self._active_groups.sum() +
+                                                       self._unpenalized_groups.sum() +
+                                                       moving_inactive_variables.sum())]
         observed_opt_state[subgrad_slice] = self.initial_subgrad[moving_inactive_variables]
 
         self.observed_opt_state = observed_opt_state
@@ -438,7 +439,6 @@ def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
 
         new_offset = condition_linear[:,subgrad_condition_slice].dot(self.initial_subgrad[condition_inactive_variables]) + opt_offset
 
-
         self.opt_transform = (new_linear, new_offset)
 
         # for group LASSO this should not induce a bigger jacobian as
@@ -452,7 +452,6 @@ def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
         #self.subgrad_slice = np.zeros(new_linear.shape[1], np.bool)
         self.num_opt_var = new_linear.shape[1]
 
-
     def condition_on_scalings(self):
         """
         Maybe we should allow subgradients of only some variables...
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 68f3972b0..f4445855a 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -162,6 +162,8 @@ def decompose_subgradient(self,
         self._view.decompose_subgradient(conditioning_groups=conditioning_groups,
                                          marginalizing_groups=marginalizing_groups)
 
+        self._queries.setup_opt_state()
+
     def summary(self, selected_features, 
                 null_value=None,
                 level=0.9,
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index df8d33c73..7eb5af32f 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -32,12 +32,15 @@ def randomization_gradient(self, data_state, data_transform, opt_state):
         Randomization derivative at full state.
         """
 
-        # reconstruction of randoimzation omega
+        # reconstruction of randomization omega
 
         opt_linear, opt_offset = self.opt_transform
-        data_linear, data_offset = data_transform
 
-        data_piece = data_linear.dot(data_state) + data_offset
+        data_linear, data_offset = data_transform
+        if data_linear is not None:
+            data_piece = data_linear.dot(data_state) + data_offset
+        else: # this can be None if we are not moving a target
+            data_piece = data_offset
 
         # value of the randomization omega
 
@@ -54,45 +57,16 @@ def randomization_gradient(self, data_state, data_transform, opt_state):
 
         # chain rule for data, optimization parts
 
-        data_grad = data_linear.T.dot(randomization_derivative)
-        if opt_linear is not None:
-            opt_grad = opt_linear.T.dot(randomization_derivative)
-        else:
-            opt_grad = None
-        return data_grad, opt_grad #- self.grad_log_jacobian(opt_state)
-
-    def randomization_gradient_opt(self, data_state, data_transform, opt_state):
-        """
-        Randomization derivative at full state.
-        """
-
-        # reconstruction of randoimzation omega
-
-        opt_linear, opt_offset = self.opt_transform
-        data_linear, data_offset = data_transform
-
-        data_piece = data_offset
-
-        # value of the randomization omega
-
-        if opt_linear is not None: # this can happen if we marginalize all of omega!
-            opt_piece = opt_linear.dot(opt_state) + opt_offset
-            full_state = (data_piece + opt_piece)
+        if data_linear is not None:
+            data_grad = data_linear.T.dot(randomization_derivative)
         else:
-            full_state = data_piece
-
-        # gradient of negative log density of randomization at omega
-        # we may have marginalized over some optimization variables here
-
-        randomization_derivative = self.construct_weights(full_state)
-
-        # chain rule for data, optimization parts
+            data_grad = None
 
         if opt_linear is not None:
             opt_grad = opt_linear.T.dot(randomization_derivative)
         else:
             opt_grad = None
-        return None, opt_grad 
+        return data_grad, opt_grad #- self.grad_log_jacobian(opt_state)
 
     def construct_weights(self, full_state):
         return self.randomization.gradient(full_state)
@@ -300,25 +274,32 @@ def setup_target(self,
         ----------
         target_info : object
            Passed as first argument to `self.form_covariances`.
+
         observed_target_state : np.float
            Observed value of the target estimator.
+
         reference : np.float (optional)
            Reference parameter for Gaussian approximation
            of target.
+
         target_set : sequence (optional)
            Which coordinates of target are really
            of interest. If not None, then coordinates
            not in target_set are assumed to have 0
            mean in the sampler.
+
         Notes
         -----
+
         The variable `target_set` can be used for
         a selected model test when some functionals
         are assumed to have 0 mean in the limiting
         Gaussian approximation. This can
         sometimes mean an increase in power.
+
         Returns
         -------
+
         target : targeted_sampler
             An instance of `targeted_sampler` that
             can be used to sample, test hypotheses,
@@ -371,26 +352,33 @@ def __init__(self,
         '''
         Parameters
         ----------
+
         multi_view : `multiple_queries`
            Instance of `multiple_queries`. Attributes
            `objectives`, `score_info` are key
            attributed. (Should maybe change constructor
            to reflect only what is needed.)
+
         target_info : object
            Passed as first argument to `self.form_covariances`.
+
         observed_target_state : np.float
            Observed value of the target estimator.
+
         form_covariances : callable
            Used in linear decomposition of each score
            and the target.
+
         reference : np.float (optional)
            Reference parameter for Gaussian approximation
            of target.
+
         target_set : sequence (optional)
            Which coordinates of target are really
            of interest. If not None, then coordinates
            not in target_set are assumed to have 0
            mean in the sampler.
+
         parametric : bool
            Use parametric covariance estimate?
 
@@ -433,6 +421,7 @@ def __init__(self,
         self.randomization_slice = multi_view.randomization_slice
 
         self.score_cov = []
+        target_cov_sum = 0
         for i in range(self.nqueries):
             if parametric == False:
                 target_cov, cross_cov = multi_view.form_covariances(target_info,  
@@ -442,9 +431,11 @@ def __init__(self,
                 target_cov, cross_cov = multi_view.form_covariances(target_info, 
                                                                     cross_terms=[multi_view.score_info[i]])
 
-            self.target_cov = target_cov
+            target_cov_sum += target_cov
             self.score_cov.append(cross_cov)
 
+        self.target_cov = target_cov_sum / self.nqueries
+
         # XXX we're not really using this target_set in our tests
 
         # zero out some coordinates of target_cov
@@ -463,9 +454,12 @@ def __init__(self,
                 self.objectives[i].linear_decomposition(self.score_cov[i],
                                                         self.target_cov,
                                                         self.observed_target_state))
+
         self.target_cov = np.atleast_2d(self.target_cov)
         self.target_inv_cov = np.linalg.inv(self.target_cov)
+
         # size of reference? should it only be target_set?
+
         if reference is None:
             reference = np.zeros(self.target_inv_cov.shape[0])
         self.reference = reference
@@ -524,27 +518,6 @@ def projection(self, state):
         state[self.overall_opt_slice] = new_opt_state
         return state
 
-    def projection_opt(self, state):
-        '''
-        Projection map of projected Langevin sampler.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Typically, the projection will only act on
-           `opt_vars`.
-        Returns
-        -------
-        projected_state : np.float
-        '''
-
-        opt_state = state[self.overall_opt_slice]
-        new_opt_state = np.zeros_like(opt_state)
-        for i in range(self.nqueries):
-            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
-        state[self.overall_opt_slice] = new_opt_state
-        return state[self.overall_opt_slice]
-
     def gradient(self, state):
         '''
         Gradient of log-density at current state.
@@ -575,26 +548,6 @@ def gradient(self, state):
 
         return full_grad
 
-    def gradient_opt(self, state):
-        """
-        Gradient only w.r.t. opt variables
-        """
-
-        target_state, opt_state = state[self.target_slice], state[self.overall_opt_slice]
-        target_grad, opt_grad = np.zeros_like(target_state), np.zeros_like(opt_state)
-        full_grad = np.zeros_like(state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        for i in range(self.nqueries):
-            target_grad_curr, opt_grad[self.opt_slice[i]] = \
-                self.objectives[i].randomization_gradient_opt(target_state, self.target_transform[i], opt_state[self.opt_slice[i]])
-
-        full_grad[self.target_slice] = 0
-        full_grad[self.overall_opt_slice] = -opt_grad
-
-        return full_grad[self.overall_opt_slice]
-
 
     def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
         '''
@@ -642,46 +595,6 @@ def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
                 samples.append(target_langevin.state[keep_slice].copy())
         return np.asarray(samples)
 
-    def sample_opt(self, ndraw, burnin, stepsize=None):
-        '''
-        Sample optimization variables 
-        using projected Langevin sampler 
-        keeping the data fixed.
-
-        Parameters
-        ----------
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        keep_opt : bool
-           Should we return optimization variables
-           as well as the target?
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if stepsize is None:
-            stepsize = 1. / self.crude_lipschitz() # should be lipschitz of randomization
-
-        target_langevin = projected_langevin(self.observed_state.copy()[self.overall_opt_slice],
-                                             self.gradient_opt,
-                                             self.projection_opt,
-                                             stepsize)
-
-        samples = []
-
-        for i in range(ndraw + burnin):
-            target_langevin.next()
-            if (i >= burnin):
-                samples.append(target_langevin.state.copy())
-        return np.asarray(samples)
-
     def hypothesis_test(self,
                         test_stat,
                         observed_value,
@@ -1109,6 +1022,586 @@ def coefficient_pvalues_translate(self,
 
         return np.array(pvalues)
 
+
+class optimization_sampler(targeted_sampler):
+
+    '''
+    Object to sample only optimization variables of a selective sampler
+    fixing the observed score.
+    '''
+
+    def __init__(self,
+                 multi_view):
+
+        '''
+        Parameters
+        ----------
+
+        multi_view : `multiple_queries`
+           Instance of `multiple_queries`. Attributes
+           `objectives`, `score_info` are key
+           attributes. (Should maybe change constructor
+           to reflect only what is needed.)
+
+
+        '''
+
+        # sampler will draw samples for bootstrap
+        # these are arguments to target_info and score_bootstrap
+        # nonparametric bootstrap is np.random.choice(n, size=(n,), replace=True)
+        # residual bootstrap might be X_E.dot(\bar{\beta}_E)
+        # + np.random.choice(resid, size=(n,), replace=True)
+
+        # if target_set is not None, we assume that
+        # these coordinates (specified by a list of coordinates) of target
+        # is assumed to be independent of the rest
+        # the corresponding block of `target_cov` is zeroed out
+
+        # we need these attributes of multi_view
+
+        self.nqueries = len(multi_view.objectives)
+        self.opt_slice = multi_view.opt_slice
+        self.objectives = multi_view.objectives
+
+        self.total_randomization_length = multi_view.total_randomization_length
+        self.randomization_slice = multi_view.randomization_slice
+
+        # set the observed state
+
+        self.observed_state = np.zeros_like(multi_view.observed_opt_state)
+        self.observed_state[:] = multi_view.observed_opt_state
+
+        # added for the reconstruction map in case we marginalize over optimization variables
+
+        randomization_length_total = 0
+        self.randomization_slice = []
+        for i in range(self.nqueries):
+            self.randomization_slice.append(
+                slice(randomization_length_total, randomization_length_total + self.objectives[i].ndim))
+            randomization_length_total += self.objectives[i].ndim
+
+        self.randomization_length_total = randomization_length_total
+
+        # We implicitly assume that we are sampling a target
+        # independent of the data in each view
+
+        self.target_transform = []
+        for i in range(self.nqueries):
+            obj = self.objectives[i]
+            
+            _, observed_score = obj.linear_decomposition(np.zeros(obj.ndim),
+                                                         np.array([[1.]]),
+                                                         0.)
+            self.target_transform.append((None, observed_score)) 
+
+    def projection(self, state):
+        '''
+        Projection map of projected Langevin sampler.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler: the optimization variables
+           of all queries, with query `i` occupying
+           `self.opt_slice[i]`.
+        Returns
+        -------
+        projected_state : np.float
+        '''
+
+        opt_state = state
+        new_opt_state = np.zeros_like(opt_state)
+        for i in range(self.nqueries):
+            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
+        return new_opt_state
+
+    def gradient(self, state):
+        """
+        Gradient only w.r.t. opt variables
+        """
+
+        opt_state = state
+        opt_grad = np.zeros_like(opt_state)
+
+        # randomization_gradient are gradients of a CONVEX function
+
+        for i in range(self.nqueries):
+            # the 0 is our fictitious target independent of all the data
+            _, opt_grad[self.opt_slice[i]] = \
+                self.objectives[i].randomization_gradient(0., self.target_transform[i], opt_state[self.opt_slice[i]])
+
+        return opt_grad
+
+
+    def sample(self, ndraw, burnin, stepsize=None):
+        '''
+        Sample optimization variables
+        using projected Langevin sampler with
+        gradient map `self.gradient` and
+        projection map `self.projection`.
+
+        Parameters
+        ----------
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        keep_opt : bool
+           Should we return optimization variables
+           as well as the target?
+        Returns
+        -------
+        samples : np.ndarray
+        '''
+
+        if stepsize is None:
+            stepsize = 1. / self.crude_lipschitz()
+
+        target_langevin = projected_langevin(self.observed_state.copy(),
+                                             self.gradient,
+                                             self.projection,
+                                             stepsize)
+
+        samples = []
+
+        for i in range(ndraw + burnin):
+            target_langevin.next()
+            if (i >= burnin):
+                samples.append(target_langevin.state.copy())
+        return np.asarray(samples)
+
+    def hypothesis_test(self,
+                        test_stat,
+                        observed_value,
+                        ndraw=10000,
+                        burnin=2000,
+                        stepsize=None,
+                        sample=None,
+                        parameter=None,
+                        alternative='twosided'):
+
+        '''
+        Sample `target` from selective density
+        using projected Langevin sampler with
+        gradient map `self.gradient` and
+        projection map `self.projection`.
+        Parameters
+        ----------
+        test_stat : callable
+           Test statistic to evaluate on sample from
+           selective distribution.
+        observed_value : float
+           Observed value of test statistic.
+           Used in p-value calculation.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc. If not None,
+           `ndraw, burnin, stepsize` are ignored.
+        parameter : np.float (optional)
+           If not None, defaults to `self.reference`.
+           Otherwise, sample is reweighted using Gaussian tilting.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        gradient : np.float
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        if parameter is None:
+            parameter = self.reference
+
+        sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
+
+
+        delta = self.target_inv_cov.dot(parameter - self.reference)
+        W = np.exp(sample.dot(delta))
+
+        family = discrete_family(sample_test_stat, W)
+        pval = family.cdf(0, observed_value)
+
+        if alternative == 'greater':
+            return 1 - pval
+        elif alternative == 'less':
+            return pval
+        else:
+            return 2 * min(pval, 1 - pval)
+
+    def confidence_intervals(self,
+                             observed,
+                             ndraw=10000,
+                             burnin=2000,
+                             stepsize=None,
+                             sample=None,
+                             level=0.9):
+        '''
+        Parameters
+        ----------
+        observed : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        level : float (optional)
+            Specify the
+            confidence level.
+        Notes
+        -----
+        Construct selective confidence intervals
+        for each parameter of the target.
+        Returns
+        -------
+        intervals : [(float, float)]
+            List of confidence intervals.
+        '''
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        nactive = observed.shape[0]
+        intervals_instance = intervals_from_sample(self.reference,
+                                                   sample,
+                                                   observed,
+                                                   self.target_cov)
+
+        return intervals_instance.confidence_intervals_all(level=level)
+
+    def coefficient_pvalues(self,
+                            observed,
+                            parameter=None,
+                            ndraw=10000,
+                            burnin=2000,
+                            stepsize=None,
+                            sample=None,
+                            alternative='twosided'):
+        '''
+        Construct selective p-values
+        for each parameter of the target.
+        Parameters
+        ----------
+        observed : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        parameter : np.float (optional)
+            A vector of parameters with shape `self.shape`
+            at which to evaluate p-values. Defaults
+            to `np.zeros(self.shape)`.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalues : np.float
+
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        if parameter is None:
+            parameter = np.zeros(self.shape)
+
+        nactive = observed.shape[0]
+        intervals_instance = intervals_from_sample(self.reference,
+                                                   sample,
+                                                   observed,
+                                                   self.target_cov)
+
+        pval = intervals_instance.pivots_all(parameter)
+
+        if alternative == 'greater':
+            return 1 - pval
+        elif alternative == 'less':
+            return pval
+        else:
+            return 2 * np.minimum(pval, 1 - pval)
+
+    def crude_lipschitz(self):
+        """
+        A crude Lipschitz constant for the
+        gradient of the log-density.
+        Returns
+        -------
+        lipschitz : float
+
+        """
+        lipschitz = power_L(self.target_inv_cov)
+        for transform, objective in zip(self.target_transform, self.objectives):
+            lipschitz += power_L(transform[0])**2 * objective.randomization.lipschitz
+            lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
+        return lipschitz
+
+
+    def reconstruction_map(self, state):
+        '''
+        Reconstruction of randomization at current state.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler made up of `(target, opt_vars)`.
+           Can be array with each row a state.
+        Returns
+        -------
+        reconstructed : np.float
+           Has shape of `opt_vars` with same number of rows
+           as `state`.
+
+        '''
+
+        state = np.atleast_2d(state)
+        #print(state.shape)
+        if len(state.shape) > 2:
+            raise ValueError('expecting at most 2-dimensional array')
+
+        target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
+        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
+        #reconstructed = np.zeros((opt_state.shape[0],self.randomization_length_total))
+
+        for i in range(self.nqueries):
+            reconstructed[:, self.randomization_slice[i]] = self.objectives[i].reconstruction_map(target_state,
+                                                                                        self.target_transform[i],
+                                                                                        opt_state[:, self.opt_slice[i]])
+
+        return np.squeeze(reconstructed)
+
+    def log_randomization_density(self, state):
+        '''
+        Log of randomization density at current state.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler made up of `(target, opt_vars)`.
+           Can be two-dimensional with each row a state.
+        Returns
+        -------
+        density : np.float
+            Has number of rows as `state` if 2-dimensional.
+        '''
+
+        reconstructed = self.reconstruction_map(state)
+        value = np.zeros(reconstructed.shape[0])
+
+        for i in range(self.nqueries):
+            log_dens = self.objectives[i].randomization.log_density
+            value += log_dens(reconstructed[:,self.opt_slice[i]])
+        return np.squeeze(value)
+
+
+    def hypothesis_test_translate(self,
+                                  sample,
+                                  test_stat,
+                                  observed_target,
+                                  parameter=None,
+                                  alternative='twosided'):
+
+        '''
+        Carry out a hypothesis test
+        based on the distribution of the
+        residual `observed_target - target`
+        sampled at `self.reference`.
+        Parameters
+        ----------
+        sample : np.array
+           Sample of target and optimization variables drawn at `self.reference`.
+        test_stat : callable
+           Test statistic to evaluate on sample from
+           selective distribution.
+        observed_target : np.float
+           Observed value of target estimate.
+           Used in p-value calculation.
+        parameter : np.float (optional)
+           If not None, defaults to `self.reference`.
+           Otherwise, sample is reweighted using Gaussian tilting.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        gradient : np.float
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        _intervals = translate_intervals(self,
+                                         sample,
+                                         observed_target)
+
+        if parameter is None:
+            parameter = self.reference
+
+        return _intervals.pivot(test_stat,
+                                parameter,
+                                alternative=alternative)
+
+
+    def confidence_intervals_translate(self,
+                                       observed_target,
+                                       ndraw=10000,
+                                       burnin=2000,
+                                       stepsize=None,
+                                       sample=None,
+                                       level=0.9):
+        '''
+        Parameters
+        ----------
+        observed_target : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        level : float (optional)
+            Specify the
+            confidence level.
+        Notes
+        -----
+        Construct selective confidence intervals
+        for each parameter of the target.
+        Returns
+        -------
+        intervals : [(float, float)]
+            List of confidence intervals.
+        '''
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize, keep_opt=True)
+
+        _intervals = translate_intervals(self,
+                                         sample,
+                                         observed_target)
+
+        limits = []
+
+        for i in range(observed_target.shape[0]):
+            keep = np.zeros_like(observed_target)
+            keep[i] = 1.
+            limits.append(_intervals.confidence_interval(keep, level=level))
+
+        return np.array(limits)
+
+    def coefficient_pvalues_translate(self,
+                                      observed_target,
+                                      parameter=None,
+                                      ndraw=10000,
+                                      burnin=2000,
+                                      stepsize=None,
+                                      sample=None,
+                                      alternative='twosided'):
+        '''
+        Parameters
+        ----------
+        observed_target : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        parameter : np.float (optional)
+            A vector of parameters with shape `self.shape`
+            at which to evaluate p-values. Defaults
+            to `np.zeros(self.shape)`.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalues : np.float
+            P values for each coefficient.
+
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize, keep_opt=True)
+
+        if parameter is None:
+            parameter = np.zeros_like(observed_target)
+
+        _intervals = translate_intervals(self,
+                                         sample,
+                                         observed_target)
+
+        pvalues = []
+
+        for i in range(observed_target.shape[0]):
+            keep = np.zeros_like(observed_target)
+            keep[i] = 1.
+
+            _parameter = self.reference.copy()
+            _parameter[i] = parameter[i]
+            pvalues.append(_intervals.pivot(lambda x: keep.dot(x),
+                                            _parameter,
+                                            alternative=alternative))
+
+        return np.array(pvalues)
+
+
+
 class bootstrapped_target_sampler(targeted_sampler):
 
     # make one of these for each hypothesis test
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
new file mode 100644
index 000000000..337b6a042
--- /dev/null
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -0,0 +1,55 @@
+from itertools import product
+import numpy as np
+import nose.tools as nt
+
+from ..convenience import lasso, step, threshold
+from ..query import optimization_sampler
+from ...tests.instance import (gaussian_instance,
+                               logistic_instance,
+                               poisson_instance)
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue 
+
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+def test_optimization_sampler(ndraw=1000, burnin=200):
+
+    cls = lasso
+    for const_info, rand in product(zip([gaussian_instance,
+                                         logistic_instance,
+                                         poisson_instance],
+                                        [cls.gaussian,
+                                         cls.logistic,
+                                         cls.poisson]),
+                              ['gaussian', 'logistic', 'laplace']):
+
+        inst, const = const_info
+        X, Y = inst()[:2]
+        n, p = X.shape
+
+        W = np.ones(X.shape[1]) * 80
+        conv = const(X, Y, W, randomizer=rand)
+        signs = conv.fit()
+
+        marginalizing_groups = np.zeros(p, np.bool)
+        marginalizing_groups[:int(p/2)] = True
+        
+        conditioning_groups = ~marginalizing_groups
+        conditioning_groups[-int(p/4):] = False
+
+        selected_features = np.zeros(p, np.bool)
+        selected_features[:3] = True
+
+        conv.summary(selected_features,
+                     ndraw=ndraw,
+                     burnin=burnin,
+                     compute_intervals=True)
+
+        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+                                   conditioning_groups=conditioning_groups)
+
+        target_sampler = optimization_sampler(conv._queries)
+
+        S = target_sampler.sample(ndraw,
+                                  burnin,
+                                  stepsize=1.e-3)
+

From 5e488f42ef4b0766a503006d65e1d54359492245 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 25 Aug 2017 17:20:11 -0700
Subject: [PATCH 126/617] testing the group lasso subgradient decomposition

---
 selection/randomized/M_estimator.py           |  56 +++----
 .../tests/test_decompose_subgrad.py           | 138 ++++++++++++++++++
 .../tests/test_optimization_sampler.py        |   6 +-
 3 files changed, 171 insertions(+), 29 deletions(-)
 create mode 100644 selection/randomized/tests/test_decompose_subgrad.py

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 1616572be..1777ba275 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -366,14 +366,21 @@ def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
         conditioning_groups and marginalizing_groups should be disjoint
         """
 
-        if marginalizing_groups is not None and (conditioning_groups * marginalizing_groups).sum() > 0:
+        groups = np.unique(self.penalty.groups)
+        condition_inactive_groups = np.zeros_like(groups, dtype=bool)
+
+        if conditioning_groups is None:
+            conditioning_groups = np.zeros_like(groups, dtype=np.bool)
+
+        if marginalizing_groups is None:
+            marginalizing_groups = np.zeros_like(groups, dtype=np.bool)
+
+        if np.any(conditioning_groups * marginalizing_groups):
             raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
 
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-        groups = np.unique(self.penalty.groups)
-        condition_inactive_groups = np.zeros_like(groups, dtype=bool)
         condition_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
         moving_inactive_groups = np.zeros_like(groups, dtype=bool)
         moving_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
@@ -402,42 +409,41 @@ def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
 
         self.inactive_marginal_groups = inactive_marginal_groups
         self.limits_marginal_groups = limits_marginal_groups
-        #if self.inactive_marginal_groups.sum()==0:
-        #    self._marginalize_subgradient=False
-                #_opt_affine_term[group] = active_directions[:, idx][group] * penalty.weights[g]
-                #idx += 1
-        #self.condition_inactive_groups = condition_inactive_groups
+
         opt_linear, opt_offset = self.opt_transform
 
-        new_linear = np.zeros((opt_linear.shape[0], self._active_groups.sum()+self._unpenalized_groups.sum()+moving_inactive_variables.sum()))
-        new_linear[:,self.scaling_slice] = opt_linear[:, self.scaling_slice]
+        new_linear = np.zeros((opt_linear.shape[0], (self._active_groups.sum() +
+                                                     self._unpenalized_groups.sum() +
+                                                     moving_inactive_variables.sum())))
+        new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
         new_linear[:, self.unpenalized_slice] = opt_linear[:, self.unpenalized_slice]
 
         inactive_moving_idx = np.nonzero(moving_inactive_variables)[0]
         subgrad_idx = range(self._active_groups.sum() + self._unpenalized.sum(),
-                            self._active_groups.sum() + self._unpenalized.sum()+moving_inactive_variables.sum())
-        subgrad_slice = slice(self._active_groups.sum() + self._unpenalized.sum(),
-                              self._active_groups.sum() + self._unpenalized.sum()+moving_inactive_variables.sum())
+                            self._active_groups.sum() + self._unpenalized.sum() +
+                            moving_inactive_variables.sum())
+        subgrad_slice = subgrad_idx
         for _i, _s in zip(inactive_moving_idx, subgrad_idx):
             new_linear[_i, _s] = 1.
 
         observed_opt_state = self.observed_opt_state[:(self._active_groups.sum() +
                                                        self._unpenalized_groups.sum() +
                                                        moving_inactive_variables.sum())]
-        observed_opt_state[subgrad_slice] = self.initial_subgrad[moving_inactive_variables]
+        observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive_variables]
 
         self.observed_opt_state = observed_opt_state
 
-        condition_linear = np.zeros((opt_linear.shape[0], self._active_groups.sum()+self._unpenalized_groups.sum()+condition_inactive_variables.sum()))
+        condition_linear = np.zeros((opt_linear.shape[0], (self._active_groups.sum() +
+                                                           self._unpenalized_groups.sum() +
+                                                           condition_inactive_variables.sum())))
         inactive_condition_idx = np.nonzero(condition_inactive_variables)[0]
         subgrad_condition_idx = range(self._active_groups.sum() + self._unpenalized.sum(),
-                            self._active_groups.sum() + self._unpenalized.sum() + condition_inactive_variables.sum())
-        subgrad_condition_slice = slice(self._active_groups.sum() + self._unpenalized.sum(),
-                              self._active_groups.sum() + self._unpenalized.sum() + condition_inactive_variables.sum())
+                                      self._active_groups.sum() + self._unpenalized.sum() + condition_inactive_variables.sum())
+
         for _i, _s in zip(inactive_condition_idx, subgrad_condition_idx):
             condition_linear[_i, _s] = 1.
 
-        new_offset = condition_linear[:,subgrad_condition_slice].dot(self.initial_subgrad[condition_inactive_variables]) + opt_offset
+        new_offset = condition_linear[:,subgrad_condition_idx].dot(self.initial_subgrad[condition_inactive_variables]) + opt_offset
 
         self.opt_transform = (new_linear, new_offset)
 
@@ -446,10 +452,6 @@ def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
 
         self.selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
 
-        # reset variables
-        #self.observed_opt_state = np.concatenate((self.observed_opt_state[self.scaling_slice], subgrad_observed[~condition_inactive_variables]), 0)
-        #self.scaling_slice = slice(None, None, None)
-        #self.subgrad_slice = np.zeros(new_linear.shape[1], np.bool)
         self.num_opt_var = new_linear.shape[1]
 
     def condition_on_scalings(self):
@@ -480,6 +482,8 @@ def condition_on_scalings(self):
     def construct_weights(self, full_state):
         """
             marginalizing over the sub-gradient
+
+            full_state is 
         """
 
         if not self._setup:
@@ -490,15 +494,15 @@ def construct_weights(self, full_state):
             weights = np.zeros(p)
 
             if self.inactive_marginal_groups.sum()>0:
-                full_state_plus = full_state+np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
-                full_state_minus = full_state-np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
+                full_state_plus = full_state + np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
+                full_state_minus = full_state - np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
 
 
             def fraction(full_state_plus, full_state_minus, inactive_marginal_groups):
                 return (np.divide(self.randomization._pdf(full_state_plus) - self.randomization._pdf(full_state_minus),
                        self.randomization._cdf(full_state_plus) - self.randomization._cdf(full_state_minus)))[inactive_marginal_groups]
 
-            if self.inactive_marginal_groups.sum()>0:
+            if self.inactive_marginal_groups.sum() > 0:
                 weights[self.inactive_marginal_groups] = fraction(full_state_plus, full_state_minus, self.inactive_marginal_groups)
             weights[~self.inactive_marginal_groups] = self.randomization._derivative_log_density(full_state)[~self.inactive_marginal_groups]
 
diff --git a/selection/randomized/tests/test_decompose_subgrad.py b/selection/randomized/tests/test_decompose_subgrad.py
new file mode 100644
index 000000000..7ebbe056b
--- /dev/null
+++ b/selection/randomized/tests/test_decompose_subgrad.py
@@ -0,0 +1,138 @@
+from itertools import product
+import numpy as np
+import nose.tools as nt
+
+from ..convenience import lasso, step, threshold
+from ..glm import target as glm_target
+
+def test_marginalize():
+
+    np.random.seed(10) # we are going to freeze the active set for this test
+
+    n, p = 20, 5
+    X = np.random.standard_normal((n, p))
+    X /= np.sqrt((X**2).sum(0))[None, :]
+    Y = X.dot([60.1, -61, 0, 0, 0]) + np.random.standard_normal(n)
+
+    n, p = X.shape
+
+    W = np.ones(p) * 20
+    L = lasso.gaussian(X, Y, W, randomizer='gaussian', randomizer_scale=0.01)
+    signs = L.fit()
+
+    # we should be able to reconstruct the initial randomness by hand
+
+    beta = L._view.initial_soln
+    omega = X.T.dot(X.dot(beta) - Y) + L.ridge_term * beta + L._view.initial_subgrad
+
+    np.testing.assert_allclose(omega, L._view._initial_omega)
+
+    A1, b1 = L._view.opt_transform
+    opt_state1 = L._view.observed_opt_state.copy()
+    state1 = A1.dot(opt_state1) + b1
+
+    # now marginalize over some coordinates of inactive
+
+    marginalizing_groups = np.ones(p, np.bool)
+    marginalizing_groups[:3] = False
+
+    L.decompose_subgradient(marginalizing_groups = marginalizing_groups)
+
+    A2, b2 = L._view.opt_transform
+    opt_state2 = L._view.observed_opt_state.copy()
+    state2 = A2.dot(opt_state2) + b2
+
+    opt_state3 = opt_state1.copy()
+    opt_state3[3:] = 0.
+    state3 = A1.dot(opt_state3) + b1
+
+    np.testing.assert_allclose(state1[:3], state2[:3])  # coordinates that are not marginalized over agree before and after marginalizing
+    np.testing.assert_allclose(state3, state2) # when marginalizing, the transform is such that the marginalized subgradients were 0
+
+def test_condition():
+
+    n, p = 20, 5
+
+    np.random.seed(10) # we are going to freeze the active set for this test
+
+    X = np.random.standard_normal((n, p))
+    X /= np.sqrt((X**2).sum(0))[None, :]
+    Y = X.dot([60.1, -61, 0, 0, 0]) + np.random.standard_normal(n)
+
+    n, p = X.shape
+
+    W = np.ones(p) * 20
+    L = lasso.gaussian(X, Y, W, randomizer='gaussian', randomizer_scale=0.01)
+
+    signs = L.fit()
+
+    # we should be able to reconstruct the initial randomness by hand
+
+    beta = L._view.initial_soln
+    omega = X.T.dot(X.dot(beta) - Y) + L.ridge_term * beta + L._view.initial_subgrad
+
+    np.testing.assert_allclose(omega, L._view._initial_omega)
+
+    A1, b1 = L._view.opt_transform
+    state1 = A1.dot(L._view.observed_opt_state) + b1
+
+    # now condition on some coordinates of inactive
+
+    conditioning_groups = np.ones(p, np.bool)
+    conditioning_groups[:3] = False
+
+    L.decompose_subgradient(conditioning_groups = conditioning_groups)
+
+    A2, b2 = L._view.opt_transform
+    state2 = A2.dot(L._view.observed_opt_state) + b2
+
+    np.testing.assert_allclose(state1, state2) # when conditioning, the transform is such that the conditioned subgradients were 
+                                               # what we had originally observed
+
+def test_both():
+
+
+    np.random.seed(10) # we are going to freeze the active set for this test
+
+    n, p = 20, 10
+    X = np.random.standard_normal((n, p))
+    X /= np.sqrt((X**2).sum(0))[None, :]
+    Y = X.dot([60.1, -61] + [0] * (p-2)) + np.random.standard_normal(n)
+
+    n, p = X.shape
+
+    W = np.ones(p) * 20
+    L = lasso.gaussian(X, Y, W, randomizer='gaussian', randomizer_scale=0.01)
+    signs = L.fit()
+
+    # we should be able to reconstruct the initial randomness by hand
+
+    beta = L._view.initial_soln
+    omega = X.T.dot(X.dot(beta) - Y) + L.ridge_term * beta + L._view.initial_subgrad
+
+    np.testing.assert_allclose(omega, L._view._initial_omega)
+
+    A1, b1 = L._view.opt_transform
+    opt_state1 = L._view.observed_opt_state.copy()
+    state1 = A1.dot(opt_state1) + b1
+
+    # now marginalize over some coordinates of inactive and condition on others
+
+    marginalizing_groups = np.zeros(p, np.bool)
+    marginalizing_groups[3:5] = True
+
+    conditioning_groups = np.zeros(p, np.bool)
+    conditioning_groups[5:7] = True
+
+    L.decompose_subgradient(marginalizing_groups = marginalizing_groups,
+                            conditioning_groups = conditioning_groups)
+
+    A2, b2 = L._view.opt_transform
+    opt_state2 = L._view.observed_opt_state.copy()
+    state2 = A2.dot(opt_state2) + b2
+
+    opt_state3 = opt_state1.copy()
+    opt_state3[3:5] = 0.
+    state3 = A1.dot(opt_state3) + b1
+
+    np.testing.assert_allclose(state3, state2) # when marginalizing, the transform is such that the marginalized subgradients were 0
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
index 337b6a042..04524b33a 100644
--- a/selection/randomized/tests/test_optimization_sampler.py
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -39,14 +39,14 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
         selected_features = np.zeros(p, np.bool)
         selected_features[:3] = True
 
+        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+                                   conditioning_groups=conditioning_groups)
+
         conv.summary(selected_features,
                      ndraw=ndraw,
                      burnin=burnin,
                      compute_intervals=True)
 
-        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
-                                   conditioning_groups=conditioning_groups)
-
         target_sampler = optimization_sampler(conv._queries)
 
         S = target_sampler.sample(ndraw,

From aa20a861fe022631023c95dc2f56482116e6466d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 25 Aug 2017 17:20:50 -0700
Subject: [PATCH 127/617] storing initial randomization for tests

---
 selection/randomized/query.py         | 2 +-
 selection/randomized/randomization.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 7eb5af32f..aeb7168a2 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -23,7 +23,7 @@ def __init__(self, randomization):
     def randomize(self):
 
         if not self._randomized:
-            self.randomized_loss = self.randomization.randomize(self.loss, self.epsilon)
+            self.randomized_loss, self._initial_omega = self.randomization.randomize(self.loss, self.epsilon)
         self._randomized = True
 
 
diff --git a/selection/randomized/randomization.py b/selection/randomized/randomization.py
index debd91781..d6b68b6bf 100644
--- a/selection/randomized/randomization.py
+++ b/selection/randomized/randomization.py
@@ -89,7 +89,7 @@ def randomize(self, loss, epsilon=0):
         randomized_loss = rr.smooth_sum([loss])
         _randomZ = self.sample()
         randomized_loss.quadratic = rr.identity_quadratic(epsilon, 0, -_randomZ, 0)
-        return randomized_loss
+        return randomized_loss, _randomZ
 
     @staticmethod
     def isotropic_gaussian(shape, scale):
@@ -302,7 +302,7 @@ def randomize(self, loss, epsilon):
 
         randomized_loss.quadratic = quadratic
 
-        return randomized_loss
+        return randomized_loss, None
 
 # Conjugate generating function for Gaussian
 

From 61a287e896c99947c3c62fc277b80c60ec7156fa Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c04c.SUNet>
Date: Mon, 28 Aug 2017 13:21:33 -0700
Subject: [PATCH 128/617] fixed import issues in barrier

---
 selection/reduced_optimization/barrier.py | 52 ++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/selection/reduced_optimization/barrier.py b/selection/reduced_optimization/barrier.py
index 06eab1c71..f8b2a0596 100644
--- a/selection/reduced_optimization/barrier.py
+++ b/selection/reduced_optimization/barrier.py
@@ -1,7 +1,57 @@
 import numpy as np
 import regreg.api as rr
 from scipy.optimize import bisect, minimize
-from selection.bayesian.selection_probability_rr import cube_barrier_scaled, cube_gradient_scaled, cube_hessian_scaled
+
+def cube_barrier_scaled(argument, lagrange, cube_scale= 1.):
+    '''
+    Barrier approximation to the
+    cube $[-\lambda,\lambda]^k$ with $\lambda$ being `lagrange`.
+    The function is
+    $$
+    z \mapsto \log(1 + c \lambda / (\lambda - z)) + \log(1 + c \lambda / (z + \lambda))
+    $$
+    with $z$ being `argument` and $c$ being `cube_scale`
+    '''
+    BIG = 10 ** 10  # our Newton method will never evaluate this
+    # with any violations, but `scipy.minimize` does
+    _diff = argument - lagrange  # z - \lambda < 0
+    _sum = argument + lagrange  # z + \lambda > 0
+    violations = ((_diff >= 0).sum() + (_sum <= 0).sum() > 0)
+    return np.log((_diff - (cube_scale*lagrange)) * (_sum + (cube_scale*lagrange)) / (_diff * _sum)).sum() + BIG * violations
+
+
+def cube_gradient_scaled(argument, lagrange, cube_scale= 1.):
+    """
+    Gradient of approximation to the
+    cube $[-\lambda,\lambda]^k$ with $\lambda$ being `lagrange`.
+    The function is
+    $$
+    z \mapsto \frac{1}{z - \lambda - c \lambda} - \frac{1}{z - \lambda} +
+    \frac{1}{z + \lambda + c \lambda} - \frac{1}{z + \lambda}
+    $$
+    with $z$ being `argument` and $c$ being `cube_scale`
+    """
+    _diff = argument - lagrange  # z - \lambda < 0
+    _sum = argument + lagrange  # z + \lambda > 0
+    return 1. / (_diff - (cube_scale*lagrange)) - 1. / _diff + 1. / (_sum + (cube_scale*lagrange)) - 1. / _sum
+
+
+def cube_hessian_scaled(argument, lagrange, cube_scale= 1.):
+    """
+    (Diagonal) Hessian of approximation to the
+    cube $[-\lambda,\lambda]^k$ with $\lambda$ being `lagrange`.
+    The function is
+    $$
+    z \mapsto \frac{1}{(z - \lambda)^2} - \frac{1}{(z - \lambda - c \lambda)^2} +
+    \frac{1}{(z + \lambda)^2} - \frac{1}{(z + \lambda + c \lambda)^2}
+    $$
+    with $z$ being `argument` and $c$ being `cube_scale`
+    """
+    _diff = argument - lagrange  # z - \lambda < 0
+    _sum = argument + lagrange  # z + \lambda > 0
+    return 1. / _diff ** 2 - 1. / (_diff - (cube_scale*lagrange)) ** 2 + 1. / _sum ** 2 - \
+           1. / (_sum + (cube_scale*lagrange)) ** 2
+
 
 def cube_barrier_softmax_coord(z, lam):
     _diff = z - lam

From c4e4fefbf46e6a59b5bd9934362312f747253f2c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51skqm.SUNet>
Date: Tue, 29 Aug 2017 09:56:59 -0700
Subject: [PATCH 129/617] cleaned carved lasso test

---
 selection/reduced_optimization/estimator.py   | 555 +++++++++++++++++-
 .../reduced_optimization/initial_soln.py      |  49 +-
 .../reduced_optimization/tests/test_carved.py |  31 -
 .../tests/test_carved_bayesian.py             | 219 -------
 .../{carved_test.py => test_carved_lasso.py}  |  98 +---
 .../tests/test_reduced_lasso.py               |  17 +-
 6 files changed, 587 insertions(+), 382 deletions(-)
 delete mode 100644 selection/reduced_optimization/tests/test_carved.py
 delete mode 100644 selection/reduced_optimization/tests/test_carved_bayesian.py
 rename selection/reduced_optimization/tests/{carved_test.py => test_carved_lasso.py} (59%)

diff --git a/selection/reduced_optimization/estimator.py b/selection/reduced_optimization/estimator.py
index c38929840..44ac103d1 100644
--- a/selection/reduced_optimization/estimator.py
+++ b/selection/reduced_optimization/estimator.py
@@ -1,6 +1,559 @@
 import numpy as np
-from selection.randomized.M_estimator import M_estimator, M_estimator_split
+import regreg.api as rr
 from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
+from selection.randomized.query import query
+from selection.randomized.randomization import split
+import functools
+
+def pairs_bootstrap_glm(glm_loss,
+                        active,
+                        beta_full=None,
+                        inactive=None,
+                        scaling=1.,
+                        solve_args={'min_its':50, 'tol':1.e-10}):
+    """
+    pairs bootstrap of (beta_hat_active, -grad_inactive(beta_hat_active)); returns (sampler, observed), sampler taking resampled row indices
+    """
+    X, Y = glm_loss.data
+
+    if beta_full is None:
+        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+        beta_full = np.zeros(glm_loss.shape)
+        beta_full[active] = beta_active
+    else:
+        beta_active = beta_full[active]
+
+    X_active = X[:,active]
+
+    nactive = active.sum()
+    ntotal = nactive
+
+    if inactive is not None:
+        X_inactive = X[:,inactive]
+        ntotal += inactive.sum()
+
+    _bootW = np.diag(glm_loss.saturated_loss.hessian(X_active.dot(beta_active)))
+    _bootQ = X_active.T.dot(_bootW.dot(X_active))
+    _bootQinv = np.linalg.inv(_bootQ)
+    if inactive is not None:
+        _bootC = X_inactive.T.dot(_bootW.dot(X_active))
+        _bootI = _bootC.dot(_bootQinv)
+    else:
+        _bootI = None
+
+    # design matrix and coefficient vector laid out as (active columns, inactive columns)
+    if inactive is not None:
+        X_full = np.hstack([X_active,X_inactive])
+        beta_overall = np.zeros(X_full.shape[1])
+        beta_overall[:nactive] = beta_active
+    else:
+        X_full = X_active
+        beta_overall = beta_active
+
+    _boot_mu = lambda X_full, beta_overall: glm_loss.saturated_loss.mean_function(X_full.dot(beta_overall))
+
+    if ntotal > nactive:
+        observed = np.hstack([beta_active, -glm_loss.smooth_objective(beta_full, 'grad')[inactive]])
+    else:
+        observed = beta_active.copy() # copy: `observed` is rescaled in place below and must not alias `beta_overall`
+
+    # scaling is a lipschitz constant for a gradient squared
+    _sqrt_scaling = np.sqrt(scaling)
+
+    def _boot_score(X_full, Y, ntotal, _bootQinv, _bootI, nactive, _sqrt_scaling, beta_overall, indices):
+        X_star = X_full[indices]
+        Y_star = Y[indices]
+        score = X_star.T.dot(Y_star - _boot_mu(X_star, beta_overall))
+        result = np.zeros(ntotal)
+        result[:nactive] = _bootQinv.dot(score[:nactive])
+        if ntotal > nactive:
+            result[nactive:] = score[nactive:] - _bootI.dot(score[:nactive])
+        result[:nactive] *= _sqrt_scaling
+        result[nactive:] /= _sqrt_scaling
+        return result
+
+    observed[:nactive] *= _sqrt_scaling
+    observed[nactive:] /= _sqrt_scaling
+
+    return functools.partial(_boot_score, X_full, Y, ntotal, _bootQinv, _bootI, nactive, _sqrt_scaling, beta_overall), observed
+
+def pairs_bootstrap_score(glm_loss,
+                          active,
+                          beta_active=None,
+                          solve_args={'min_its':50, 'tol':1.e-10}):
+    """
+    pairs bootstrap of the score -X^T(Y - mu(X[:,active] beta_active)); returns a function of the resampled row indices
+    """
+    X, Y = glm_loss.data
+
+    if beta_active is None:
+        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+
+    # only the mean function enters the score, so no Hessian weight
+    # matrix (a dense n x n array) is formed here
+
+    _boot_mu = lambda X_active, beta_active: glm_loss.saturated_loss.mean_function(X_active.dot(beta_active))
+
+    def _boot_score(X, Y, active, beta_active, indices):
+        X_star = X[indices]
+        Y_star = Y[indices]
+        score = -X_star.T.dot(Y_star - _boot_mu(X_star[:,active], beta_active))
+        return score
+
+    return functools.partial(_boot_score, X, Y, active, beta_active)
+
+def set_alpha_matrix(glm_loss,
+                     active,
+                     beta_full=None,
+                     inactive=None,
+                     scaling=1.,
+                     solve_args={'min_its': 50, 'tol': 1.e-10}):
+    """Return Q^{-1} X_active^T diag(Y - mu(X_full beta_overall)), where Q = X_active^T W X_active and W is `_W` below."""
+    X, Y = glm_loss.data
+
+    if beta_full is None:
+        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+        beta_full = np.zeros(glm_loss.shape)
+        beta_full[active] = beta_active
+    else:
+        beta_active = beta_full[active]
+
+    X_active = X[:,active]
+
+    nactive = active.sum()
+    ntotal = nactive
+
+    if inactive is not None:
+        X_inactive = X[:,inactive]
+        ntotal += inactive.sum()
+    # NOTE(review): `ntotal` and `scaling` are not read anywhere below
+    _W = np.diag(glm_loss.saturated_loss.hessian(X_active.dot(beta_active)))
+    _Q = X_active.T.dot(_W.dot(X_active))
+    _Qinv = np.linalg.inv(_Q)
+    nactive = active.sum()
+    if inactive is not None:
+        X_full = np.hstack([X_active, X_inactive])
+        beta_overall = np.zeros(X_full.shape[1])
+        beta_overall[:nactive] = beta_active  # coefficients of the inactive columns stay at zero
+    else:
+        X_full = X_active
+        beta_overall = beta_active
+
+    obs_residuals = Y - glm_loss.saturated_loss.mean_function(X_full.dot(beta_overall))
+
+    return np.dot(np.dot(_Qinv, X_active.T), np.diag(obs_residuals))
+
+class M_estimator(query):
+
+    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
+        """
+        Store the ingredients of a randomized M-estimation query.
+        Nothing is fit here; the penalized problem is solved later by `solve`.
+        Parameters:
+        -----------
+        loss:
+            Smooth loss; `solve` uses `loss.shape`, `loss.hessian` and `loss.smooth_objective`.
+        epsilon:
+            Added to the Hessian as `epsilon * I` when `solve` forms the optimization transform.
+        penalty:
+            Penalty with `groups` and `weights`, passed to `rr.simple_problem` in `solve`.
+        randomization:
+            Passed to `query.__init__` and also stored on the instance.
+        solve_args: dict
+            Arguments to be passed to regreg solver.
+        Returns:
+        --------
+        None
+        """
+
+        query.__init__(self, randomization)
+
+        (self.loss,
+         self.epsilon,
+         self.penalty,
+         self.randomization,
+         self.solve_args) = (loss,
+                             epsilon,
+                             penalty,
+                             randomization,
+                             solve_args)
+
+    # Methods needed for subclassing a query
+
+    def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
+        """Solve the randomized penalized problem, record the selected groups and build `opt_transform` / `score_transform`."""
+        self.randomize()
+        # NOTE: `solve_args` is rebound from `self.solve_args` just below, so the value passed to this call is not used
+        (loss,
+         randomized_loss,
+         epsilon,
+         penalty,
+         randomization,
+         solve_args) = (self.loss,
+                        self.randomized_loss,
+                        self.epsilon,
+                        self.penalty,
+                        self.randomization,
+                        self.solve_args)
+
+        # initial solution
+
+        problem = rr.simple_problem(randomized_loss, penalty)
+        self.initial_soln = problem.solve(**solve_args)
+
+        # find the active groups and their direction vectors
+        # as well as unpenalized groups
+
+        groups = np.unique(penalty.groups)
+        active_groups = np.zeros(len(groups), bool) # builtin bool/float: the np.bool / np.float aliases were removed in NumPy 1.24
+        unpenalized_groups = np.zeros(len(groups), bool)
+
+        active_directions = []
+        active = np.zeros(loss.shape, bool)
+        unpenalized = np.zeros(loss.shape, bool)
+
+        initial_scalings = []
+
+        for i, g in enumerate(groups):
+            group = penalty.groups == g
+            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) > 1.e-6 * penalty.weights[g]) and (penalty.weights[g] > 0)
+            unpenalized_groups[i] = (penalty.weights[g] == 0)
+            if active_groups[i]:
+                active[group] = True
+                z = np.zeros(active.shape, float)
+                z[group] = self.initial_soln[group] / np.linalg.norm(self.initial_soln[group])
+                active_directions.append(z)
+                initial_scalings.append(np.linalg.norm(self.initial_soln[group]))
+            if unpenalized_groups[i]:
+                unpenalized[group] = True
+
+        # solve the restricted problem
+
+        self._overall = active + unpenalized
+        self._inactive = ~self._overall
+        self._unpenalized = unpenalized
+        self._active_directions = np.array(active_directions).T
+        self._active_groups = np.array(active_groups, bool)
+        self._unpenalized_groups = np.array(unpenalized_groups, bool)
+
+        self.selection_variable = {'groups':self._active_groups,
+                                   'variables':self._overall,
+                                   'directions':self._active_directions}
+
+        # initial state for opt variables
+
+        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
+                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad'))
+                          # the quadratic of a smooth_atom is not included in computing the smooth_objective
+
+        initial_subgrad = initial_subgrad[self._inactive]
+        initial_unpenalized = self.initial_soln[self._unpenalized]
+        self.observed_opt_state = np.concatenate([initial_scalings,
+                                                  initial_unpenalized,
+                                                  initial_subgrad], axis=0)
+
+        # set the _solved bit
+
+        self._solved = True
+
+        # Now setup the pieces for linear decomposition
+
+        (loss,
+         epsilon,
+         penalty,
+         initial_soln,
+         overall,
+         inactive,
+         unpenalized,
+         active_groups,
+         active_directions) = (self.loss,
+                               self.epsilon,
+                               self.penalty,
+                               self.initial_soln,
+                               self._overall,
+                               self._inactive,
+                               self._unpenalized,
+                               self._active_groups,
+                               self._active_directions)
+
+        # scaling should be chosen to be Lipschitz constant for gradient of Gaussian part
+
+        # we are implicitly assuming that
+        # loss is a pairs model
+
+        _sqrt_scaling = np.sqrt(scaling)
+
+        _beta_unpenalized = restricted_Mest(loss, overall, solve_args=solve_args)
+
+        beta_full = np.zeros(overall.shape)
+        beta_full[overall] = _beta_unpenalized
+        _hessian = loss.hessian(beta_full)
+        self._beta_full = beta_full
+
+        # observed state for score
+
+        self.observed_score_state = np.hstack([_beta_unpenalized * _sqrt_scaling,
+                                               -loss.smooth_objective(beta_full, 'grad')[inactive] / _sqrt_scaling])
+
+        # form linear part
+
+        self.num_opt_var = p = loss.shape[0] # shorthand for p
+
+        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
+        # E for active
+        # U for unpenalized
+        # -E for inactive
+
+        _opt_linear_term = np.zeros((p, self._active_groups.sum() + unpenalized.sum() + inactive.sum()))
+        _score_linear_term = np.zeros((p, p))
+
+        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
+
+        Mest_slice = slice(0, overall.sum())
+        _Mest_hessian = _hessian[:,overall]
+        _score_linear_term[:,Mest_slice] = -_Mest_hessian / _sqrt_scaling
+
+        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
+
+        null_idx = range(overall.sum(), p)
+        inactive_idx = np.nonzero(inactive)[0]
+        for _i, _n in zip(inactive_idx, null_idx):
+            _score_linear_term[_i,_n] = -_sqrt_scaling
+
+        # c_E piece
+
+        scaling_slice = slice(0, active_groups.sum())
+        if len(active_directions)==0:
+            _opt_hessian=0
+        else:
+            _opt_hessian = (_hessian + epsilon * np.identity(p)).dot(active_directions)
+        _opt_linear_term[:,scaling_slice] = _opt_hessian / _sqrt_scaling
+
+        self.observed_opt_state[scaling_slice] *= _sqrt_scaling
+
+        # beta_U piece
+
+        unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
+        unpenalized_directions = np.identity(p)[:,unpenalized]
+        if unpenalized.sum():
+            _opt_linear_term[:,unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
+
+        self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling
+
+        # subgrad piece
+
+        subgrad_idx = range(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
+        subgrad_slice = slice(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
+        for _i, _s in zip(inactive_idx, subgrad_idx):
+            _opt_linear_term[_i,_s] = _sqrt_scaling
+
+        self.observed_opt_state[subgrad_slice] /= _sqrt_scaling
+
+        # form affine part
+
+        _opt_affine_term = np.zeros(p)
+        idx = 0
+        groups = np.unique(penalty.groups)
+        for i, g in enumerate(groups):
+            if active_groups[i]:
+                group = penalty.groups == g
+                _opt_affine_term[group] = active_directions[:,idx][group] * penalty.weights[g]
+                idx += 1
+
+        # two transforms that encode score and optimization
+        # variable roles
+
+        self.opt_transform = (_opt_linear_term, _opt_affine_term)
+        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
+
+        # later, we will modify `score_transform`
+        # in `linear_decomposition`
+
+        # now store everything needed for the projections
+        # the projection acts only on the optimization
+        # variables
+
+        self.scaling_slice = scaling_slice
+
+        # weights are scaled here because the linear terms scales them by scaling
+
+        new_groups = penalty.groups[inactive]
+        new_weights = dict([(g, penalty.weights[g] / _sqrt_scaling) for g in penalty.weights.keys() if g in np.unique(new_groups)])
+
+        # we form a dual group lasso object
+        # to do the projection
+
+        self.group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, bound=1.)
+        self.subgrad_slice = subgrad_slice
+
+        self._setup = True
+
+    def setup_sampler(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
+        pass  # no-op in this class; `solve` already builds the transforms and sets `self._setup`
+
+    def projection(self, opt_state):
+        """
+        Full projection for Langevin.
+        The state here will be only the state of the optimization variables.
+        Scalings are clipped at zero and subgradients go through `group_lasso_dual.bound_prox`;
+        a block that has been conditioned on is no longer part of `opt_state`.
+        """
+
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
+
+        # `condition_on_scalings` stores its key as 'scalings'; the singular spelling is accepted as well
+        on_subgrad = 'subgradient' in self.selection_variable
+        on_scaling = 'scalings' in self.selection_variable or 'scaling' in self.selection_variable
+
+        if not on_subgrad and not on_scaling: # have not conditioned on any thing else
+            new_state = opt_state.copy() # not really necessary to copy
+            new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
+            new_state[self.subgrad_slice] = self.group_lasso_dual.bound_prox(opt_state[self.subgrad_slice])
+        elif not on_subgrad: # conditioned on the initial scalings, only the subgradient in opt_state
+            new_state = self.group_lasso_dual.bound_prox(opt_state)
+        elif not on_scaling: # conditioned on the subgradient, only the scaling in opt_state
+            new_state = np.maximum(opt_state, 0)
+        else: # conditioned on both
+            new_state = opt_state
+        return new_state
+
+    # optional things to condition on
+
+    def condition_on_subgradient(self):
+        """
+        Fix the subgradient block at its observed value (folded into the offset of `opt_transform`); maybe we should allow subgradients of only some variables...
+        """
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
+
+        opt_linear, opt_offset = self.opt_transform
+
+        new_offset = opt_linear[:,self.subgrad_slice].dot(self.observed_opt_state[self.subgrad_slice]) + opt_offset
+        new_linear = opt_linear[:,self.scaling_slice]
+
+        self.opt_transform = (new_linear, new_offset)
+
+        # for group LASSO this should not induce a bigger jacobian as
+        # the subgradients are in the interior of a ball
+        self.selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
+
+        # reset variables
+
+        self.observed_opt_state = self.observed_opt_state[self.scaling_slice]
+        self.scaling_slice = slice(None, None, None)
+        self.subgrad_slice = np.zeros(new_linear.shape[1], bool) # all-False mask; builtin bool since np.bool was removed in NumPy 1.24
+        self.num_opt_var = new_linear.shape[1]
+
+    def condition_on_scalings(self):
+        """
+        Fix the scaling block at its observed value: it is folded into the offset of `opt_transform` and recorded under the key 'scalings'.
+        """
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
+
+        opt_linear, opt_offset = self.opt_transform
+
+        new_offset = opt_linear[:,self.scaling_slice].dot(self.observed_opt_state[self.scaling_slice]) + opt_offset
+        new_linear = opt_linear[:,self.subgrad_slice]
+
+        self.opt_transform = (new_linear, new_offset)
+
+        # for group LASSO this will induce a bigger jacobian
+        self.selection_variable['scalings'] = self.observed_opt_state[self.scaling_slice]
+
+        # reset slices
+
+        self.observed_opt_state = self.observed_opt_state[self.subgrad_slice]
+        self.subgrad_slice = slice(None, None, None)
+        self.scaling_slice = np.zeros(new_linear.shape[1], bool) # all-False mask; builtin bool since np.bool was removed in NumPy 1.24
+        self.num_opt_var = new_linear.shape[1]
+
+
+
+def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
+    """Minimize the saturated loss composed with the columns `X[:,active]` and return the solution `beta_E`."""
+    X, Y = Mest_loss.data
+
+    if Mest_loss._is_transform:
+        raise NotImplementedError('to fit restricted model, X must be an ndarray or scipy.sparse; general transforms not implemented')
+    X_restricted = X[:,active]
+    loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
+    beta_E = loss_restricted.solve(**solve_args)
+
+    return beta_E
+
+class M_estimator_split(M_estimator):
+    """M_estimator whose randomization is `split(loss.shape, subsample_size, total_size)`."""
+    def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10}):
+        total_size = loss.saturated_loss.shape[0]
+        # validate the sizes before the `split` randomization is built from them
+        if subsample_size > total_size:
+            raise ValueError('subsample size must be smaller than total sample size')
+
+        self.randomization = split(loss.shape, subsample_size, total_size)
+        M_estimator.__init__(self,loss, epsilon, penalty, self.randomization, solve_args=solve_args)
+
+        self.total_size, self.subsample_size = total_size, subsample_size
+
+    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B=2000):
+        """Bootstrap (B draws) the covariance of full score minus rescaled subsample score, set it on the randomization, return (bootstrap_score, cov)."""
+        M_estimator.setup_sampler(self,
+                                  scaling=scaling,
+                                  solve_args=solve_args)
+
+        # now we need to estimate covariance of
+        # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)
+
+        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0] # shorthand
+
+        # `pairs_bootstrap_score` is the module-level function defined earlier in this file
+
+        bootstrap_score = pairs_bootstrap_score(self.loss,
+                                                self._overall,
+                                                beta_active=self._beta_full[self._overall],
+                                                solve_args=solve_args)
+
+        # find unpenalized MLE on subsample
+
+        newq, oldq = rr.identity_quadratic(0, 0, 0, 0), self.randomized_loss.quadratic
+        self.randomized_loss.quadratic = newq
+        beta_active_subsample = restricted_Mest(self.randomized_loss,
+                                                self._overall)
+
+        bootstrap_score_split = pairs_bootstrap_score(self.loss,
+                                                      self._overall,
+                                                      beta_active=beta_active_subsample,
+                                                      solve_args=solve_args)
+        self.randomized_loss.quadratic = oldq
+
+        inv_frac = float(n) / m # true division: with integer n, m a plain `/` floors on Python 2 (e.g. 1000 / 800 == 1)
+
+        def subsample_diff(m, n, indices):
+            subsample = np.random.choice(indices, size=m, replace=False)
+            full_score = bootstrap_score(indices) # a sum of n terms
+            randomized_score = bootstrap_score_split(subsample) # a sum of m terms
+            return full_score - randomized_score * inv_frac
+
+        first_moment = np.zeros(p)
+        second_moment = np.zeros((p, p))
+
+        _n = np.arange(n)
+        for _ in range(B):
+            indices = np.random.choice(_n, size=n, replace=True)
+            randomized_score = subsample_diff(m, n, indices)
+            first_moment += randomized_score
+            second_moment += np.multiply.outer(randomized_score, randomized_score)
+
+        first_moment /= B
+        second_moment /= B
+
+        cov = second_moment - np.multiply.outer(first_moment,
+                                                first_moment)
+
+        self.randomization.set_covariance(cov)
+
+        return bootstrap_score, cov
 
 class M_estimator_approx(M_estimator):
 
diff --git a/selection/reduced_optimization/initial_soln.py b/selection/reduced_optimization/initial_soln.py
index 50b0e008a..813b2f0bd 100644
--- a/selection/reduced_optimization/initial_soln.py
+++ b/selection/reduced_optimization/initial_soln.py
@@ -1,59 +1,15 @@
 import numpy as np
 import regreg.api as rr
-#from selection.bayesian.cisEQTLS.tests.CV_lambda import tuning_parameter_glmnet
-# from rpy2.robjects.packages import importr
-# from rpy2 import robjects
-# glmnet = importr('glmnet')
-#import rpy2.robjects.numpy2ri
-#rpy2.robjects.numpy2ri.activate()
-import numpy as np
-import regreg.api as rr
-from selection.tests.instance import gaussian_instance
-
-
-# def tuning_parameter_glmnet(X, y):
-#     robjects.r('''
-#         glmnet_cv = function(X,y, lam_seq=NA){
-#         y = as.matrix(y)
-#         X = as.matrix(X)
-#         if (is.na(lam_seq)){
-#             G_CV = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-#         }
-#         else {
-#             G_CV = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE, lambda=lam_seq)
-#         }
-#         lam_1SE = G_CV$lambda.1se
-#         lam_minCV = G_CV$lambda.min
-#         n = nrow(X)
-#         lam_minCV = lam_minCV*n
-#         lam_1SE = lam_1SE*n
-#         lam_seq = G_CV$lambda*n
-#         result = list(lam_minCV=lam_minCV, lam_1SE=lam_1SE, lam_seq = lam_seq, CV_err=G_CV$cvm, SD=G_CV$cvsd)
-#         return(result)
-#         }''')
-#
-#     r_glmnet_cv = robjects.globalenv['glmnet_cv']
-#     n, p = X.shape
-#     r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-#     r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-#     result = r_glmnet_cv(r_X, r_y)
-#     lam_minCV = result[0][0]
-#     lam_1SE = result[1][0]
-#     return lam_minCV, lam_1SE
-
 
 def selection(X, y, random_Z, randomization_scale=1, sigma=None, method="theoretical"):
     n, p = X.shape
     loss = rr.glm.gaussian(X,y)
     epsilon = 1. / np.sqrt(n)
-    lam_frac = 1.
+    lam_frac = 1.2
     if sigma is None:
         sigma = 1.
     if method == "theoretical":
         lam = 1. * sigma * lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 10000)))).max(0))
-    # elif method == "cross-validation":
-    #     lam = tuning_parameter_glmnet(X, y)[1]
-    #     print(lam)
 
     W = np.ones(p)*lam
     penalty = rr.group_lasso(np.arange(p), weights = dict(zip(np.arange(p), W)), lagrange=1.)
@@ -62,7 +18,6 @@ def selection(X, y, random_Z, randomization_scale=1, sigma=None, method="theoret
 
     problem = rr.simple_problem(loss, penalty)
     random_term = rr.identity_quadratic(epsilon, 0, -randomization_scale * random_Z, 0)
-    solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
 
 
     solve_args = {'tol': 1.e-10, 'min_its': 100, 'max_its': 500}
@@ -76,8 +31,6 @@ def selection(X, y, random_Z, randomization_scale=1, sigma=None, method="theoret
     cube = subgradient[~active]/lam
     return lam, epsilon, active, betaE, cube, initial_soln
 
-#creating instance X,y,beta: for a single X, sampling lots of y
-
 class instance(object):
 
     def __init__(self, n, p, s, snr=5, sigma=1., rho=0, random_signs=False, scale =True, center=True):
diff --git a/selection/reduced_optimization/tests/test_carved.py b/selection/reduced_optimization/tests/test_carved.py
deleted file mode 100644
index cca8675f9..000000000
--- a/selection/reduced_optimization/tests/test_carved.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import numpy as np
-import regreg.api as rr
-
-from ...tests.flags import SMALL_SAMPLES, SET_SEED
-from ...tests.decorators import (set_seed_iftrue, 
-                                 set_sampling_params_iftrue)
-
-from ..estimator import M_estimator_approx_carved
-from ...tests.instance import logistic_instance, gaussian_instance
-
-@set_seed_iftrue(SET_SEED)
-def test_carved():
-    n = 500
-    p = 100
-    s = 0
-    signal = 0.
-
-    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, signal=signal)
-    lam = 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-    n, p = X.shape
-
-    loss = rr.glm.gaussian(X, y)
-    total_size = loss.saturated_loss.shape[0]
-    subsample_size = int(0.8* total_size)
-    epsilon = 1. / np.sqrt(n)
-
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-    M_est = M_estimator_approx_carved(loss, epsilon, subsample_size, penalty, 'parametric')
-    M_est.solve_approx()
diff --git a/selection/reduced_optimization/tests/test_carved_bayesian.py b/selection/reduced_optimization/tests/test_carved_bayesian.py
deleted file mode 100644
index c17dc0428..000000000
--- a/selection/reduced_optimization/tests/test_carved_bayesian.py
+++ /dev/null
@@ -1,219 +0,0 @@
-from __future__ import print_function
-import sys
-import os
-
-import numpy as np
-import regreg.api as rr
-
-from selection.api import randomization
-from ..initial_soln import selection, instance
-from ..lasso_reduced import (nonnegative_softmax_scaled, 
-                             neg_log_cube_probability, 
-                             selection_probability_lasso, 
-                             sel_prob_gradient_map_lasso, 
-                             selective_inf_lasso)
-from ..par_carved_reduced import selection_probability_carved, sel_inf_carved
-from ...randomized.M_estimator import M_estimator, M_estimator_split
-from ...randomized.glm import pairs_bootstrap_glm, bootstrap_cov
-
-from ...tests.flags import SMALL_SAMPLES, SET_SEED
-from ...tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
-
-def generate_data_random(n, p, sigma=1., rho=0., scale =True, center=True):
-
-    X = (np.sqrt(1 - rho) * np.random.standard_normal((n, p)) + np.sqrt(rho) * np.random.standard_normal(n)[:, None])
-
-    if center:
-        X -= X.mean(0)[None, :]
-    if scale:
-        X /= (X.std(0)[None, :] * np.sqrt(n))
-
-    beta_true = np.zeros(p)
-    u = np.random.uniform(0., 1., p)
-    for i in range(p):
-        if u[i] <= 0.9:
-            beta_true[i] = np.random.laplace(loc=0., scale=0.1)
-        else:
-            beta_true[i] = np.random.laplace(loc=0., scale=1.)
-
-    beta = beta_true
-
-    Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma
-
-    return X, Y, beta * sigma, sigma
-
-class M_estimator_approx_carved(M_estimator_split):
-
-    def __init__(self, loss, epsilon, subsample_size, penalty, estimation):
-
-        M_estimator_split.__init__(self,loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10})
-        self.estimation = estimation
-
-    def solve_approx(self):
-
-        self.solve()
-
-        self.nactive = self._overall.sum()
-        X, _ = self.loss.data
-        n, p = X.shape
-        self.p = p
-        self.target_observed = self.observed_score_state[:self.nactive]
-
-        self.feasible_point = np.concatenate([self.observed_score_state, np.fabs(self.observed_opt_state[:self.nactive]),
-                                              self.observed_opt_state[self.nactive:]], axis = 0)
-
-        (_opt_linear_term, _opt_affine_term) = self.opt_transform
-        self._opt_linear_term = np.concatenate(
-            (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
-
-        self._opt_affine_term = np.concatenate((_opt_affine_term[self._overall], _opt_affine_term[~self._overall]), 0)
-        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
-
-        (_score_linear_term, _) = self.score_transform
-        self._score_linear_term = np.concatenate(
-            (_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
-
-        self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
-
-        lagrange = []
-        for key, value in self.penalty.weights.iteritems():
-            lagrange.append(value)
-        lagrange = np.asarray(lagrange)
-
-        #print("True or false", np.all(lagrange[0]-np.fabs(self.feasible_point[p+self.nactive:]))>0)
-        #print("True or false", np.all(self.feasible_point[p:][:self.nactive]) > 0)
-
-        self.inactive_lagrange = lagrange[~self._overall]
-
-        self.bootstrap_score, self.randomization_cov = self.setup_sampler()
-
-        if self.estimation == 'parametric':
-            score_cov = np.zeros((p,p))
-            inv_X_active = np.linalg.inv(X[:, self._overall].T.dot(X[:, self._overall]))
-            projection_X_active = X[:,self._overall].dot(np.linalg.inv(X[:, self._overall].T.dot(X[:, self._overall]))).dot(X[:,self._overall].T)
-            score_cov[:self.nactive, :self.nactive] = inv_X_active
-            score_cov[self.nactive:, self.nactive:] = X[:,~self._overall].T.dot(np.identity(n)- projection_X_active).dot(X[:,~self._overall])
-
-        elif self.estimation == 'bootstrap':
-            score_cov = bootstrap_cov(lambda: np.random.choice(n, size=(n,), replace=True), self.bootstrap_score)
-
-        self.score_cov = score_cov
-        self.score_cov_inv = np.linalg.inv(self.score_cov)
-
-def carved_lasso_trial(X,
-                       y,
-                       beta,
-                       sigma,
-                       lam,
-                       estimation='parametric',
-                       ndraw=1000,
-                       burnin=100):
-    n, p = X.shape
-
-    loss = rr.glm.gaussian(X, y)
-    epsilon = 1. / np.sqrt(n)
-
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    total_size = loss.saturated_loss.shape[0]
-    subsample_size = int(0.8 * total_size)
-
-    M_est = M_estimator_approx_carved(loss, epsilon, subsample_size, penalty, estimation)
-
-    M_est.solve_approx()
-    active = M_est._overall
-    nactive = M_est.nactive
-
-    if nactive >= 1:
-        prior_variance = 1000.
-        noise_variance = sigma**2
-        projection_active = X[:, active].dot(np.linalg.inv(X[:, active].T.dot(X[:, active])))
-        M_1 = prior_variance * (X.dot(X.T)) + noise_variance * np.identity(n)
-        M_2 = prior_variance * ((X.dot(X.T)).dot(projection_active))
-        M_3 = prior_variance * (projection_active.T.dot(X.dot(X.T)).dot(projection_active))
-        post_mean = M_2.T.dot(np.linalg.inv(M_1)).dot(y)
-
-        print("observed data", post_mean)
-
-        post_var = M_3 - M_2.T.dot(np.linalg.inv(M_1)).dot(M_2)
-
-        unadjusted_intervals = np.vstack([post_mean - 1.65 * (np.sqrt(post_var.diagonal())),
-                                          post_mean + 1.65 * (np.sqrt(post_var.diagonal()))])
-        grad_lasso = sel_inf_carved(M_est, prior_variance)
-        samples = grad_lasso.posterior_samples(langevin_steps=ndraw, burnin=burnin)
-        adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
-
-        selective_mean = np.mean(samples, axis=0)
-
-        coverage_ad = np.zeros(nactive)
-        coverage_unad = np.zeros(nactive)
-        ad_length = np.zeros(nactive)
-        unad_length = np.zeros(nactive)
-
-        true_val = projection_active.T.dot(X.dot(beta))
-        for l in range(nactive):
-            if (adjusted_intervals[0, l] <= true_val[l]) and (true_val[l] <= adjusted_intervals[1, l]):
-                coverage_ad[l] += 1
-            ad_length[l] = adjusted_intervals[1, l] - adjusted_intervals[0, l]
-            if (unadjusted_intervals[0, l] <= true_val[l]) and (true_val[l] <= unadjusted_intervals[1, l]):
-                coverage_unad[l] += 1
-            unad_length[l] = unadjusted_intervals[1, l] - unadjusted_intervals[0, l]
-
-        sel_cov = coverage_ad.sum() / nactive
-        naive_cov = coverage_unad.sum() / nactive
-        ad_len = ad_length.sum() / nactive
-        unad_len = unad_length.sum() / nactive
-        bayes_risk_ad = np.power(selective_mean - true_val, 2.).sum() / nactive
-        bayes_risk_unad = np.power(post_mean - true_val, 2.).sum() / nactive
-
-        return np.vstack([sel_cov, naive_cov, ad_len, unad_len, bayes_risk_ad, bayes_risk_unad])
-
-    else:
-        return np.vstack([0.,0.,0.,0.,0.,0.])
-
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
-def test_carved_bayesian(ndraw=1000,
-                         burnin=100):
-
-    n = 1000
-    p = 100
-    s = 0
-    snr = 0.
-
-    niter = 50
-    ad_cov = 0.
-    unad_cov = 0.
-    ad_len = 0.
-    unad_len = 0.
-    ad_risk = 0.
-    unad_risk = 0.
-
-    X, y, beta, sigma = generate_data_random(n=n, p=p)
-    lam = 0.8 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-    lasso = carved_lasso_trial(X,
-                               y,
-                               beta,
-                               sigma,
-                               lam,
-                               ndraw=ndraw,
-                               burnin=burnin)
-
-    ad_cov += lasso[0, 0]
-    unad_cov += lasso[1, 0]
-    ad_len += lasso[2, 0]
-    unad_len += lasso[3, 0]
-    ad_risk += lasso[4, 0]
-    unad_risk += lasso[5, 0]
-
-    print("\n")
-    print("iteration completed", i)
-    print("adjusted and unadjusted coverage", ad_cov, unad_cov)
-    print("adjusted and unadjusted lengths", ad_len, unad_len)
-    print("adjusted and unadjusted risks", ad_risk, unad_risk)
-
-    print("adjusted and unadjusted coverage", ad_cov, unad_cov)
-    print("adjusted and unadjusted lengths", ad_len, unad_len)
-    print("adjusted and unadjusted risks", ad_risk, unad_risk)
diff --git a/selection/reduced_optimization/tests/carved_test.py b/selection/reduced_optimization/tests/test_carved_lasso.py
similarity index 59%
rename from selection/reduced_optimization/tests/carved_test.py
rename to selection/reduced_optimization/tests/test_carved_lasso.py
index cbbde5c25..36928b279 100644
--- a/selection/reduced_optimization/tests/carved_test.py
+++ b/selection/reduced_optimization/tests/test_carved_lasso.py
@@ -1,16 +1,12 @@
 from __future__ import print_function
 import numpy as np
-import time
 import regreg.api as rr
-from selection.reduced_optimization.initial_soln import selection
 from selection.tests.instance import logistic_instance, gaussian_instance
 
 from selection.reduced_optimization.par_carved_reduced import selection_probability_carved, sel_inf_carved
 
 from selection.reduced_optimization.estimator import M_estimator_approx_carved
 
-import sys
-import os
 
 def carved_lasso_trial(X,
                        y,
@@ -80,83 +76,41 @@ def carved_lasso_trial(X,
         return np.vstack([sel_cov, naive_cov, ad_len, unad_len, bayes_risk_ad, bayes_risk_unad])
 
     else:
-        return np.vstack([0.,0.,0.,0., 0., 0.])
+        return np.vstack([0.,0.,0.,0.,0.,0.])
 
-
-if __name__ == "__main__":
+def test_carved_lasso():
     ### set parameters
     n = 1000
     p = 100
-    s = 0
-    snr = 0.
-
+    s = 20
+    snr = 7.
 
-    niter = 24
     ad_cov = 0.
     unad_cov = 0.
     ad_len = 0.
     unad_len = 0.
-    no_sel = 0
     ad_risk = 0.
     unad_risk = 0.
 
-    for i in range(niter):
-
-         ### GENERATE X, Y BASED ON SEED
-         #i+17 was good, i+27 was good
-         np.random.seed(37)  # ensures different y
-         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
-         lam = 0.8 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-         ### RUN LASSO AND TEST
-         lasso = carved_lasso_trial(X,
-                                    y,
-                                    beta,
-                                    sigma,
-                                    lam)
-
-         if lasso is not None:
-             ad_cov += lasso[0,0]
-             unad_cov += lasso[1,0]
-             ad_len += lasso[2, 0]
-             unad_len += lasso[3, 0]
-             ad_risk += lasso[4, 0]
-             unad_risk += lasso[5, 0]
-             print("\n")
-             print("iteration completed", i - no_sel)
-             print("\n")
-             print("adjusted and unadjusted coverage", ad_cov, unad_cov)
-             print("adjusted and unadjusted lengths", ad_len, unad_len)
-             print("adjusted and unadjusted risks", ad_risk, unad_risk)
-         else:
-             no_sel += 1
-
-
-# if __name__ == "__main__":
-#
-#     # read from command line
-#     print(len(sys.argv))
-#     seedn = int(sys.argv[1]) + 17
-#     outdir = sys.argv[2]
-#
-#     outfile = os.path.join(outdir, "list_result_" + str(seedn) + ".txt")
-#
-#     ### set parameters
-#     n = 1000
-#     p = 200
-#     s = 0
-#     snr = 0.
-#
-#     np.random.seed(seedn)  # ensures different X and y
-#     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
-#
-#     lam = 0.8 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-#
-#     ### RUN LASSO AND SAVE
-#     lasso = carved_lasso_trial(X,
-#                                y,
-#                                beta,
-#                                sigma,
-#                                lam)
-#
-#     np.savetxt(outfile, lasso)
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, signal=snr)
+    lam = 0.8 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    lasso = carved_lasso_trial(X,
+                               y,
+                               beta,
+                               sigma,
+                               lam)
+
+
+    if lasso is not None:
+        ad_cov += lasso[0,0]
+        unad_cov += lasso[1,0]
+        ad_len += lasso[2, 0]
+        unad_len += lasso[3, 0]
+        ad_risk += lasso[4, 0]
+        unad_risk += lasso[5, 0]
+        print("\n")
+        print("\n")
+        print("adjusted and unadjusted coverage", ad_cov, unad_cov)
+        print("adjusted and unadjusted lengths", ad_len, unad_len)
+
+test_carved_lasso()
\ No newline at end of file
diff --git a/selection/reduced_optimization/tests/test_reduced_lasso.py b/selection/reduced_optimization/tests/test_reduced_lasso.py
index fb11201ce..6262d87e9 100644
--- a/selection/reduced_optimization/tests/test_reduced_lasso.py
+++ b/selection/reduced_optimization/tests/test_reduced_lasso.py
@@ -1,17 +1,13 @@
 from __future__ import print_function
-
-import sys
-import os
-
 import numpy as np
 
 from selection.api import randomization
-from ..initial_soln import selection, instance
-from ..lasso_reduced import (nonnegative_softmax_scaled, 
-                             neg_log_cube_probability, 
-                             selection_probability_lasso, 
-                             sel_prob_gradient_map_lasso, 
-                             selective_inf_lasso)
+from selection.reduced_optimization.initial_soln import selection, instance
+from selection.reduced_optimization.lasso_reduced import (nonnegative_softmax_scaled,
+                                                          neg_log_cube_probability,
+                                                          selection_probability_lasso,
+                                                          sel_prob_gradient_map_lasso,
+                                                          selective_inf_lasso)
 
 from selection.tests.flags import SMALL_SAMPLES, SET_SEED
 from selection.tests.decorators import (set_sampling_params_iftrue,
@@ -140,4 +136,3 @@ def test_reduced_lasso():
         print("\n")
         print("adjusted and unadjusted coverage", ad_cov, unad_cov)
         print("adjusted and unadjusted lengths", ad_len, unad_len)
-        

From ddf485de60264c4ffcbe64b1e90b6f319d500b69 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:10:27 -0700
Subject: [PATCH 130/617] added fs

---
 .../tests/test_carved_lasso.py                |   2 -
 .../reduced_optimization/tests/test_fs.py     | 128 ++++++++++++++++++
 .../tests/test_reduced_lasso.py               |   8 +-
 3 files changed, 132 insertions(+), 6 deletions(-)
 create mode 100644 selection/reduced_optimization/tests/test_fs.py

diff --git a/selection/reduced_optimization/tests/test_carved_lasso.py b/selection/reduced_optimization/tests/test_carved_lasso.py
index 36928b279..237fbb685 100644
--- a/selection/reduced_optimization/tests/test_carved_lasso.py
+++ b/selection/reduced_optimization/tests/test_carved_lasso.py
@@ -7,7 +7,6 @@
 
 from selection.reduced_optimization.estimator import M_estimator_approx_carved
 
-
 def carved_lasso_trial(X,
                        y,
                        beta,
@@ -113,4 +112,3 @@ def test_carved_lasso():
         print("adjusted and unadjusted coverage", ad_cov, unad_cov)
         print("adjusted and unadjusted lengths", ad_len, unad_len)
 
-test_carved_lasso()
\ No newline at end of file
diff --git a/selection/reduced_optimization/tests/test_fs.py b/selection/reduced_optimization/tests/test_fs.py
new file mode 100644
index 000000000..0c3cb6f02
--- /dev/null
+++ b/selection/reduced_optimization/tests/test_fs.py
@@ -0,0 +1,128 @@
+from __future__ import print_function
+import numpy as np
+
+from selection.reduced_optimization.initial_soln import selection, instance
+from selection.reduced_optimization.forward_stepwise_reduced import (neg_log_cube_probability_fs,
+                                                                     selection_probability_objective_fs,
+                                                                     sel_prob_gradient_map_fs,
+                                                                     selective_map_credible_fs)
+
+from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from selection.tests.decorators import (set_sampling_params_iftrue,
+                                        set_seed_iftrue)
+
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+
+
+def randomized_forward_step(X,
+                            y,
+                            beta,
+                            sigma):
+    from selection.api import randomization
+
+    n, p = X.shape
+
+    random_Z = np.random.standard_normal(p)
+    Z_stats = X.T.dot(y)
+    random_obs = X.T.dot(y) + random_Z
+
+    active_index = np.argmax(np.fabs(random_obs))
+    active = np.zeros(p, bool)
+    active[active_index] = 1
+    active_sign = np.sign(random_obs[active_index])
+    print("observed statistic", random_obs[active_index], Z_stats[active_index])
+    print("first step--chosen index and sign", active_index, active_sign)
+
+    feasible_point = np.fabs(random_obs[active_index])
+
+    noise_variance = sigma ** 2
+
+    randomizer = randomization.isotropic_gaussian((p,), 1.)
+
+    generative_X = X[:, active]
+    prior_variance = 1000.
+
+    grad_map = sel_prob_gradient_map_fs(X,
+                                        feasible_point,
+                                        active,
+                                        active_sign,
+                                        generative_X,
+                                        noise_variance,
+                                        randomizer)
+
+    inf = selective_map_credible_fs(y, grad_map, prior_variance)
+
+    samples = inf.posterior_samples()
+
+    adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
+
+    selective_mean = np.mean(samples, axis=0)
+
+    projection_active = X[:, active].dot(np.linalg.inv(X[:, active].T.dot(X[:, active])))
+    M_1 = prior_variance * (X.dot(X.T)) + noise_variance * np.identity(n)
+    M_2 = prior_variance * ((X.dot(X.T)).dot(projection_active))
+    M_3 = prior_variance * (projection_active.T.dot(X.dot(X.T)).dot(projection_active))
+    post_mean = M_2.T.dot(np.linalg.inv(M_1)).dot(y)
+
+    print("observed data", post_mean)
+
+    post_var = M_3 - M_2.T.dot(np.linalg.inv(M_1)).dot(M_2)
+
+    unadjusted_intervals = np.vstack([post_mean - 1.65 * (np.sqrt(post_var.diagonal())),
+                                      post_mean + 1.65 * (np.sqrt(post_var.diagonal()))])
+
+    coverage_ad = np.zeros(1)
+    coverage_unad = np.zeros(1)
+    ad_length = np.zeros(1)
+    unad_length = np.zeros(1)
+
+    true_val = projection_active.T.dot(X.dot(beta))
+
+
+    if (adjusted_intervals[0, 0] <= true_val[0]) and (true_val[0] <= adjusted_intervals[1, 0]):
+        coverage_ad[0] += 1
+
+    ad_length[0] = adjusted_intervals[1, 0] - adjusted_intervals[0, 0]
+    if (unadjusted_intervals[0, 0] <= true_val[0]) and (true_val[0] <= unadjusted_intervals[1, 0]):
+        coverage_unad[0] += 1
+
+    unad_length[0] = unadjusted_intervals[1, 0] - unadjusted_intervals[0, 0]
+
+    sel_cov = coverage_ad.sum() / 1.
+    naive_cov = coverage_unad.sum() / 1.
+    ad_len = ad_length.sum() / 1.
+    unad_len = unad_length.sum() / 1.
+    risk_ad = np.power(selective_mean - true_val, 2.).sum() / 1.
+    risk_unad = np.power(post_mean - true_val, 2.).sum() / 1.
+
+    return np.vstack([sel_cov, naive_cov, ad_len, unad_len, risk_ad, risk_unad])
+
+def test_fs():
+    n = 50
+    p = 300
+    s = 10
+    snr = 7.
+
+    sample = instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
+
+    ad_cov = 0.
+    unad_cov = 0.
+    ad_len = 0.
+    unad_len = 0.
+
+    X, y, beta, nonzero, sigma = sample.generate_response()
+
+    fs = randomized_forward_step(X,
+                                 y,
+                                 beta,
+                                 sigma)
+
+    ad_cov += fs[0, 0]
+    unad_cov += fs[1, 0]
+    ad_len += fs[2, 0]
+    unad_len += fs[3, 0]
+    print("\n")
+    print("adjusted and unadjusted coverage", ad_cov, unad_cov)
+    print("\n")
+    print("adjusted and unadjusted lengths", ad_len, unad_len)
diff --git a/selection/reduced_optimization/tests/test_reduced_lasso.py b/selection/reduced_optimization/tests/test_reduced_lasso.py
index 6262d87e9..ee3e76cf1 100644
--- a/selection/reduced_optimization/tests/test_reduced_lasso.py
+++ b/selection/reduced_optimization/tests/test_reduced_lasso.py
@@ -96,10 +96,10 @@ def randomized_lasso_trial(X,
         naive_cov = coverage_unad.sum() / nactive
         ad_len = ad_length.sum() / nactive
         unad_len = unad_length.sum() / nactive
-        bayes_risk_ad = np.power(selective_mean - true_val, 2.).sum() / nactive
-        bayes_risk_unad = np.power(post_mean - true_val, 2.).sum() / nactive
+        risk_ad = np.power(selective_mean - true_val, 2.).sum() / nactive
+        risk_unad = np.power(post_mean - true_val, 2.).sum() / nactive
 
-        return np.vstack([sel_cov, naive_cov, ad_len, unad_len, bayes_risk_ad, bayes_risk_unad])
+        return np.vstack([sel_cov, naive_cov, ad_len, unad_len, risk_ad, risk_unad])
 
     else:
         return None
@@ -133,6 +133,6 @@ def test_reduced_lasso():
         ad_len += lasso[2, 0]
         unad_len += lasso[3, 0]
         print("\n")
-        print("\n")
         print("adjusted and unadjusted coverage", ad_cov, unad_cov)
+        print("\n")
         print("adjusted and unadjusted lengths", ad_len, unad_len)

From 97175df4b85c896be8e7bdf867c607d3b96c053a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:11:37 -0700
Subject: [PATCH 131/617] cleaned up unnecessary files

---
 .../tests/test_fs_bayesian.py                 | 168 ------------------
 .../{test_reduced_lasso.py => test_lasso.py}  |   0
 .../tests/test_selection_random_lasso.py      |  57 ------
 3 files changed, 225 deletions(-)
 delete mode 100644 selection/reduced_optimization/tests/test_fs_bayesian.py
 rename selection/reduced_optimization/tests/{test_reduced_lasso.py => test_lasso.py} (100%)
 delete mode 100644 selection/reduced_optimization/tests/test_selection_random_lasso.py

diff --git a/selection/reduced_optimization/tests/test_fs_bayesian.py b/selection/reduced_optimization/tests/test_fs_bayesian.py
deleted file mode 100644
index 1359e6af2..000000000
--- a/selection/reduced_optimization/tests/test_fs_bayesian.py
+++ /dev/null
@@ -1,168 +0,0 @@
-from __future__ import print_function
-import time
-import sys
-import os
-
-import numpy as np
-from selection.reduced_optimization.initial_soln import selection, instance
-from selection.reduced_optimization.forward_stepwise_reduced import neg_log_cube_probability_fs, \
-    selection_probability_objective_fs, sel_prob_gradient_map_fs, selective_map_credible_fs
-
-class generate_data():
-
-    def __init__(self, n, p, sigma=1., rho=0., scale =True, center=True):
-         (self.n, self.p, self.sigma, self.rho) = (n, p, sigma, rho)
-
-         self.X = (np.sqrt(1 - self.rho) * np.random.standard_normal((self.n, self.p)) +
-                   np.sqrt(self.rho) * np.random.standard_normal(self.n)[:, None])
-         if center:
-             self.X -= self.X.mean(0)[None, :]
-         if scale:
-             self.X /= (self.X.std(0)[None, :] * np.sqrt(self.n))
-
-         beta_true = np.zeros(p)
-         u = np.random.uniform(0.,1.,p)
-         for i in range(p):
-             if u[i]<= 0.9:
-                 beta_true[i] = np.random.laplace(loc=0., scale=0.1)
-             else:
-                 beta_true[i] = np.random.laplace(loc=0., scale=1.)
-
-         self.beta = beta_true
-
-    def generate_response(self):
-
-        Y = (self.X.dot(self.beta) + np.random.standard_normal(self.n)) * self.sigma
-
-        return self.X, Y, self.beta * self.sigma, self.sigma
-
-def randomized_forward_step(X,
-                            y,
-                            beta,
-                            sigma):
-    from selection.api import randomization
-
-    n, p = X.shape
-
-    random_Z = np.random.standard_normal(p)
-    Z_stats = X.T.dot(y)
-    random_obs = X.T.dot(y) + random_Z
-
-    active_index = np.argmax(np.fabs(random_obs))
-    active = np.zeros(p, bool)
-    active[active_index] = 1
-    active_sign = np.sign(random_obs[active_index])
-    print("observed statistic", random_obs[active_index], Z_stats[active_index])
-    print("first step--chosen index and sign", active_index, active_sign)
-
-    feasible_point = np.fabs(random_obs[active_index])
-
-    noise_variance = sigma ** 2
-
-    randomizer = randomization.isotropic_gaussian((p,), 1.)
-
-    generative_X = X[:, active]
-    prior_variance = 1000.
-
-    grad_map = sel_prob_gradient_map_fs(X,
-                                        feasible_point,
-                                        active,
-                                        active_sign,
-                                        generative_X,
-                                        noise_variance,
-                                        randomizer)
-
-    inf = selective_map_credible_fs(y, grad_map, prior_variance)
-
-    samples = inf.posterior_samples()
-
-    adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
-    selective_mean = np.mean(samples, axis=0)
-
-    projection_active = X[:, active].dot(np.linalg.inv(X[:, active].T.dot(X[:, active])))
-    M_1 = prior_variance * (X.dot(X.T)) + noise_variance * np.identity(n)
-    M_2 = prior_variance * ((X.dot(X.T)).dot(projection_active))
-    M_3 = prior_variance * (projection_active.T.dot(X.dot(X.T)).dot(projection_active))
-    post_mean = M_2.T.dot(np.linalg.inv(M_1)).dot(y)
-
-    print("observed data", post_mean)
-
-    post_var = M_3 - M_2.T.dot(np.linalg.inv(M_1)).dot(M_2)
-
-    unadjusted_intervals = np.vstack([post_mean - 1.65 * (np.sqrt(post_var.diagonal())),
-                                      post_mean + 1.65 * (np.sqrt(post_var.diagonal()))])
-
-    coverage_ad = np.zeros(1)
-    coverage_unad = np.zeros(1)
-    ad_length = np.zeros(1)
-    unad_length = np.zeros(1)
-
-    true_val = projection_active.T.dot(X.dot(beta))
-
-
-    if (adjusted_intervals[0, 0] <= true_val[0]) and (true_val[0] <= adjusted_intervals[1, 0]):
-        coverage_ad[0] += 1
-
-    ad_length[0] = adjusted_intervals[1, 0] - adjusted_intervals[0, 0]
-    if (unadjusted_intervals[0, 0] <= true_val[0]) and (true_val[0] <= unadjusted_intervals[1, 0]):
-        coverage_unad[0] += 1
-
-    unad_length[0] = unadjusted_intervals[1, 0] - unadjusted_intervals[0, 0]
-
-    sel_cov = coverage_ad.sum() / 1.
-    naive_cov = coverage_unad.sum() / 1.
-    ad_len = ad_length.sum() / 1.
-    unad_len = unad_length.sum() / 1.
-    bayes_risk_ad = np.power(selective_mean - true_val, 2.).sum() / 1.
-    bayes_risk_unad = np.power(post_mean - true_val, 2.).sum() / 1.
-
-    return np.vstack([sel_cov, naive_cov, ad_len, unad_len, bayes_risk_ad, bayes_risk_unad])
-
-def test_FS():
-
-    n = 200
-    p = 1000
-    s = 0
-    snr = 5.
-
-    niter = 50
-    ad_cov = 0.
-    unad_cov = 0.
-    ad_len = 0.
-    unad_len = 0.
-    ad_risk = 0.
-    unad_risk = 0.
-
-    ### GENERATE X
-    np.random.seed(0)  # ensures same X
-
-    sample = generate_data(n, p)
-
-    ### GENERATE Y BASED ON SEED
-    for i in range(niter):
-        np.random.seed(i) # ensures different y
-        X, y, beta, sigma = sample.generate_response()
-        lasso = randomized_forward_step(X,
-                                        y,
-                                        beta,
-                                        sigma)
-
-        ad_cov += lasso[0, 0]
-        unad_cov += lasso[1, 0]
-        ad_len += lasso[2, 0]
-        unad_len += lasso[3, 0]
-        ad_risk += lasso[4, 0]
-        unad_risk += lasso[5, 0]
-
-        print("\n")
-        print("iteration completed", i)
-        print("\n")
-        print("adjusted and unadjusted coverage", ad_cov, unad_cov)
-        print("adjusted and unadjusted lengths", ad_len, unad_len)
-        print("adjusted and unadjusted risks", ad_risk, unad_risk)
-
-    print("adjusted and unadjusted coverage", ad_cov, unad_cov)
-    print("adjusted and unadjusted lengths", ad_len, unad_len)
-    print("adjusted and unadjusted risks", ad_risk, unad_risk)
-
-    #np.savetxt(outfile, lasso)
diff --git a/selection/reduced_optimization/tests/test_reduced_lasso.py b/selection/reduced_optimization/tests/test_lasso.py
similarity index 100%
rename from selection/reduced_optimization/tests/test_reduced_lasso.py
rename to selection/reduced_optimization/tests/test_lasso.py
diff --git a/selection/reduced_optimization/tests/test_selection_random_lasso.py b/selection/reduced_optimization/tests/test_selection_random_lasso.py
deleted file mode 100644
index bba9eab78..000000000
--- a/selection/reduced_optimization/tests/test_selection_random_lasso.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from __future__ import print_function
-import numpy as np
-import time
-import regreg.api as rr
-from selection.reduced_optimization.initial_soln import selection
-from selection.tests.instance import logistic_instance, gaussian_instance
-
-from ..par_random_lasso_reduced import (selection_probability_random_lasso, 
-                                        sel_inf_random_lasso)
-from ..estimator import M_estimator_approx
-from selection.api import randomization
-
-def test_selection():
-    n = 500
-    p = 100
-    s = 0
-    signal = 0.
-
-    np.random.seed(3)  # ensures different y
-    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, signal=signal)
-    lam = 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-    n, p = X.shape
-
-    loss = rr.glm.gaussian(X, y)
-    epsilon = 1. / np.sqrt(n)
-
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p),weights=dict(zip(np.arange(p), W)), lagrange=1.)
-    randomizer = randomization.isotropic_gaussian((p,), scale=1.)
-
-    M_est = M_estimator_approx(loss, epsilon, penalty, randomizer, 'gaussian', 'parametric')
-    M_est.solve_approx()
-    active = M_est._overall
-    active_set = np.asarray([i for i in range(p) if active[i]])
-    nactive = np.sum(active)
-
-    prior_variance = 1000.
-    noise_variance = sigma ** 2
-
-    generative_mean = np.zeros(p)
-    generative_mean[:nactive] = M_est.initial_soln[active]
-    sel_split = selection_probability_random_lasso(M_est, generative_mean)
-    min = sel_split.minimize2(nstep=200)
-    print(min[0], min[1])
-
-    test_point = np.append(M_est.observed_score_state, np.abs(M_est.initial_soln[M_est._overall]))
-    print("value of likelihood", sel_split.likelihood_loss.smooth_objective(test_point, mode= "func"))
-
-    inv_cov = np.linalg.inv(M_est.score_cov)
-    lik = (M_est.observed_score_state-generative_mean).T.dot(inv_cov).dot(M_est.observed_score_state-generative_mean)/2.
-    print("value of likelihood check", lik)
-    grad = inv_cov.dot(M_est.observed_score_state-generative_mean)
-    print("grad at likelihood loss", grad)
-
-
-

From f914c485d872e6daa6e03de7c3353f07e4bc16bd Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:15:16 -0700
Subject: [PATCH 132/617] changed snr to signal in instance for consistency

---
 selection/reduced_optimization/initial_soln.py     | 12 ++++++------
 selection/reduced_optimization/tests/test_fs.py    |  2 +-
 selection/reduced_optimization/tests/test_lasso.py |  4 +++-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/selection/reduced_optimization/initial_soln.py b/selection/reduced_optimization/initial_soln.py
index 813b2f0bd..4eedf4b64 100644
--- a/selection/reduced_optimization/initial_soln.py
+++ b/selection/reduced_optimization/initial_soln.py
@@ -33,14 +33,14 @@ def selection(X, y, random_Z, randomization_scale=1, sigma=None, method="theoret
 
 class instance(object):
 
-    def __init__(self, n, p, s, snr=5, sigma=1., rho=0, random_signs=False, scale =True, center=True):
+    def __init__(self, n, p, s, signal=5, sigma=1., rho=0, random_signs=False, scale =True, center=True):
          (self.n, self.p, self.s,
-         self.snr,
+         self.signal,
          self.sigma,
          self.rho) = (n, p, s,
-                     snr,
-                     sigma,
-                     rho)
+                      signal,
+                      sigma,
+                      rho)
 
          self.X = (np.sqrt(1 - self.rho) * np.random.standard_normal((self.n, self.p)) +
               np.sqrt(self.rho) * np.random.standard_normal(self.n)[:, None])
@@ -50,7 +50,7 @@ def __init__(self, n, p, s, snr=5, sigma=1., rho=0, random_signs=False, scale =T
              self.X /= (self.X.std(0)[None, :] * np.sqrt(self.n))
 
          self.beta = np.zeros(p)
-         self.beta[:self.s] = self.snr
+         self.beta[:self.s] = self.signal
          if random_signs:
              self.beta[:self.s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
          self.active = np.zeros(p, np.bool)
diff --git a/selection/reduced_optimization/tests/test_fs.py b/selection/reduced_optimization/tests/test_fs.py
index 0c3cb6f02..2a67844b3 100644
--- a/selection/reduced_optimization/tests/test_fs.py
+++ b/selection/reduced_optimization/tests/test_fs.py
@@ -104,7 +104,7 @@ def test_fs():
     s = 10
     snr = 7.
 
-    sample = instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
+    sample = instance(n=n, p=p, s=s, sigma=1., rho=0, signal=snr)
 
     ad_cov = 0.
     unad_cov = 0.
diff --git a/selection/reduced_optimization/tests/test_lasso.py b/selection/reduced_optimization/tests/test_lasso.py
index ee3e76cf1..74a0080c7 100644
--- a/selection/reduced_optimization/tests/test_lasso.py
+++ b/selection/reduced_optimization/tests/test_lasso.py
@@ -112,7 +112,7 @@ def test_reduced_lasso():
     s = 10
     snr = 7.
 
-    sample = instance(n=n, p=p, s=s, sigma=1., rho=0, snr=snr)
+    sample = instance(n=n, p=p, s=s, sigma=1., rho=0, signal=snr)
 
     ad_cov = 0.
     unad_cov = 0.
@@ -136,3 +136,5 @@ def test_reduced_lasso():
         print("adjusted and unadjusted coverage", ad_cov, unad_cov)
         print("\n")
         print("adjusted and unadjusted lengths", ad_len, unad_len)
+
+test_reduced_lasso()
\ No newline at end of file

From 52d15a2adcebabc00ad0479e3d43f29e490f97de Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:29:00 -0700
Subject: [PATCH 133/617] added inference post lasso using dual opt

---
 .../tests/test_dual_lasso.py                  | 138 ++++++++++++++++++
 .../reduced_optimization/tests/test_lasso.py  |   4 +-
 2 files changed, 139 insertions(+), 3 deletions(-)
 create mode 100644 selection/reduced_optimization/tests/test_dual_lasso.py

diff --git a/selection/reduced_optimization/tests/test_dual_lasso.py b/selection/reduced_optimization/tests/test_dual_lasso.py
new file mode 100644
index 000000000..93f08c944
--- /dev/null
+++ b/selection/reduced_optimization/tests/test_dual_lasso.py
@@ -0,0 +1,138 @@
+from __future__ import print_function
+import numpy as np
+
+from selection.api import randomization
+from selection.reduced_optimization.initial_soln import selection, instance
+
+from selection.reduced_optimization.dual_lasso import (selection_probability_lasso_dual,
+                                                       sel_prob_gradient_map_lasso,
+                                                       selective_inf_lasso)
+
+from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from selection.tests.decorators import (set_sampling_params_iftrue,
+                                        set_seed_iftrue)
+
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+# NOTE(review): these decorators wrap this helper (which takes no burnin/ndraw), not test_dual_lasso -- confirm placement.
+def randomized_lasso_trial(X,
+                           y,
+                           beta,
+                           sigma):
+    # One randomized-lasso trial: 90% adjusted vs. unadjusted intervals; returns a 6x1 array, or None if selection() gives None.
+    from selection.api import randomization
+
+    n, p = X.shape
+
+    random_Z = np.random.standard_normal(p)
+    sel = selection(X, y, random_Z)
+
+    # Unpack only after the None check: unpacking None raises TypeError, which made the guard below unreachable.
+    if sel is not None:
+        lam, epsilon, active, betaE, cube, initial_soln = sel
+        lagrange = lam * np.ones(p)
+        active_sign = np.sign(betaE)
+        nactive = active.sum()
+        print("number of selected variables by Lasso", nactive)
+
+        feasible_point = np.ones(p)
+        feasible_point[:nactive] = -np.fabs(betaE)
+
+        noise_variance = sigma ** 2
+
+        randomizer = randomization.isotropic_gaussian((p,), 1.)
+
+        generative_X = X[:, active]
+        prior_variance = 1000.
+
+        grad_map = sel_prob_gradient_map_lasso(X,
+                                               feasible_point,
+                                               active,
+                                               active_sign,
+                                               lagrange,
+                                               generative_X,
+                                               noise_variance,
+                                               randomizer,
+                                               epsilon)
+
+        inf = selective_inf_lasso(y, grad_map, prior_variance)
+
+        samples = inf.posterior_samples()
+
+        adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
+
+        selective_mean = np.mean(samples, axis=0)
+        # Unadjusted (no selection correction) posterior mean/variance; presumably a N(0, prior_variance * I) prior on beta -- confirm.
+        projection_active = X[:, active].dot(np.linalg.inv(X[:, active].T.dot(X[:, active])))
+        M_1 = prior_variance * (X.dot(X.T)) + noise_variance * np.identity(n)
+        M_2 = prior_variance * ((X.dot(X.T)).dot(projection_active))
+        M_3 = prior_variance * (projection_active.T.dot(X.dot(X.T)).dot(projection_active))
+        post_mean = M_2.T.dot(np.linalg.inv(M_1)).dot(y)
+
+        print("observed data", post_mean)
+
+        post_var = M_3 - M_2.T.dot(np.linalg.inv(M_1)).dot(M_2)
+
+        unadjusted_intervals = np.vstack([post_mean - 1.65 * (np.sqrt(post_var.diagonal())),
+                                          post_mean + 1.65 * (np.sqrt(post_var.diagonal()))])
+
+        coverage_ad = np.zeros(nactive)
+        coverage_unad = np.zeros(nactive)
+        ad_length = np.zeros(nactive)
+        unad_length = np.zeros(nactive)
+        # Target: least-squares coefficients of X.dot(beta) regressed on the selected columns.
+        true_val = projection_active.T.dot(X.dot(beta))
+
+        for l in range(nactive):
+            if (adjusted_intervals[0, l] <= true_val[l]) and (true_val[l] <= adjusted_intervals[1, l]):
+                coverage_ad[l] += 1
+            ad_length[l] = adjusted_intervals[1, l] - adjusted_intervals[0, l]
+            if (unadjusted_intervals[0, l] <= true_val[l]) and (true_val[l] <= unadjusted_intervals[1, l]):
+                coverage_unad[l] += 1
+            unad_length[l] = unadjusted_intervals[1, l] - unadjusted_intervals[0, l]
+
+
+        sel_cov = coverage_ad.sum() / nactive
+        naive_cov = coverage_unad.sum() / nactive
+        ad_len = ad_length.sum() / nactive
+        unad_len = unad_length.sum() / nactive
+        risk_ad = np.power(selective_mean - true_val, 2.).sum() / nactive
+        risk_unad = np.power(post_mean - true_val, 2.).sum() / nactive
+
+        return np.vstack([sel_cov, naive_cov, ad_len, unad_len, risk_ad, risk_unad])
+
+    else:
+        return None
+
+def test_dual_lasso():
+    ### set parameters
+    n = 300
+    p = 100
+    s = 10
+    snr = 7.
+
+    sample = instance(n=n, p=p, s=s, sigma=1., rho=0, signal=snr)
+
+    ad_cov = 0.
+    unad_cov = 0.
+    ad_len = 0.
+    unad_len = 0.
+    # Single trial (no loop): the sums above just hold this one trial's results, which are printed, not asserted.
+    X, y, beta, nonzero, sigma = sample.generate_response()
+
+    ### RUN LASSO AND TEST
+    lasso = randomized_lasso_trial(X,
+                                   y,
+                                   beta,
+                                   sigma)
+
+    if lasso is not None:
+        ad_cov += lasso[0,0]
+        unad_cov += lasso[1,0]
+        ad_len += lasso[2, 0]
+        unad_len += lasso[3, 0]
+        print("\n")
+        print("adjusted and unadjusted coverage", ad_cov, unad_cov)
+        print("\n")
+        print("adjusted and unadjusted lengths", ad_len, unad_len)
+
diff --git a/selection/reduced_optimization/tests/test_lasso.py b/selection/reduced_optimization/tests/test_lasso.py
index 74a0080c7..733bee7b9 100644
--- a/selection/reduced_optimization/tests/test_lasso.py
+++ b/selection/reduced_optimization/tests/test_lasso.py
@@ -105,7 +105,7 @@ def randomized_lasso_trial(X,
         return None
 
 
-def test_reduced_lasso():
+def test_lasso():
     ### set parameters
     n = 50
     p = 300
@@ -136,5 +136,3 @@ def test_reduced_lasso():
         print("adjusted and unadjusted coverage", ad_cov, unad_cov)
         print("\n")
         print("adjusted and unadjusted lengths", ad_len, unad_len)
-
-test_reduced_lasso()
\ No newline at end of file

From 278fc317d77f703d35d3b4363de9c5fc559633ea Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:38:58 -0700
Subject: [PATCH 134/617] added ms_lasso 2 stage screening

---
 .../tests/test_ms_lasso_2stage.py             | 154 ++++++++++++++++++
 1 file changed, 154 insertions(+)
 create mode 100644 selection/reduced_optimization/tests/test_ms_lasso_2stage.py

diff --git a/selection/reduced_optimization/tests/test_ms_lasso_2stage.py b/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
new file mode 100644
index 000000000..b39fa2324
--- /dev/null
+++ b/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
@@ -0,0 +1,154 @@
+from __future__ import print_function
+import numpy as np
+
+from selection.api import randomization
+from selection.reduced_optimization.initial_soln import selection, instance
+
+from selection.reduced_optimization.ms_lasso_2stage_reduced import (selection_probability_objective_ms_lasso,
+                                                                    sel_prob_gradient_map_ms_lasso,
+                                                                    selective_map_credible_ms_lasso)
+
+from selection.tests.flags import SMALL_SAMPLES, SET_SEED
+from selection.tests.decorators import (set_sampling_params_iftrue,
+                                        set_seed_iftrue)
+
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+
+def randomized_marginal_lasso_screening(X,
+                                        y,
+                                        beta,
+                                        sigma):
+
+    from selection.api import randomization
+
+    n, p = X.shape
+
+    random_Z = np.random.standard_normal(p)
+    Z_stats = X.T.dot(y)
+    randomized_Z_stats = np.true_divide(Z_stats, sigma) + random_Z
+
+    active_1 = np.zeros(p, bool)
+    active_1[np.fabs(randomized_Z_stats) > 2.33] = 1
+    active_signs_1 = np.sign(randomized_Z_stats[active_1])
+    nactive_1 = active_1.sum()
+    threshold = 2.33 * np.ones(p)
+
+    #print("active_1", active_1, nactive_1)
+
+    X_step2 = X[:, active_1]
+    random_Z_2 = np.random.standard_normal(nactive_1)
+    sel = selection(X_step2, y, random_Z_2)
+    lam, epsilon, active_2, betaE, cube, initial_soln = sel
+    noise_variance = 1.
+    lagrange = lam * np.ones(nactive_1)
+    nactive_2 = betaE.shape[0]
+    #print("active_2", active_2, nactive_2)
+    active_signs_2 = np.sign(betaE)
+
+    # getting the active indices
+    active = np.zeros(p, bool)
+    indices_stage2 = np.where(active_1 == 1)[0]
+    active[indices_stage2[active_2]] = 1
+    nactive = active.sum()
+    print("the active indices after two stages of screening", active.sum())
+
+    primal_feasible_1 = np.fabs(randomized_Z_stats[active_1])
+    primal_feasible_2 = np.fabs(betaE)
+    feasible_point = np.append(primal_feasible_1, primal_feasible_2)
+
+    randomizer = randomization.isotropic_gaussian((p,), 1.)
+
+    generative_X = X_step2[:, active_2]
+    prior_variance = 1000.
+
+    projection_active = X[:, active].dot(np.linalg.inv(X[:, active].T.dot(X[:, active])))
+    M_1 = prior_variance * (X.dot(X.T)) + noise_variance * np.identity(n)
+    M_2 = prior_variance * ((X.dot(X.T)).dot(projection_active))
+    M_3 = prior_variance * (projection_active.T.dot(X.dot(X.T)).dot(projection_active))
+    post_mean = M_2.T.dot(np.linalg.inv(M_1)).dot(y)
+
+    #print("observed data", post_mean)
+
+    post_var = M_3 - M_2.T.dot(np.linalg.inv(M_1)).dot(M_2)
+
+    unadjusted_intervals = np.vstack([post_mean - 1.65 * (np.sqrt(post_var.diagonal())),
+                                      post_mean + 1.65 * (np.sqrt(post_var.diagonal()))])
+
+    grad_map = sel_prob_gradient_map_ms_lasso(X,
+                                              feasible_point,  # in R^{|E|_1 + |E|_2}
+                                              active_1,  # the active set chosen by randomized marginal screening
+                                              active_2,  # the active set chosen by randomized lasso
+                                              active_signs_1,  # the set of signs of active coordinates chosen by ms
+                                              active_signs_2,  # the set of signs of active coordinates chosen by lasso
+                                              lagrange,  # in R^p
+                                              threshold,  # in R^p
+                                              generative_X,  # in R^{p}\times R^{n}
+                                              noise_variance,
+                                              randomizer,
+                                              epsilon)
+
+    ms = selective_map_credible_ms_lasso(y,
+                                         grad_map,
+                                         prior_variance)
+
+    samples = ms.posterior_samples()
+
+    adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
+
+    selective_mean = np.mean(samples, axis=0)
+
+    coverage_ad = np.zeros(nactive)
+    coverage_unad = np.zeros(nactive)
+    ad_length = np.zeros(nactive)
+    unad_length = np.zeros(nactive)
+
+    true_val = projection_active.T.dot(X.dot(beta))
+
+    for l in range(nactive):
+        if (adjusted_intervals[0, l] <= true_val[l]) and (true_val[l] <= adjusted_intervals[1, l]):
+            coverage_ad[l] += 1
+        ad_length[l] = adjusted_intervals[1, l] - adjusted_intervals[0, l]
+        if (unadjusted_intervals[0, l] <= true_val[l]) and (true_val[l] <= unadjusted_intervals[1, l]):
+            coverage_unad[l] += 1
+        unad_length[l] = unadjusted_intervals[1, l] - unadjusted_intervals[0, l]
+
+    sel_cov = coverage_ad.sum() / nactive
+    naive_cov = coverage_unad.sum() / nactive
+    ad_len = ad_length.sum() / nactive
+    unad_len = unad_length.sum() / nactive
+    risk_ad = np.power(selective_mean - true_val, 2.).sum() / nactive
+    risk_unad = np.power(post_mean - true_val, 2.).sum() / nactive
+
+    return np.vstack([sel_cov, naive_cov, ad_len, unad_len, risk_ad, risk_unad])
+
+def test_ms_lasso():
+    n = 500
+    p = 100
+    s = 10
+    snr = 7.
+
+    sample = instance(n=n, p=p, s=s, sigma=1., rho=0, signal=snr)
+
+    ad_cov = 0.
+    unad_cov = 0.
+    ad_len = 0.
+    unad_len = 0.
+
+    X, y, beta, nonzero, sigma = sample.generate_response()
+
+    ms_lasso = randomized_marginal_lasso_screening(X,
+                                                   y,
+                                                   beta,
+                                                   sigma)
+
+    ad_cov += ms_lasso[0, 0]
+    unad_cov += ms_lasso[1, 0]
+    ad_len += ms_lasso[2, 0]
+    unad_len += ms_lasso[3, 0]
+
+    print("\n")
+    print("adjusted and unadjusted coverage", ad_cov, unad_cov)
+    print("\n")
+    print("adjusted and unadjusted lengths", ad_len, unad_len)
+

From c3f54eec505d0c7493e2a22c8af7c2bb328c118a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:39:56 -0700
Subject: [PATCH 135/617] removed unnecessary import

---
 selection/reduced_optimization/tests/test_ms_lasso_2stage.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/selection/reduced_optimization/tests/test_ms_lasso_2stage.py b/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
index b39fa2324..a6681d2fd 100644
--- a/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
+++ b/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
@@ -20,8 +20,6 @@ def randomized_marginal_lasso_screening(X,
                                         beta,
                                         sigma):
 
-    from selection.api import randomization
-
     n, p = X.shape
 
     random_Z = np.random.standard_normal(p)

From 46a551923a45508e2e570c21f7183a89eeb26c05 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:41:33 -0700
Subject: [PATCH 136/617] added Bayesian generative model to initial_soln

---
 .../reduced_optimization/initial_soln.py      | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/selection/reduced_optimization/initial_soln.py b/selection/reduced_optimization/initial_soln.py
index 4eedf4b64..b015957e9 100644
--- a/selection/reduced_optimization/initial_soln.py
+++ b/selection/reduced_optimization/initial_soln.py
@@ -64,3 +64,31 @@ def generate_response(self):
         Y = (self.X.dot(self.beta) + self._noise()) * self.sigma
         return self.X, Y, self.beta * self.sigma, np.nonzero(self.active)[0], self.sigma
 
+
+class generate_data_bayesian():
+
+    def __init__(self, n, p, sigma=1., rho=0., scale =True, center=True):
+         (self.n, self.p, self.sigma, self.rho) = (n, p, sigma, rho)
+
+         self.X = (np.sqrt(1 - self.rho) * np.random.standard_normal((self.n, self.p)) +
+                   np.sqrt(self.rho) * np.random.standard_normal(self.n)[:, None])
+         if center:
+             self.X -= self.X.mean(0)[None, :]
+         if scale:
+             self.X /= (self.X.std(0)[None, :] * np.sqrt(self.n))
+
+         beta_true = np.zeros(p)
+         u = np.random.uniform(0.,1.,p)
+         for i in range(p):
+             if u[i]<= 0.95:
+                 beta_true[i] = np.random.laplace(loc=0., scale= 0.05)
+             else:
+                 beta_true[i] = np.random.laplace(loc=0., scale= 0.5)
+
+         self.beta = beta_true
+
+    def generate_response(self):
+
+        Y = (self.X.dot(self.beta) + np.random.standard_normal(self.n)) * self.sigma
+
+        return self.X, Y, self.beta * self.sigma, self.sigma

From 2ab55d827536e2a0c91f036ea139c609438abab4 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:43:16 -0700
Subject: [PATCH 137/617] added mixed model regime to initial_soln

---
 .../reduced_optimization/initial_soln.py      | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/selection/reduced_optimization/initial_soln.py b/selection/reduced_optimization/initial_soln.py
index b015957e9..e0541ab60 100644
--- a/selection/reduced_optimization/initial_soln.py
+++ b/selection/reduced_optimization/initial_soln.py
@@ -92,3 +92,34 @@ def generate_response(self):
         Y = (self.X.dot(self.beta) + np.random.standard_normal(self.n)) * self.sigma
 
         return self.X, Y, self.beta * self.sigma, self.sigma
+
+class instance_mixed(object):
+
+    def __init__(self, n, p, s, sigma=1., rho=0, random_signs=False, scale =True, center=True):
+         (self.n, self.p, self.s,
+         self.sigma,
+         self.rho) = (n, p, s,
+                     sigma,
+                     rho)
+
+         self.X = (np.sqrt(1 - self.rho) * np.random.standard_normal((self.n, self.p)) +
+              np.sqrt(self.rho) * np.random.standard_normal(self.n)[:, None])
+         if center:
+             self.X -= self.X.mean(0)[None, :]
+         if scale:
+             self.X /= (self.X.std(0)[None, :] * np.sqrt(self.n))
+
+         self.beta = np.zeros(p)
+         self.beta[:self.s] = np.linspace(0.5, 5.0, num=s)
+         if random_signs:
+             self.beta[:self.s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
+         self.active = np.zeros(p, np.bool)
+         self.active[:self.s] = True
+
+    def _noise(self):
+        return np.random.standard_normal(self.n)
+
+    def generate_response(self):
+
+        Y = (self.X.dot(self.beta) + self._noise()) * self.sigma
+        return self.X, Y, self.beta * self.sigma, np.nonzero(self.active)[0], self.sigma
\ No newline at end of file

From 0ed756ba9c6e6dedfba66cc3ee7f076b6aed3ca8 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 11:45:36 -0700
Subject: [PATCH 138/617] removed redundant file

---
 .../reduced_optimization/generative_model.py  | 53 -------------------
 1 file changed, 53 deletions(-)
 delete mode 100644 selection/reduced_optimization/generative_model.py

diff --git a/selection/reduced_optimization/generative_model.py b/selection/reduced_optimization/generative_model.py
deleted file mode 100644
index bb8087fce..000000000
--- a/selection/reduced_optimization/generative_model.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import numpy as np
-
-class generate_data():
-
-    def __init__(self, n, p, sigma=1., rho=0., scale =True, center=True):
-         (self.n, self.p, self.sigma, self.rho) = (n, p, sigma, rho)
-
-         self.X = (np.sqrt(1 - self.rho) * np.random.standard_normal((self.n, self.p)) +
-                   np.sqrt(self.rho) * np.random.standard_normal(self.n)[:, None])
-         if center:
-             self.X -= self.X.mean(0)[None, :]
-         if scale:
-             self.X /= (self.X.std(0)[None, :] * np.sqrt(self.n))
-
-         beta_true = np.zeros(p)
-         u = np.random.uniform(0.,1.,p)
-         for i in range(p):
-             if u[i]<= 0.9:
-                 beta_true[i] = np.random.laplace(loc=0., scale=0.1)
-             else:
-                 beta_true[i] = np.random.laplace(loc=0., scale=1.)
-
-         self.beta = beta_true
-
-    def generate_response(self):
-
-        Y = (self.X.dot(self.beta) + np.random.standard_normal(self.n)) * self.sigma
-
-        return self.X, Y, self.beta * self.sigma, self.sigma
-
-def generate_data_random(n, p, sigma=1., rho=0., scale =True, center=True):
-
-    X = (np.sqrt(1 - rho) * np.random.standard_normal((n, p)) + np.sqrt(rho) * np.random.standard_normal(n)[:, None])
-
-    if center:
-        X -= X.mean(0)[None, :]
-    if scale:
-        X /= (X.std(0)[None, :] * np.sqrt(n))
-
-    beta_true = np.zeros(p)
-    u = np.random.uniform(0., 1., p)
-    for i in range(p):
-        if u[i] <= 0.9:
-            beta_true[i] = np.random.laplace(loc=0., scale=0.1)
-        else:
-            beta_true[i] = np.random.laplace(loc=0., scale=1.)
-
-    beta = beta_true
-
-    Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma
-
-    return X, Y, beta * sigma, sigma
-

From 905d02fcef45b41656ed7e5a465f162ddcffc553 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 12:42:18 -0700
Subject: [PATCH 139/617] final check

---
 selection/reduced_optimization/tests/test_lasso.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/selection/reduced_optimization/tests/test_lasso.py b/selection/reduced_optimization/tests/test_lasso.py
index 733bee7b9..03c73a168 100644
--- a/selection/reduced_optimization/tests/test_lasso.py
+++ b/selection/reduced_optimization/tests/test_lasso.py
@@ -136,3 +136,5 @@ def test_lasso():
         print("adjusted and unadjusted coverage", ad_cov, unad_cov)
         print("\n")
         print("adjusted and unadjusted lengths", ad_len, unad_len)
+
+test_lasso()
\ No newline at end of file

From 2f960f2e1fccdcec415c32d2cc712bb8054288c3 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c431.SUNet>
Date: Tue, 29 Aug 2017 12:46:10 -0700
Subject: [PATCH 140/617] cleaned files

---
 .../tests/single_python_run.sbatch            | 40 -------------------
 .../tests/submit_python_jobs.sh               | 18 ---------
 .../reduced_optimization/tests/test_lasso.py  |  2 -
 3 files changed, 60 deletions(-)
 delete mode 100755 selection/reduced_optimization/tests/single_python_run.sbatch
 delete mode 100755 selection/reduced_optimization/tests/submit_python_jobs.sh

diff --git a/selection/reduced_optimization/tests/single_python_run.sbatch b/selection/reduced_optimization/tests/single_python_run.sbatch
deleted file mode 100755
index 837cebc5c..000000000
--- a/selection/reduced_optimization/tests/single_python_run.sbatch
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash 
-#
-#all commands that start with SBATCH contain commands that are just used by SLURM for scheduling
-#################
-#set a job name  
-#SBATCH --job-name=job
-#################  
-#a file for job output, you can check job progress, append the job ID with %j to make it unique
-#SBATCH --output=jobs/%j.out
-#################
-# a file for errors from the job
-#SBATCH --error=jobs/%j.err
-#################
-#time you think you need; default is 2 hours
-#format could be dd-hh:mm:ss, hh:mm:ss, mm:ss, or mm
-#SBATCH --time=30:00:00
-#################
-#SBATCH --qos=normal
-#SBATCH -p normal 
-#################
-#number of nodes you are requesting, the more you ask for the longer you wait
-#SBATCH --nodes=1
-#################
-#SBATCH --mem=4000
-
-# You can use srun if your job is parallel
-#srun R CMD BATCH  ./rtest.R 
-# otherwise: 
-
-
-SEED=$1
-DIR=$2
-
-# cd to program directory
-cd /home/snigdha/src/selective-inference/selection/reduced_optimization/tests
-#cd /Users/snigdhapanigrahi/selective-inference/selection/reduced_optimization/tests
-
-source /home/snigdha/src/selective-inference/.env/bin/activate
-
-python dual_lasso_test.py $SEED $DIR
\ No newline at end of file
diff --git a/selection/reduced_optimization/tests/submit_python_jobs.sh b/selection/reduced_optimization/tests/submit_python_jobs.sh
deleted file mode 100755
index 75dfc4606..000000000
--- a/selection/reduced_optimization/tests/submit_python_jobs.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-# Setup bash job headers
-
-# load local environment
-
-# setup dir if needed
-
-DIR=/scratch/users/snigdha/reduced_opt/outputs/experiment_dual_0
-
-#DIR=/Users/snigdhapanigrahi/scratch
-
-mkdir -p $DIR
-
-for i in {0..50}
-do
-	#bash single_python_run.sbatch $i $DIR
-	sbatch single_python_run.sbatch $i $DIR
-done
\ No newline at end of file
diff --git a/selection/reduced_optimization/tests/test_lasso.py b/selection/reduced_optimization/tests/test_lasso.py
index 03c73a168..733bee7b9 100644
--- a/selection/reduced_optimization/tests/test_lasso.py
+++ b/selection/reduced_optimization/tests/test_lasso.py
@@ -136,5 +136,3 @@ def test_lasso():
         print("adjusted and unadjusted coverage", ad_cov, unad_cov)
         print("\n")
         print("adjusted and unadjusted lengths", ad_len, unad_len)
-
-test_lasso()
\ No newline at end of file

From 99a36b25c72fa112947c6ccbb5cb8d3b351564bf Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 15:48:48 -0700
Subject: [PATCH 141/617] test_carved_lasso with small samples

---
 .../par_carved_reduced.py                     | 15 +++++------
 .../reduced_optimization/tests/__init__.py    |  0
 .../tests/test_carved_lasso.py                | 26 +++++++++++++------
 3 files changed, 24 insertions(+), 17 deletions(-)
 create mode 100644 selection/reduced_optimization/tests/__init__.py

diff --git a/selection/reduced_optimization/par_carved_reduced.py b/selection/reduced_optimization/par_carved_reduced.py
index 7b79e8e01..6d8ddbed4 100644
--- a/selection/reduced_optimization/par_carved_reduced.py
+++ b/selection/reduced_optimization/par_carved_reduced.py
@@ -1,5 +1,4 @@
 import numpy as np
-import sys
 
 import regreg.api as rr
 from .lasso_reduced import nonnegative_softmax_scaled, neg_log_cube_probability
@@ -181,7 +180,6 @@ def __init__(self, solver, prior_variance, coef=1., offset=None, quadratic=None)
         self.prior_variance = prior_variance
 
         initial = self.solver.initial_soln[self.solver._overall]
-        print("initial_state", initial)
 
         rr.smooth_atom.__init__(self,
                                 (self.param_shape,),
@@ -257,7 +255,6 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
             while True:
                 proposal = current - step * newton_step
                 proposed_value = objective(proposal)
-                # print("proposal", proposal)
 
                 if proposed_value <= current_value:
                     break
@@ -279,9 +276,9 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, langevin_steps=1500, burnin=100):
+    def posterior_samples(self, ndraw=1500, burnin=100):
         state = self.initial_state
-        print("here", state.shape)
+
         gradient_map = lambda x: -self.smooth_objective_post(x, 'grad')
         projection_map = lambda x: x
         stepsize = 1. / self.param_shape
@@ -289,13 +286,13 @@ def posterior_samples(self, langevin_steps=1500, burnin=100):
 
         samples = []
 
-        for i in xrange(langevin_steps):
+        for i in xrange(ndraw + burnin):
             sampler.next()
-            samples.append(sampler.state.copy())
-            sys.stderr.write("sample number: " + str(i) + "\n")
+            if i >= burnin:
+                samples.append(sampler.state.copy())
 
         samples = np.array(samples)
-        return samples[burnin:, :]
+        return samples
 
 
 
diff --git a/selection/reduced_optimization/tests/__init__.py b/selection/reduced_optimization/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/reduced_optimization/tests/test_carved_lasso.py b/selection/reduced_optimization/tests/test_carved_lasso.py
index 237fbb685..1edfb5eba 100644
--- a/selection/reduced_optimization/tests/test_carved_lasso.py
+++ b/selection/reduced_optimization/tests/test_carved_lasso.py
@@ -1,18 +1,22 @@
 from __future__ import print_function
 import numpy as np
 import regreg.api as rr
-from selection.tests.instance import logistic_instance, gaussian_instance
 
-from selection.reduced_optimization.par_carved_reduced import selection_probability_carved, sel_inf_carved
+from ...tests.instance import logistic_instance, gaussian_instance
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue 
 
-from selection.reduced_optimization.estimator import M_estimator_approx_carved
+from ..par_carved_reduced import selection_probability_carved, sel_inf_carved
+from ..estimator import M_estimator_approx_carved
 
 def carved_lasso_trial(X,
                        y,
                        beta,
                        sigma,
                        lam,
-                       estimation='parametric'):
+                       estimation='parametric',
+                       ndraw=1000,
+                       burnin=100):
     n, p = X.shape
 
     loss = rr.glm.gaussian(X, y)
@@ -46,7 +50,7 @@ def carved_lasso_trial(X,
         unadjusted_intervals = np.vstack([post_mean - 1.65 * (np.sqrt(post_var.diagonal())),
                                           post_mean + 1.65 * (np.sqrt(post_var.diagonal()))])
         grad_lasso = sel_inf_carved(M_est, prior_variance)
-        samples = grad_lasso.posterior_samples()
+        samples = grad_lasso.posterior_samples(ndraw=ndraw, burnin=burnin)
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
         selective_mean = np.mean(samples, axis=0)
@@ -77,13 +81,17 @@ def carved_lasso_trial(X,
     else:
         return np.vstack([0.,0.,0.,0.,0.,0.])
 
-def test_carved_lasso():
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+def test_carved_lasso(ndraw=1000, burnin=100):
     ### set parameters
     n = 1000
     p = 100
     s = 20
     snr = 7.
 
+    import sys
+    sys.stderr.write(`(ndraw, burnin)`)
+
     ad_cov = 0.
     unad_cov = 0.
     ad_len = 0.
@@ -92,12 +100,14 @@ def test_carved_lasso():
     unad_risk = 0.
 
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, signal=snr)
-    lam = 0.8 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    lam = 0.8 * np.mean(np.fabs(X.T.dot(np.random.standard_normal((n, 2000)))).max(0)) * sigma
     lasso = carved_lasso_trial(X,
                                y,
                                beta,
                                sigma,
-                               lam)
+                               lam,
+                               ndraw=ndraw,
+                               burnin=burnin)
 
 
     if lasso is not None:

From 1b3fb8edc9f729fab0aa7b2a0464bfecc7bf2409 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 15:54:08 -0700
Subject: [PATCH 142/617] test_dual_lasso with small samples

---
 selection/reduced_optimization/dual_lasso.py  | 25 ++++++-------
 .../tests/test_dual_lasso.py                  | 35 ++++++++++---------
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/selection/reduced_optimization/dual_lasso.py b/selection/reduced_optimization/dual_lasso.py
index 09f8af9da..d0568976a 100644
--- a/selection/reduced_optimization/dual_lasso.py
+++ b/selection/reduced_optimization/dual_lasso.py
@@ -1,5 +1,4 @@
 import numpy as np
-import sys
 
 import regreg.api as rr
 
@@ -154,7 +153,6 @@ def minimize2(self, step=1, nstep=30, tol=1.e-8):
             while True:
                 proposal = current - step * newton_step
                 proposed_value = objective(proposal)
-                # print(current_value, proposed_value, 'minimize')
                 if proposed_value <= current_value:
                     break
                 step *= 0.5
@@ -172,7 +170,6 @@ def minimize2(self, step=1, nstep=30, tol=1.e-8):
             if itercount % 4 == 0:
                 step *= 2
 
-        # print('iter', itercount)
         value = objective(current)
         return current, value
 
@@ -240,6 +237,7 @@ def smooth_objective(self, true_param, mode='both', check_feasibility=False, tol
 
 
 class selective_inf_lasso(rr.smooth_atom):
+
     def __init__(self,
                  y,
                  grad_map,
@@ -343,9 +341,8 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, langevin_steps=1500, burnin=50):
+    def posterior_samples(self, ndraw=1500, burnin=50):
         state = self.initial_state
-        sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
         projection_map = lambda x: x
         stepsize = 1. / self.E
@@ -353,18 +350,17 @@ def posterior_samples(self, langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in xrange(langevin_steps):
+        for i in xrange(ndraw + burnin):
             sampler.next()
-            samples.append(sampler.state.copy())
-            #print i, sampler.state.copy()
-            sys.stderr.write("sample number: " + str(i)+"\n")
+            if i >= burnin:
+                samples.append(sampler.state.copy())
 
         samples = np.array(samples)
-        return samples[burnin:, :]
+        return samples
 
-    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, ndraw=2000, burnin=0):
         state = self.initial_state
-        sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
+
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
         projection_map = lambda x: x
         stepsize = 1. / self.E
@@ -373,11 +369,10 @@ def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(langevin_steps):
+        for i in range(ndraw):
             sampler.next()
             sample = sampler.state.copy()
 
-            #print(sample)
             risk_1 = ((estimator_1-sample)**2).sum()
             print("adjusted risk", risk_1)
             post_risk_1 += risk_1
@@ -387,7 +382,7 @@ def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0
             post_risk_2 += risk_2
 
 
-        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
+        return post_risk_1/ndraw, post_risk_2/ndraw
 
 
 
diff --git a/selection/reduced_optimization/tests/test_dual_lasso.py b/selection/reduced_optimization/tests/test_dual_lasso.py
index 93f08c944..6157496f2 100644
--- a/selection/reduced_optimization/tests/test_dual_lasso.py
+++ b/selection/reduced_optimization/tests/test_dual_lasso.py
@@ -1,26 +1,23 @@
 from __future__ import print_function
 import numpy as np
 
-from selection.api import randomization
-from selection.reduced_optimization.initial_soln import selection, instance
+from ...randomized.api import randomization
+from ..initial_soln import selection, instance
 
-from selection.reduced_optimization.dual_lasso import (selection_probability_lasso_dual,
-                                                       sel_prob_gradient_map_lasso,
-                                                       selective_inf_lasso)
+from ..dual_lasso import (selection_probability_lasso_dual,
+                          sel_prob_gradient_map_lasso,
+                          selective_inf_lasso)
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
-
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                                 set_seed_iftrue)
 
 def randomized_lasso_trial(X,
                            y,
                            beta,
-                           sigma):
-
-    from selection.api import randomization
+                           sigma,
+                           ndraw=1000,
+                           burnin=100):
 
     n, p = X.shape
 
@@ -57,7 +54,7 @@ def randomized_lasso_trial(X,
 
         inf = selective_inf_lasso(y, grad_map, prior_variance)
 
-        samples = inf.posterior_samples()
+        samples = inf.posterior_samples(ndraw=ndraw, burnin=burnin)
 
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -104,7 +101,9 @@ def randomized_lasso_trial(X,
     else:
         return None
 
-def test_dual_lasso():
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+def test_dual_lasso(ndraw=1000, burnin=100):
     ### set parameters
     n = 300
     p = 100
@@ -124,7 +123,9 @@ def test_dual_lasso():
     lasso = randomized_lasso_trial(X,
                                    y,
                                    beta,
-                                   sigma)
+                                   sigma,
+                                   ndraw=ndraw,
+                                   burnin=burnin)
 
     if lasso is not None:
         ad_cov += lasso[0,0]

From 14eb37e00c593e9ed7db2bacdfa2e5cc34715784 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 15:57:47 -0700
Subject: [PATCH 143/617] test_fs with small samples

---
 .../forward_stepwise_reduced.py               | 15 +++-----
 .../reduced_optimization/tests/test_fs.py     | 36 ++++++++++---------
 2 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/selection/reduced_optimization/forward_stepwise_reduced.py b/selection/reduced_optimization/forward_stepwise_reduced.py
index 62f9a3b70..28944fd3e 100644
--- a/selection/reduced_optimization/forward_stepwise_reduced.py
+++ b/selection/reduced_optimization/forward_stepwise_reduced.py
@@ -1,5 +1,4 @@
 from math import log
-import sys
 import numpy as np
 import regreg.api as rr
 from scipy.stats import norm
@@ -212,7 +211,6 @@ def minimize2(self, step=1, nstep=30, tol=1.e-8):
             while True:
                 proposal = current - step * newton_step
                 proposed_value = objective(proposal)
-                # print(current_value, proposed_value, 'minimize')
                 if proposed_value <= current_value:
                     break
                 step *= 0.5
@@ -230,7 +228,6 @@ def minimize2(self, step=1, nstep=30, tol=1.e-8):
             if itercount % 4 == 0:
                 step *= 2
 
-        # print('iter', itercount)
         value = objective(current)
         return current, value
 
@@ -395,9 +392,8 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, langevin_steps=1000, burnin=100):
+    def posterior_samples(self, ndraw=1000, burnin=100):
         state = self.initial_state
-        print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
         projection_map = lambda x: x
         stepsize = 1. / self.E
@@ -405,11 +401,10 @@ def posterior_samples(self, langevin_steps=1000, burnin=100):
 
         samples = []
 
-        for i in range(langevin_steps):
+        for i in xrange(ndraw + burnin):
             sampler.next()
-            samples.append(sampler.state.copy())
-            #print i, sampler.state.copy()
-            sys.stderr.write("sample number: " + str(i) + "\n")
+            if i >= burnin:
+                samples.append(sampler.state.copy())
 
         samples = np.array(samples)
-        return samples[burnin:, :]
+        return samples
diff --git a/selection/reduced_optimization/tests/test_fs.py b/selection/reduced_optimization/tests/test_fs.py
index 2a67844b3..5508474e3 100644
--- a/selection/reduced_optimization/tests/test_fs.py
+++ b/selection/reduced_optimization/tests/test_fs.py
@@ -1,25 +1,23 @@
 from __future__ import print_function
 import numpy as np
 
-from selection.reduced_optimization.initial_soln import selection, instance
-from selection.reduced_optimization.forward_stepwise_reduced import (neg_log_cube_probability_fs,
-                                                                     selection_probability_objective_fs,
-                                                                     sel_prob_gradient_map_fs,
-                                                                     selective_map_credible_fs)
-
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
-
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+from ...randomized.api import randomization
+from ..initial_soln import selection, instance
+from ..forward_stepwise_reduced import (neg_log_cube_probability_fs,
+                                        selection_probability_objective_fs,
+                                        sel_prob_gradient_map_fs,
+                                        selective_map_credible_fs)
 
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                                 set_seed_iftrue)
 
 def randomized_forward_step(X,
                             y,
                             beta,
-                            sigma):
-    from selection.api import randomization
+                            sigma,
+                            ndraw=1000,
+                            burnin=100):
 
     n, p = X.shape
 
@@ -53,7 +51,7 @@ def randomized_forward_step(X,
 
     inf = selective_map_credible_fs(y, grad_map, prior_variance)
 
-    samples = inf.posterior_samples()
+    samples = inf.posterior_samples(ndraw=ndraw, burnin=burnin)
 
     adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -98,7 +96,9 @@ def randomized_forward_step(X,
 
     return np.vstack([sel_cov, naive_cov, ad_len, unad_len, risk_ad, risk_unad])
 
-def test_fs():
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=20, burnin=10)
+def test_fs(ndraw=1000, burnin=100):
     n = 50
     p = 300
     s = 10
@@ -116,7 +116,9 @@ def test_fs():
     fs = randomized_forward_step(X,
                                  y,
                                  beta,
-                                 sigma)
+                                 sigma,
+                                 ndraw=ndraw,
+                                 burnin=burnin)
 
     ad_cov += fs[0, 0]
     unad_cov += fs[1, 0]

From b626651b4ed588a5605ff402bfb67e513696bba3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 16:06:33 -0700
Subject: [PATCH 144/617] test_lasso and test_ms_lasso_2stage with small
 samples

---
 .../reduced_optimization/lasso_reduced.py     | 37 +++++++++----------
 .../ms_lasso_2stage_reduced.py                | 37 +++++++++----------
 .../reduced_optimization/tests/test_lasso.py  | 34 +++++++++--------
 .../tests/test_ms_lasso_2stage.py             | 34 +++++++++--------
 4 files changed, 71 insertions(+), 71 deletions(-)

diff --git a/selection/reduced_optimization/lasso_reduced.py b/selection/reduced_optimization/lasso_reduced.py
index db23df0c2..84fae997e 100644
--- a/selection/reduced_optimization/lasso_reduced.py
+++ b/selection/reduced_optimization/lasso_reduced.py
@@ -278,7 +278,6 @@ def minimize2(self, step=1, nstep=100, tol=1.e-8):
             while True:
                 count += 1
                 proposal = current - step * newton_step
-                # print("proposal", proposal[n:])
                 if np.all(proposal[n:] > 0):
                     break
                 step *= 0.5
@@ -291,7 +290,6 @@ def minimize2(self, step=1, nstep=100, tol=1.e-8):
             while True:
                 proposal = current - step * newton_step
                 proposed_value = objective(proposal)
-                # print(current_value, proposed_value, 'minimize')
                 if proposed_value <= current_value:
                     break
                 step *= 0.5
@@ -478,9 +476,9 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, langevin_steps=1500, burnin=50):
+    def posterior_samples(self, ndraw=1000, burnin=100):
         state = self.initial_state
-        sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
+
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
         projection_map = lambda x: x
         stepsize = 1. / self.E
@@ -488,16 +486,15 @@ def posterior_samples(self, langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in range(langevin_steps):
+        for i in range(ndraw + burnin):
             sampler.next()
-            samples.append(sampler.state.copy())
-            print(i, sampler.state.copy())
-            sys.stderr.write("sample number: " + str(i)+"\n")
+            if i >= burnin:
+                samples.append(sampler.state.copy())
 
         samples = np.array(samples)
-        return samples[burnin:, :]
+        return samples
 
-    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, ndraw=2000, burnin=0):
         state = self.initial_state
         sys.stderr.write("Number of selected variables by randomized lasso: "+str(state.shape)+"\n")
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -508,18 +505,18 @@ def posterior_risk(self, estimator_1, estimator_2, langevin_steps=2000, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(langevin_steps):
+        for i in range(ndraw + burnin):
             sampler.next()
-            sample = sampler.state.copy()
+            if i >= burnin:
+                sample = sampler.state.copy()
 
-            #print(sample)
-            risk_1 = ((estimator_1-sample)**2).sum()
-            print("adjusted risk", risk_1)
-            post_risk_1 += risk_1
+                risk_1 = ((estimator_1-sample)**2).sum()
+                print("adjusted risk", risk_1)
+                post_risk_1 += risk_1
 
-            risk_2 = ((estimator_2-sample) ** 2).sum()
-            print("unadjusted risk", risk_2)
-            post_risk_2 += risk_2
+                risk_2 = ((estimator_2-sample) ** 2).sum()
+                print("unadjusted risk", risk_2)
+                post_risk_2 += risk_2
 
 
-        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
+        return post_risk_1/ndraw, post_risk_2/ndraw
diff --git a/selection/reduced_optimization/ms_lasso_2stage_reduced.py b/selection/reduced_optimization/ms_lasso_2stage_reduced.py
index a1be52d8c..5454d26f0 100644
--- a/selection/reduced_optimization/ms_lasso_2stage_reduced.py
+++ b/selection/reduced_optimization/ms_lasso_2stage_reduced.py
@@ -407,9 +407,9 @@ def map_solve(self, step=1, nstep=100, tol=1.e-8):
         value = objective(current)
         return current, value
 
-    def posterior_samples(self, langevin_steps=1500, burnin=50):
+    def posterior_samples(self, ndraw=1000, burnin=100):
         state = self.initial_state
-        print("here", state.shape)
+
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
         projection_map = lambda x: x
         stepsize = 1. / self.E
@@ -417,16 +417,15 @@ def posterior_samples(self, langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in range(langevin_steps):
+        for i in range(ndraw + burnin):
             sampler.next()
-            samples.append(sampler.state.copy())
-            #print i, sampler.state.copy()
-            sys.stderr.write("sample number: " + str(i) + "\n")
+            if i >= burnin:
+                samples.append(sampler.state.copy())
 
         samples = np.array(samples)
-        return samples[burnin:, :]
+        return samples
 
-    def posterior_risk(self, estimator_1, estimator_2, langevin_steps=1200, burnin=0):
+    def posterior_risk(self, estimator_1, estimator_2, ndraw=1000, burnin=0):
         state = self.initial_state
         print("here", state.shape)
         gradient_map = lambda x: -self.smooth_objective(x, 'grad')
@@ -437,21 +436,21 @@ def posterior_risk(self, estimator_1, estimator_2, langevin_steps=1200, burnin=0
         post_risk_1 = 0.
         post_risk_2 = 0.
 
-        for i in range(langevin_steps):
+        for i in range(ndraw + burnin):
             sampler.next()
-            sample = sampler.state.copy()
-
-            #print(sample)
-            risk_1 = ((estimator_1-sample)**2).sum()
-            print("adjusted risk", risk_1)
-            post_risk_1 += risk_1
+            if i >= burnin:
+                sample = sampler.state.copy()
 
-            risk_2 = ((estimator_2-sample) ** 2).sum()
-            print("unadjusted risk", risk_2)
-            post_risk_2 += risk_2
+                #print(sample)
+                risk_1 = ((estimator_1-sample)**2).sum()
+                print("adjusted risk", risk_1)
+                post_risk_1 += risk_1
 
+                risk_2 = ((estimator_2-sample) ** 2).sum()
+                print("unadjusted risk", risk_2)
+                post_risk_2 += risk_2
 
-        return post_risk_1/langevin_steps, post_risk_2/langevin_steps
+        return post_risk_1/ndraw, post_risk_2/ndraw
 
 
 
diff --git a/selection/reduced_optimization/tests/test_lasso.py b/selection/reduced_optimization/tests/test_lasso.py
index 733bee7b9..bcb4446f9 100644
--- a/selection/reduced_optimization/tests/test_lasso.py
+++ b/selection/reduced_optimization/tests/test_lasso.py
@@ -1,20 +1,18 @@
 from __future__ import print_function
 import numpy as np
 
-from selection.api import randomization
-from selection.reduced_optimization.initial_soln import selection, instance
-from selection.reduced_optimization.lasso_reduced import (nonnegative_softmax_scaled,
-                                                          neg_log_cube_probability,
-                                                          selection_probability_lasso,
-                                                          sel_prob_gradient_map_lasso,
-                                                          selective_inf_lasso)
-
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
+from ...randomized.api import randomization
+from ..initial_soln import selection, instance
+from ..lasso_reduced import (nonnegative_softmax_scaled,
+                             neg_log_cube_probability,
+                             selection_probability_lasso,
+                             sel_prob_gradient_map_lasso,
+                             selective_inf_lasso)
+
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                                 set_seed_iftrue)
 
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
 def randomized_lasso_trial(X,
                            y,
                            beta,
@@ -57,7 +55,7 @@ def randomized_lasso_trial(X,
         inf = selective_inf_lasso(y, grad_map, prior_variance)
 
         # for the tests, just take a few steps
-        samples = inf.posterior_samples(langevin_steps=ndraw, burnin=burnin)
+        samples = inf.posterior_samples(ndraw=ndraw, burnin=burnin)
 
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -105,7 +103,9 @@ def randomized_lasso_trial(X,
         return None
 
 
-def test_lasso():
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+def test_lasso(ndraw=1000, burnin=100):
     ### set parameters
     n = 50
     p = 300
@@ -125,7 +125,9 @@ def test_lasso():
     lasso = randomized_lasso_trial(X,
                                    y,
                                    beta,
-                                   sigma)
+                                   sigma,
+                                   ndraw=ndraw,
+                                   burnin=burnin)
 
     if lasso is not None:
         ad_cov += lasso[0,0]
diff --git a/selection/reduced_optimization/tests/test_ms_lasso_2stage.py b/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
index a6681d2fd..dd0fa9264 100644
--- a/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
+++ b/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
@@ -1,24 +1,22 @@
 from __future__ import print_function
 import numpy as np
 
-from selection.api import randomization
-from selection.reduced_optimization.initial_soln import selection, instance
+from ...randomized.api import randomization
+from ..initial_soln import selection, instance
+from ..ms_lasso_2stage_reduced import (selection_probability_objective_ms_lasso,
+                                       sel_prob_gradient_map_ms_lasso,
+                                       selective_map_credible_ms_lasso)
 
-from selection.reduced_optimization.ms_lasso_2stage_reduced import (selection_probability_objective_ms_lasso,
-                                                                    sel_prob_gradient_map_ms_lasso,
-                                                                    selective_map_credible_ms_lasso)
-
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
-
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                           set_seed_iftrue)
 
 def randomized_marginal_lasso_screening(X,
                                         y,
                                         beta,
-                                        sigma):
+                                        sigma,
+                                        ndraw=1000,
+                                        burnin=100):
 
     n, p = X.shape
 
@@ -90,7 +88,7 @@ def randomized_marginal_lasso_screening(X,
                                          grad_map,
                                          prior_variance)
 
-    samples = ms.posterior_samples()
+    samples = ms.posterior_samples(ndraw=ndraw, burnin=burnin)
 
     adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -120,7 +118,9 @@ def randomized_marginal_lasso_screening(X,
 
     return np.vstack([sel_cov, naive_cov, ad_len, unad_len, risk_ad, risk_unad])
 
-def test_ms_lasso():
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+def test_ms_lasso(ndraw=1000, burnin=100):
     n = 500
     p = 100
     s = 10
@@ -138,7 +138,9 @@ def test_ms_lasso():
     ms_lasso = randomized_marginal_lasso_screening(X,
                                                    y,
                                                    beta,
-                                                   sigma)
+                                                   sigma,
+                                                   ndraw=ndraw,
+                                                   burnin=burnin)
 
     ad_cov += ms_lasso[0, 0]
     unad_cov += ms_lasso[1, 0]

From a110140b2639cb4373f624d335c1eb5b289d1c03 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 16:09:14 -0700
Subject: [PATCH 145/617] removing stderr write

---
 selection/reduced_optimization/tests/test_carved_lasso.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/selection/reduced_optimization/tests/test_carved_lasso.py b/selection/reduced_optimization/tests/test_carved_lasso.py
index 1edfb5eba..15137c93a 100644
--- a/selection/reduced_optimization/tests/test_carved_lasso.py
+++ b/selection/reduced_optimization/tests/test_carved_lasso.py
@@ -89,9 +89,6 @@ def test_carved_lasso(ndraw=1000, burnin=100):
     s = 20
     snr = 7.
 
-    import sys
-    sys.stderr.write(`(ndraw, burnin)`)
-
     ad_cov = 0.
     unad_cov = 0.
     ad_len = 0.

From 0cc8c11531e33cfc817ad8ca65a65cae28512eb9 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 16:26:10 -0700
Subject: [PATCH 146/617] cleanup in approx_ci, missing arg in R solve_QP

---
 {selection/approx_ci/tests => sandbox}/inference_hiv_data.py | 0
 selection/algorithms/tests/test_compareR.py                  | 2 +-
 selection/approx_ci/tests/api.py                             | 0
 selection/approx_ci/tests/plot_intervals.py                  | 0
 4 files changed, 1 insertion(+), 1 deletion(-)
 rename {selection/approx_ci/tests => sandbox}/inference_hiv_data.py (100%)
 delete mode 100644 selection/approx_ci/tests/api.py
 delete mode 100644 selection/approx_ci/tests/plot_intervals.py

diff --git a/selection/approx_ci/tests/inference_hiv_data.py b/sandbox/inference_hiv_data.py
similarity index 100%
rename from selection/approx_ci/tests/inference_hiv_data.py
rename to sandbox/inference_hiv_data.py
diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index fe8a50db0..504977837 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -323,7 +323,7 @@ def test_solve_QP(): # check the R coordinate descent LASSO solver
     kkt_tol = 1.e-12
     objective_tol = 1.e-12
     maxiter = 500
-    soln_R = selectiveInference:::solve_QP(t(X) %*% X, lam, maxiter, soln_R, -t(X) %*% Y, grad, ever_active, nactive, kkt_tol, objective_tol)$soln
+    soln_R = selectiveInference:::solve_QP(t(X) %*% X, lam, maxiter, soln_R, -t(X) %*% Y, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln
 
     """ 
 
diff --git a/selection/approx_ci/tests/api.py b/selection/approx_ci/tests/api.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/selection/approx_ci/tests/plot_intervals.py b/selection/approx_ci/tests/plot_intervals.py
deleted file mode 100644
index e69de29bb..000000000

From 494ae668b4fed9969e98ded78db793c2e85e4178 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 16:27:06 -0700
Subject: [PATCH 147/617] cleanup of test_glm but import broken

---
 selection/approx_ci/tests/test_glm.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index 8a007bd7b..98c8c9328 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -1,20 +1,23 @@
 from __future__ import print_function
+
 import numpy as np
 import time
 import regreg.api as rr
+
 import selection.tests.reports as reports
-from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
-from selection.approx_ci.estimator_approx import M_estimator_approx
+from ...randomized.api import randomization
+from ...tests.instance import logistic_instance, gaussian_instance
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+
+from ..ci_via_approx_density import approximate_conditional_density
+from ..approx_ci.estimator_approx import M_estimator_approx
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-from selection.randomized.query import naive_confidence_intervals
-from selection.randomized.query import naive_pvalues
+from ...randomized.query import naive_confidence_intervals
+from ...randomized.query import naive_pvalues
 
 
 @register_report(['cover', 'ci_length', 'truth', 'naive_cover', 'naive_pvalues'])
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @wait_for_return_value()
 def test_approximate_ci(n=100,
                         p=10,
@@ -25,16 +28,14 @@ def test_approximate_ci(n=100,
                         loss='gaussian',
                         randomizer='gaussian'):
 
-    from selection.api import randomization
-
     if loss == "gaussian":
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
         loss = rr.glm.gaussian(X, y)
     elif loss == "logistic":
         X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
-        loss = rr.glm.logistic(X, y)
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
+        loss = rr.glm.logistic(X, y)
 
     epsilon = 1. / np.sqrt(n)
 
@@ -116,4 +117,4 @@ def report(niter=50, **kwargs):
 
 
 if __name__=='__main__':
-    report()
\ No newline at end of file
+    report()

From a493b780c17cc171aeb8b32ca6e59c5ddfe45f11 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 16:42:47 -0700
Subject: [PATCH 148/617] using R software as submodule

---
 .gitmodules | 3 +++
 .travis.yml | 5 ++++-
 R-software  | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)
 create mode 160000 R-software

diff --git a/.gitmodules b/.gitmodules
index e95b07276..6fce99856 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "travis-tools"]
 	path = travis-tools
 	url = https://github.com/matthew-brett/travis-tools.git
+[submodule "R-software"]
+	path = R-software
+	url = git@github.com:selective-inference/R-software
diff --git a/.travis.yml b/.travis.yml
index 1a1cc5f23..3dd755539 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -38,13 +38,16 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
   - sudo apt-get install -y r-base r-base-dev r-cran-devtools
-  - sudo Rscript -e "library(devtools)" -e "install_github('selective-inference/R-software', subdir='selectiveInference')"
 
 
 install:
   # Install selection
   - pip install -r requirements.txt
   - pip install -e .
+  - cd R-software
+  - git submodule init
+  - git submodule update
+  - make install
   - travis_install $INSTALL_TYPE
 
 # command to run tests, e.g. python setup.py test
diff --git a/R-software b/R-software
new file mode 160000
index 000000000..bbf7e19f4
--- /dev/null
+++ b/R-software
@@ -0,0 +1 @@
+Subproject commit bbf7e19f45b6222519e85f08f9e2af02880b4421

From 01dccfdf55a702ac3e1e86f0a9740fe5cbb4bd14 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 16:49:32 -0700
Subject: [PATCH 149/617] changing url for submodule

---
 .gitmodules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index 6fce99856..22fcc6039 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -3,4 +3,4 @@
 	url = https://github.com/matthew-brett/travis-tools.git
 [submodule "R-software"]
 	path = R-software
-	url = git@github.com:selective-inference/R-software
+	url = https://github.com:selective-inference/R-software.git

From aabf1244ce818236b999e3b9109fb93b46fd4be4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 16:52:31 -0700
Subject: [PATCH 150/617] BF: url

---
 .gitmodules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index 22fcc6039..fb40dbf24 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -3,4 +3,4 @@
 	url = https://github.com/matthew-brett/travis-tools.git
 [submodule "R-software"]
 	path = R-software
-	url = https://github.com:selective-inference/R-software.git
+	url = https://github.com/selective-inference/R-software

From e3068e641c6a2036a9122428508f51279a65fc0c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 31 Aug 2017 17:20:18 -0700
Subject: [PATCH 151/617] rcpp package

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 3dd755539..177cf1293 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,7 +37,7 @@ before_install:
   - sudo add-apt-repository -y ppa:marutter/c2d4u
   - sudo add-apt-repository -y ppa:marutter/rrutter
   - sudo apt-get update
-  - sudo apt-get install -y r-base r-base-dev r-cran-devtools
+  - sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp
 
 
 install:

From d43f16bfed6627107a505a9343649824e4959bb0 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Thu, 31 Aug 2017 18:12:55 -0700
Subject: [PATCH 152/617] started test for sampling

---
 selection/randomized/randomization.py       |   4 +
 selection/randomized/tests/test_sampling.py | 132 ++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 selection/randomized/tests/test_sampling.py

diff --git a/selection/randomized/randomization.py b/selection/randomized/randomization.py
index debd91781..8104a834d 100644
--- a/selection/randomized/randomization.py
+++ b/selection/randomized/randomization.py
@@ -16,6 +16,7 @@ def __init__(self,
                  density,
                  cdf,
                  pdf,
+                 ppf,
                  derivative_log_density,
                  grad_negative_log_density,
                  sampler,
@@ -30,6 +31,7 @@ def __init__(self,
         self._density = density
         self._cdf = cdf
         self._pdf = pdf
+        self._ppf = ppf
         self._derivative_log_density = derivative_log_density
         self._grad_negative_log_density = grad_negative_log_density
         self._sampler = sampler
@@ -177,6 +179,7 @@ def laplace(shape, scale):
         sampler = lambda size: rv.rvs(size=shape + size)
         cdf = lambda x: laplace.cdf(x, loc=0., scale = scale)
         pdf = lambda x: laplace.pdf(x, loc=0., scale = scale)
+        ppf = lambda x: laplace.ppf(x, loc=0, scale=scale)
         derivative_log_density = lambda x: -np.sign(x)/scale
         grad_negative_log_density = lambda x: np.sign(x) / scale
         sampler = lambda size: rv.rvs(size=shape + size)
@@ -188,6 +191,7 @@ def laplace(shape, scale):
                              density,
                              cdf,
                              pdf,
+                             ppf,
                              derivative_log_density,
                              grad_negative_log_density,
                              sampler,
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
new file mode 100644
index 000000000..b217b292a
--- /dev/null
+++ b/selection/randomized/tests/test_sampling.py
@@ -0,0 +1,132 @@
+from itertools import product
+import numpy as np
+import nose.tools as nt
+
+from selection.randomized.convenience import lasso, step, threshold
+from selection.randomized.query import optimization_sampler
+from selection.tests.instance import (gaussian_instance,
+                               logistic_instance,
+                               poisson_instance)
+from selection.tests.flags import SMALL_SAMPLES
+from selection.tests.decorators import set_sampling_params_iftrue
+from scipy.stats import t as tdist
+
+
+def inverse_truncated_cdf(x, lower, upper, randomization):
+    #if (x<0 or x>1):
+    #    raise ValueError("argument for cdf inverse should be in (0,1)")
+    arg = randomization._cdf(lower) + np.multiply(x, randomization._cdf(upper) - randomization._cdf(lower))
+    return randomization._ppf(arg)
+
+
+def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
+    uniform_samples = np.random.uniform(0,1, size=(nsamples,randomization.shape[0]))
+    samples = np.zeros((nsamples, randomization.shape[0]))
+    for i in range(nsamples):
+        samples[i,:] = inverse_truncated_cdf(uniform_samples[i,:], lower, upper, randomization)
+    return samples
+
+
+def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =1000):
+    p = X.shape[1]
+    nactive = active.sum()
+    lower = np.zeros(p)
+    upper = np.zeros(p)
+    active_set = np.where(active)[0]
+
+    for i in range(nactive):
+        if signs[i]>0:
+            lower[i] = -np.dot(X[:, active_set[i]].T,y) + lam*signs[i]
+            upper[i] = np.inf
+        else:
+            lower[i] = -np.inf
+            upper[i] = -np.dot(X[:,active_set[i]].T,y) + lam*signs[i]
+
+    lower[range(nactive,p)] = -lam-np.dot(X[:, ~active].T, y)
+    upper[range(nactive,p)]= lam-np.dot(X[:,~active].T, y)
+
+    omega_samples = sampling_truncated_dist(lower, upper, randomization)
+
+    beta_samples = (omega_samples[:,:nactive]+np.dot(X[:,active].T, y))/(epsilon+1)
+    u_samples = (omega_samples[:, nactive:]+np.dot(X[:,~active].T, y))/lam
+
+    return np.concatenate((beta_samples, u_samples), axis=1)
+
+def orthogonal_design(n, p, s, signal, sigma, df=np.inf, random_signs=False):
+    X = np.identity(n)[:,:p]
+
+    beta = np.zeros(p)
+    signal = np.atleast_1d(signal)
+    if signal.shape == (1,):
+        beta[:s] = signal[0]
+    else:
+        beta[:s] = np.linspace(signal[0], signal[1], s)
+    if random_signs:
+        beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
+    np.random.shuffle(beta)
+
+    active = np.zeros(p, np.bool)
+    active[beta != 0] = True
+
+    # noise model
+    def _noise(n, df=np.inf):
+        if df == np.inf:
+            return np.random.standard_normal(n)
+        else:
+            sd_t = np.std(tdist.rvs(df, size=50000))
+        return tdist.rvs(df, size=n) / sd_t
+
+    Y = (X.dot(beta) + _noise(n, df)) * sigma
+    return X, Y, beta * sigma, np.nonzero(active)[0], sigma
+
+
+
+
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+def test_optimization_sampler(ndraw=1000, burnin=200):
+
+    cls = lasso
+    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace']):
+
+        inst, const = const_info
+
+        X, Y = orthogonal_design(n=100, p=10, s=0, signal=2, sigma=1)[:2]
+        n, p = X.shape
+
+        W = np.ones(X.shape[1]) * 1
+        conv = const(X, Y, W, randomizer=rand)
+        signs = conv.fit()
+        print("signs", signs)
+
+        marginalizing_groups = np.zeros(p, np.bool)
+        #marginalizing_groups[:int(p/2)] = True
+        conditioning_groups = ~marginalizing_groups
+        #conditioning_groups[-int(p/4):] = False
+
+        selected_features = conv._view.selection_variable['variables']
+
+        #conv.summary(selected_features,
+        #             ndraw=ndraw,
+        #             burnin=burnin,
+        #             compute_intervals=True)
+
+        #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+        #                           conditioning_groups=conditioning_groups)
+        conv._queries.setup_sampler(form_covariances=None)
+        conv._queries.setup_opt_state()
+        target_sampler = optimization_sampler(conv._queries)
+
+        S = target_sampler.sample(ndraw,
+                                  burnin,
+                                  stepsize=1.e-3)
+        print(S.shape)
+        print([np.mean(S[:,i]) for i in range(p)])
+
+        opt_samples = sample_opt_vars(X,Y, selected_features, signs, W[0], conv.ridge_term,
+                                      conv.randomizer, nsamples =1000)
+
+        print([np.mean(opt_samples[:,i]) for i in range(p)])
+
+
+
+test_optimization_sampler()
\ No newline at end of file

From bbd2a73759565eace618974f88eb45a292e26bb1 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slgi.SUNet>
Date: Thu, 31 Aug 2017 18:28:02 -0700
Subject: [PATCH 153/617] updated with master of jonathan

---
 .../tests/test_carved_lasso.py                | 23 ++++++++----
 .../tests/test_dual_lasso.py                  | 35 +++++++++---------
 .../reduced_optimization/tests/test_fs.py     | 36 +++++++++---------
 .../reduced_optimization/tests/test_lasso.py  | 36 +++++++++---------
 .../tests/test_ms_lasso_2stage.py             | 37 ++++++++++---------
 5 files changed, 90 insertions(+), 77 deletions(-)

diff --git a/selection/reduced_optimization/tests/test_carved_lasso.py b/selection/reduced_optimization/tests/test_carved_lasso.py
index 237fbb685..90f47c21e 100644
--- a/selection/reduced_optimization/tests/test_carved_lasso.py
+++ b/selection/reduced_optimization/tests/test_carved_lasso.py
@@ -1,18 +1,22 @@
 from __future__ import print_function
 import numpy as np
 import regreg.api as rr
-from selection.tests.instance import logistic_instance, gaussian_instance
 
-from selection.reduced_optimization.par_carved_reduced import selection_probability_carved, sel_inf_carved
+from ...tests.instance import logistic_instance, gaussian_instance
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue
 
-from selection.reduced_optimization.estimator import M_estimator_approx_carved
+from ..par_carved_reduced import selection_probability_carved, sel_inf_carved
+from ..estimator import M_estimator_approx_carved
 
 def carved_lasso_trial(X,
                        y,
                        beta,
                        sigma,
                        lam,
-                       estimation='parametric'):
+                       estimation='parametric',
+                       ndraw=1000,
+                       burnin=100):
     n, p = X.shape
 
     loss = rr.glm.gaussian(X, y)
@@ -46,7 +50,7 @@ def carved_lasso_trial(X,
         unadjusted_intervals = np.vstack([post_mean - 1.65 * (np.sqrt(post_var.diagonal())),
                                           post_mean + 1.65 * (np.sqrt(post_var.diagonal()))])
         grad_lasso = sel_inf_carved(M_est, prior_variance)
-        samples = grad_lasso.posterior_samples()
+        samples = grad_lasso.posterior_samples(ndraw=ndraw, burnin=burnin)
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
         selective_mean = np.mean(samples, axis=0)
@@ -77,7 +81,8 @@ def carved_lasso_trial(X,
     else:
         return np.vstack([0.,0.,0.,0.,0.,0.])
 
-def test_carved_lasso():
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+def test_carved_lasso(ndraw=1000, burnin=100):
     ### set parameters
     n = 1000
     p = 100
@@ -92,12 +97,14 @@ def test_carved_lasso():
     unad_risk = 0.
 
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, sigma=1., rho=0, signal=snr)
-    lam = 0.8 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    lam = 0.8 * np.mean(np.fabs(X.T.dot(np.random.standard_normal((n, 2000)))).max(0)) * sigma
     lasso = carved_lasso_trial(X,
                                y,
                                beta,
                                sigma,
-                               lam)
+                               lam,
+                               ndraw=ndraw,
+                               burnin=burnin)
 
 
     if lasso is not None:
diff --git a/selection/reduced_optimization/tests/test_dual_lasso.py b/selection/reduced_optimization/tests/test_dual_lasso.py
index 93f08c944..6157496f2 100644
--- a/selection/reduced_optimization/tests/test_dual_lasso.py
+++ b/selection/reduced_optimization/tests/test_dual_lasso.py
@@ -1,26 +1,23 @@
 from __future__ import print_function
 import numpy as np
 
-from selection.api import randomization
-from selection.reduced_optimization.initial_soln import selection, instance
+from ...randomized.api import randomization
+from ..initial_soln import selection, instance
 
-from selection.reduced_optimization.dual_lasso import (selection_probability_lasso_dual,
-                                                       sel_prob_gradient_map_lasso,
-                                                       selective_inf_lasso)
+from ..dual_lasso import (selection_probability_lasso_dual,
+                          sel_prob_gradient_map_lasso,
+                          selective_inf_lasso)
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
-
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                                 set_seed_iftrue)
 
 def randomized_lasso_trial(X,
                            y,
                            beta,
-                           sigma):
-
-    from selection.api import randomization
+                           sigma,
+                           ndraw=1000,
+                           burnin=100):
 
     n, p = X.shape
 
@@ -57,7 +54,7 @@ def randomized_lasso_trial(X,
 
         inf = selective_inf_lasso(y, grad_map, prior_variance)
 
-        samples = inf.posterior_samples()
+        samples = inf.posterior_samples(ndraw=ndraw, burnin=burnin)
 
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -104,7 +101,9 @@ def randomized_lasso_trial(X,
     else:
         return None
 
-def test_dual_lasso():
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+def test_dual_lasso(ndraw=1000, burnin=100):
     ### set parameters
     n = 300
     p = 100
@@ -124,7 +123,9 @@ def test_dual_lasso():
     lasso = randomized_lasso_trial(X,
                                    y,
                                    beta,
-                                   sigma)
+                                   sigma,
+                                   ndraw=ndraw,
+                                   burnin=burnin)
 
     if lasso is not None:
         ad_cov += lasso[0,0]
diff --git a/selection/reduced_optimization/tests/test_fs.py b/selection/reduced_optimization/tests/test_fs.py
index 2a67844b3..5508474e3 100644
--- a/selection/reduced_optimization/tests/test_fs.py
+++ b/selection/reduced_optimization/tests/test_fs.py
@@ -1,25 +1,23 @@
 from __future__ import print_function
 import numpy as np
 
-from selection.reduced_optimization.initial_soln import selection, instance
-from selection.reduced_optimization.forward_stepwise_reduced import (neg_log_cube_probability_fs,
-                                                                     selection_probability_objective_fs,
-                                                                     sel_prob_gradient_map_fs,
-                                                                     selective_map_credible_fs)
-
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
-
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+from ...randomized.api import randomization
+from ..initial_soln import selection, instance
+from ..forward_stepwise_reduced import (neg_log_cube_probability_fs,
+                                        selection_probability_objective_fs,
+                                        sel_prob_gradient_map_fs,
+                                        selective_map_credible_fs)
 
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                                 set_seed_iftrue)
 
 def randomized_forward_step(X,
                             y,
                             beta,
-                            sigma):
-    from selection.api import randomization
+                            sigma,
+                            ndraw=1000,
+                            burnin=100):
 
     n, p = X.shape
 
@@ -53,7 +51,7 @@ def randomized_forward_step(X,
 
     inf = selective_map_credible_fs(y, grad_map, prior_variance)
 
-    samples = inf.posterior_samples()
+    samples = inf.posterior_samples(ndraw=ndraw, burnin=burnin)
 
     adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -98,7 +96,9 @@ def randomized_forward_step(X,
 
     return np.vstack([sel_cov, naive_cov, ad_len, unad_len, risk_ad, risk_unad])
 
-def test_fs():
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=20, burnin=10)
+def test_fs(ndraw=1000, burnin=100):
     n = 50
     p = 300
     s = 10
@@ -116,7 +116,9 @@ def test_fs():
     fs = randomized_forward_step(X,
                                  y,
                                  beta,
-                                 sigma)
+                                 sigma,
+                                 ndraw=ndraw,
+                                 burnin=burnin)
 
     ad_cov += fs[0, 0]
     unad_cov += fs[1, 0]
diff --git a/selection/reduced_optimization/tests/test_lasso.py b/selection/reduced_optimization/tests/test_lasso.py
index 733bee7b9..6462446b0 100644
--- a/selection/reduced_optimization/tests/test_lasso.py
+++ b/selection/reduced_optimization/tests/test_lasso.py
@@ -1,20 +1,18 @@
 from __future__ import print_function
 import numpy as np
 
-from selection.api import randomization
-from selection.reduced_optimization.initial_soln import selection, instance
-from selection.reduced_optimization.lasso_reduced import (nonnegative_softmax_scaled,
-                                                          neg_log_cube_probability,
-                                                          selection_probability_lasso,
-                                                          sel_prob_gradient_map_lasso,
-                                                          selective_inf_lasso)
-
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
+from ...randomized.api import randomization
+from ..initial_soln import selection, instance
+from ..lasso_reduced import (nonnegative_softmax_scaled,
+                             neg_log_cube_probability,
+                             selection_probability_lasso,
+                             sel_prob_gradient_map_lasso,
+                             selective_inf_lasso)
+
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                                 set_seed_iftrue)
 
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
 def randomized_lasso_trial(X,
                            y,
                            beta,
@@ -57,7 +55,7 @@ def randomized_lasso_trial(X,
         inf = selective_inf_lasso(y, grad_map, prior_variance)
 
         # for the tests, just take a few steps
-        samples = inf.posterior_samples(langevin_steps=ndraw, burnin=burnin)
+        samples = inf.posterior_samples(ndraw=ndraw, burnin=burnin)
 
         adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -105,7 +103,9 @@ def randomized_lasso_trial(X,
         return None
 
 
-def test_lasso():
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+def test_lasso(ndraw=1000, burnin=100):
     ### set parameters
     n = 50
     p = 300
@@ -125,7 +125,9 @@ def test_lasso():
     lasso = randomized_lasso_trial(X,
                                    y,
                                    beta,
-                                   sigma)
+                                   sigma,
+                                   ndraw=ndraw,
+                                   burnin=burnin)
 
     if lasso is not None:
         ad_cov += lasso[0,0]
@@ -135,4 +137,4 @@ def test_lasso():
         print("\n")
         print("adjusted and unadjusted coverage", ad_cov, unad_cov)
         print("\n")
-        print("adjusted and unadjusted lengths", ad_len, unad_len)
+        print("adjusted and unadjusted lengths", ad_len, unad_len)
\ No newline at end of file
diff --git a/selection/reduced_optimization/tests/test_ms_lasso_2stage.py b/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
index a6681d2fd..c7ab0bbec 100644
--- a/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
+++ b/selection/reduced_optimization/tests/test_ms_lasso_2stage.py
@@ -1,24 +1,22 @@
 from __future__ import print_function
 import numpy as np
 
-from selection.api import randomization
-from selection.reduced_optimization.initial_soln import selection, instance
+from ...randomized.api import randomization
+from ..initial_soln import selection, instance
+from ..ms_lasso_2stage_reduced import (selection_probability_objective_ms_lasso,
+                                       sel_prob_gradient_map_ms_lasso,
+                                       selective_map_credible_ms_lasso)
 
-from selection.reduced_optimization.ms_lasso_2stage_reduced import (selection_probability_objective_ms_lasso,
-                                                                    sel_prob_gradient_map_ms_lasso,
-                                                                    selective_map_credible_ms_lasso)
-
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import (set_sampling_params_iftrue,
-                                        set_seed_iftrue)
-
-@set_seed_iftrue(SET_SEED)
-@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.decorators import (set_sampling_params_iftrue,
+                           set_seed_iftrue)
 
 def randomized_marginal_lasso_screening(X,
                                         y,
                                         beta,
-                                        sigma):
+                                        sigma,
+                                        ndraw=1000,
+                                        burnin=100):
 
     n, p = X.shape
 
@@ -90,7 +88,7 @@ def randomized_marginal_lasso_screening(X,
                                          grad_map,
                                          prior_variance)
 
-    samples = ms.posterior_samples()
+    samples = ms.posterior_samples(ndraw=ndraw, burnin=burnin)
 
     adjusted_intervals = np.vstack([np.percentile(samples, 5, axis=0), np.percentile(samples, 95, axis=0)])
 
@@ -120,7 +118,9 @@ def randomized_marginal_lasso_screening(X,
 
     return np.vstack([sel_cov, naive_cov, ad_len, unad_len, risk_ad, risk_unad])
 
-def test_ms_lasso():
+@set_seed_iftrue(SET_SEED)
+@set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=20)
+def test_ms_lasso(ndraw=1000, burnin=100):
     n = 500
     p = 100
     s = 10
@@ -138,7 +138,9 @@ def test_ms_lasso():
     ms_lasso = randomized_marginal_lasso_screening(X,
                                                    y,
                                                    beta,
-                                                   sigma)
+                                                   sigma,
+                                                   ndraw=ndraw,
+                                                   burnin=burnin)
 
     ad_cov += ms_lasso[0, 0]
     unad_cov += ms_lasso[1, 0]
@@ -148,5 +150,4 @@ def test_ms_lasso():
     print("\n")
     print("adjusted and unadjusted coverage", ad_cov, unad_cov)
     print("\n")
-    print("adjusted and unadjusted lengths", ad_len, unad_len)
-
+    print("adjusted and unadjusted lengths", ad_len, unad_len)
\ No newline at end of file

From 7516353a0aae0b55bdee93f0ae326cc4d0f0e5c2 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slgi.SUNet>
Date: Thu, 31 Aug 2017 18:34:36 -0700
Subject: [PATCH 154/617] renamed directory as bayesian

---
 selection/{reduced_optimization => bayesian}/__init__.py          | 0
 selection/{reduced_optimization => bayesian}/barrier.py           | 0
 .../{reduced_optimization => bayesian}/credible_intervals.py      | 0
 selection/{reduced_optimization => bayesian}/dual_lasso.py        | 0
 selection/{reduced_optimization => bayesian}/estimator.py         | 0
 .../forward_stepwise_reduced.py                                   | 0
 selection/{reduced_optimization => bayesian}/initial_soln.py      | 0
 selection/{reduced_optimization => bayesian}/lasso_reduced.py     | 0
 .../marginal_screening_reduced.py                                 | 0
 .../{reduced_optimization => bayesian}/ms_lasso_2stage_reduced.py | 0
 .../{reduced_optimization => bayesian}/par_carved_reduced.py      | 0
 .../par_random_lasso_reduced.py                                   | 0
 .../{reduced_optimization => bayesian}/random_lasso_reduced.py    | 0
 .../{reduced_optimization => bayesian}/tests/test_carved_lasso.py | 0
 .../{reduced_optimization => bayesian}/tests/test_dual_lasso.py   | 0
 selection/{reduced_optimization => bayesian}/tests/test_fs.py     | 0
 selection/{reduced_optimization => bayesian}/tests/test_lasso.py  | 0
 .../tests/test_ms_lasso_2stage.py                                 | 0
 18 files changed, 0 insertions(+), 0 deletions(-)
 rename selection/{reduced_optimization => bayesian}/__init__.py (100%)
 rename selection/{reduced_optimization => bayesian}/barrier.py (100%)
 rename selection/{reduced_optimization => bayesian}/credible_intervals.py (100%)
 rename selection/{reduced_optimization => bayesian}/dual_lasso.py (100%)
 rename selection/{reduced_optimization => bayesian}/estimator.py (100%)
 rename selection/{reduced_optimization => bayesian}/forward_stepwise_reduced.py (100%)
 rename selection/{reduced_optimization => bayesian}/initial_soln.py (100%)
 rename selection/{reduced_optimization => bayesian}/lasso_reduced.py (100%)
 rename selection/{reduced_optimization => bayesian}/marginal_screening_reduced.py (100%)
 rename selection/{reduced_optimization => bayesian}/ms_lasso_2stage_reduced.py (100%)
 rename selection/{reduced_optimization => bayesian}/par_carved_reduced.py (100%)
 rename selection/{reduced_optimization => bayesian}/par_random_lasso_reduced.py (100%)
 rename selection/{reduced_optimization => bayesian}/random_lasso_reduced.py (100%)
 rename selection/{reduced_optimization => bayesian}/tests/test_carved_lasso.py (100%)
 rename selection/{reduced_optimization => bayesian}/tests/test_dual_lasso.py (100%)
 rename selection/{reduced_optimization => bayesian}/tests/test_fs.py (100%)
 rename selection/{reduced_optimization => bayesian}/tests/test_lasso.py (100%)
 rename selection/{reduced_optimization => bayesian}/tests/test_ms_lasso_2stage.py (100%)

diff --git a/selection/reduced_optimization/__init__.py b/selection/bayesian/__init__.py
similarity index 100%
rename from selection/reduced_optimization/__init__.py
rename to selection/bayesian/__init__.py
diff --git a/selection/reduced_optimization/barrier.py b/selection/bayesian/barrier.py
similarity index 100%
rename from selection/reduced_optimization/barrier.py
rename to selection/bayesian/barrier.py
diff --git a/selection/reduced_optimization/credible_intervals.py b/selection/bayesian/credible_intervals.py
similarity index 100%
rename from selection/reduced_optimization/credible_intervals.py
rename to selection/bayesian/credible_intervals.py
diff --git a/selection/reduced_optimization/dual_lasso.py b/selection/bayesian/dual_lasso.py
similarity index 100%
rename from selection/reduced_optimization/dual_lasso.py
rename to selection/bayesian/dual_lasso.py
diff --git a/selection/reduced_optimization/estimator.py b/selection/bayesian/estimator.py
similarity index 100%
rename from selection/reduced_optimization/estimator.py
rename to selection/bayesian/estimator.py
diff --git a/selection/reduced_optimization/forward_stepwise_reduced.py b/selection/bayesian/forward_stepwise_reduced.py
similarity index 100%
rename from selection/reduced_optimization/forward_stepwise_reduced.py
rename to selection/bayesian/forward_stepwise_reduced.py
diff --git a/selection/reduced_optimization/initial_soln.py b/selection/bayesian/initial_soln.py
similarity index 100%
rename from selection/reduced_optimization/initial_soln.py
rename to selection/bayesian/initial_soln.py
diff --git a/selection/reduced_optimization/lasso_reduced.py b/selection/bayesian/lasso_reduced.py
similarity index 100%
rename from selection/reduced_optimization/lasso_reduced.py
rename to selection/bayesian/lasso_reduced.py
diff --git a/selection/reduced_optimization/marginal_screening_reduced.py b/selection/bayesian/marginal_screening_reduced.py
similarity index 100%
rename from selection/reduced_optimization/marginal_screening_reduced.py
rename to selection/bayesian/marginal_screening_reduced.py
diff --git a/selection/reduced_optimization/ms_lasso_2stage_reduced.py b/selection/bayesian/ms_lasso_2stage_reduced.py
similarity index 100%
rename from selection/reduced_optimization/ms_lasso_2stage_reduced.py
rename to selection/bayesian/ms_lasso_2stage_reduced.py
diff --git a/selection/reduced_optimization/par_carved_reduced.py b/selection/bayesian/par_carved_reduced.py
similarity index 100%
rename from selection/reduced_optimization/par_carved_reduced.py
rename to selection/bayesian/par_carved_reduced.py
diff --git a/selection/reduced_optimization/par_random_lasso_reduced.py b/selection/bayesian/par_random_lasso_reduced.py
similarity index 100%
rename from selection/reduced_optimization/par_random_lasso_reduced.py
rename to selection/bayesian/par_random_lasso_reduced.py
diff --git a/selection/reduced_optimization/random_lasso_reduced.py b/selection/bayesian/random_lasso_reduced.py
similarity index 100%
rename from selection/reduced_optimization/random_lasso_reduced.py
rename to selection/bayesian/random_lasso_reduced.py
diff --git a/selection/reduced_optimization/tests/test_carved_lasso.py b/selection/bayesian/tests/test_carved_lasso.py
similarity index 100%
rename from selection/reduced_optimization/tests/test_carved_lasso.py
rename to selection/bayesian/tests/test_carved_lasso.py
diff --git a/selection/reduced_optimization/tests/test_dual_lasso.py b/selection/bayesian/tests/test_dual_lasso.py
similarity index 100%
rename from selection/reduced_optimization/tests/test_dual_lasso.py
rename to selection/bayesian/tests/test_dual_lasso.py
diff --git a/selection/reduced_optimization/tests/test_fs.py b/selection/bayesian/tests/test_fs.py
similarity index 100%
rename from selection/reduced_optimization/tests/test_fs.py
rename to selection/bayesian/tests/test_fs.py
diff --git a/selection/reduced_optimization/tests/test_lasso.py b/selection/bayesian/tests/test_lasso.py
similarity index 100%
rename from selection/reduced_optimization/tests/test_lasso.py
rename to selection/bayesian/tests/test_lasso.py
diff --git a/selection/reduced_optimization/tests/test_ms_lasso_2stage.py b/selection/bayesian/tests/test_ms_lasso_2stage.py
similarity index 100%
rename from selection/reduced_optimization/tests/test_ms_lasso_2stage.py
rename to selection/bayesian/tests/test_ms_lasso_2stage.py

From 3cc2e11b4debf7cb5ebd0e9611d59e64d75c2f8e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slgi.SUNet>
Date: Thu, 31 Aug 2017 19:25:44 -0700
Subject: [PATCH 155/617] updated randomized lasso inference and test_glm

---
 selection/approx_ci/ci_via_approx_density.py | 289 ++++++++++++-------
 selection/approx_ci/tests/plot_intervals.py  |   0
 selection/approx_ci/tests/test_glm.py        | 151 +++++-----
 3 files changed, 261 insertions(+), 179 deletions(-)
 delete mode 100644 selection/approx_ci/tests/plot_intervals.py

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index 601e45983..1b34448b4 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -1,83 +1,142 @@
+from __future__ import print_function
 from math import log
+import sys
+from scipy.stats import norm as normal
+
 import numpy as np
 import regreg.api as rr
-from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
-from scipy.stats import norm
-import sys
 
-def myround(a, decimals=1):
-    a_x = np.round(a, decimals=1)* 10.
-    rem = np.zeros(a.shape[0], bool)
-    rem[(np.remainder(a_x, 2) == 1)] = 1
-    a_x[rem] = a_x[rem] + 1.
-    return a_x/10.
+from selection.randomized.M_estimator import M_estimator
+
+class M_estimator_map(M_estimator):
+    """M_estimator that stores `randomization_scale` and whose transforms are reordered active-first by `solve_approx`."""
+    def __init__(self, loss, epsilon, penalty, randomization, randomization_scale = 1.):
+        M_estimator.__init__(self, loss, epsilon, penalty, randomization)
+        self.randomization_scale = randomization_scale
 
+    def solve_approx(self):
+        """Call `solve()`, permute opt/score transforms so active rows come first, and cache what `setup_map` reads."""
+        self.solve()
+        active, inactive = self._overall, ~self._overall
+        (_opt_linear_term, _opt_affine_term) = self.opt_transform
+        self._opt_linear_term = np.concatenate(
+            (_opt_linear_term[active, :], _opt_linear_term[inactive, :]), 0)
+        self._opt_affine_term = np.concatenate((_opt_affine_term[active], _opt_affine_term[inactive]), 0)
+        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
+
+        (_score_linear_term, _) = self.score_transform
+        self._score_linear_term = np.concatenate(
+            (_score_linear_term[active, :], _score_linear_term[inactive, :]), 0)
+        self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
+        self.feasible_point = np.abs(self.initial_soln[active])
+        # .values() replaces the Python-2-only .iteritems() loop; same iteration order, works on Python 3 too.
+        lagrange = np.asarray(list(self.penalty.weights.values()))
+        self.inactive_lagrange = lagrange[inactive]
+
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        # score_cov is block diagonal: inv(X_E' X_E) for the active block, X_-E' (I - P_E) X_-E for the inactive block.
+        nactive = active.sum()
+        score_cov = np.zeros((p, p))
+        X_active_inv = np.linalg.inv(X[:,active].T.dot(X[:,active]))
+        projection_perp = np.identity(n) - X[:,active].dot(X_active_inv).dot( X[:,active].T)
+        score_cov[:nactive, :nactive] = X_active_inv
+        score_cov[nactive:, nactive:] = X[:,inactive].T.dot(projection_perp).dot(X[:,inactive])
+
+        self.score_target_cov = score_cov[:, :nactive]
+        self.target_cov = score_cov[:nactive, :nactive]
+        self.target_observed = self.observed_score_state[:nactive]
+        self.nactive = nactive
+
+        self.B_active = self._opt_linear_term[:nactive, :nactive]
+        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
+
+
+    def setup_map(self, j):
+        """For active index `j`, set `self.A`, `self.null_statistic` and the active/inactive offsets (needs `solve_approx` first)."""
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
+        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
+
+        self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
+        self.offset_inactive = self.null_statistic[self.nactive:]
+
+
+class nonnegative_softmax_scaled(rr.smooth_atom):
+    r"""
+    The nonnegative softmax objective with barrier scale :math:`c`
+    .. math::
+         \mu \mapsto
+         \sum_{i=1}^{m} \log \left(1 +
+         \frac{c}{\mu_i} \right)
+    """
+
+    objective_template = r"""\text{nonneg_softmax}\left(%(var)s\right)"""
 
-class neg_log_cube_probability_laplace(rr.smooth_atom):
     def __init__(self,
-                 q, #equals p - E in our case
-                 lagrange,
-                 randomization_scale = 1., #equals the randomization variance in our case
+                 shape,
+                 barrier_scale=1.,
                  coef=1.,
                  offset=None,
-                 quadratic=None):
-
-        self.b = randomization_scale
-        self.lagrange = lagrange
-        self.q = q
+                 quadratic=None,
+                 initial=None):
 
         rr.smooth_atom.__init__(self,
-                                (self.q,),
+                                shape,
                                 offset=offset,
                                 quadratic=quadratic,
-                                initial=None,
+                                initial=initial,
                                 coef=coef)
 
-    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
-
-        arg = self.apply_offset(arg)
-
-        arg_u = (arg + self.lagrange)/self.b
-        arg_l = (arg - self.lagrange)/self.b
-        scaled_lagrange = (2* self.lagrange)/self.b
-
-        ind_arg_1 = np.zeros(self.q, bool)
-        ind_arg_1[(arg_u <0.)] = 1
-        ind_arg_2 = np.zeros(self.q, bool)
-        ind_arg_2[(arg_l >0.)] = 1
-        ind_arg_3 = np.logical_and(~ind_arg_1, ~ind_arg_2)
-        cube_prob = np.zeros(self.q)
-        cube_prob[ind_arg_1] = np.exp(arg_u[ind_arg_1])/2. - np.exp(arg_l[ind_arg_1])/2.
-        cube_prob[ind_arg_2] = -np.exp(-arg_u[ind_arg_2])/2. + np.exp(-arg_l[ind_arg_2])/2.
-        cube_prob[ind_arg_3] = 1- np.exp(-arg_u[ind_arg_3])/2. - np.exp(arg_l[ind_arg_3])/2.
-        neg_log_cube_prob = -np.log(cube_prob).sum()
-
-        log_cube_grad = np.zeros(self.q)
-        log_cube_grad[ind_arg_1] = 1./self.b
-        log_cube_grad[ind_arg_2] = np.true_divide((np.exp(-scaled_lagrange[ind_arg_2])+ 1.)/self.b,
-                                                  np.exp(-scaled_lagrange[ind_arg_2])-1.)
-        num_cube_grad = np.true_divide(np.exp(-scaled_lagrange[ind_arg_3]), 2 * self.b) - \
-                        np.true_divide(np.exp((2* arg_l[ind_arg_3])), 2 * self.b)
-        den_cube_grad = np.exp(arg_l[ind_arg_3]) - np.exp(-scaled_lagrange[ind_arg_3])/2. - \
-                        np.exp(2* arg_l[ind_arg_3])/2.
-        log_cube_grad[ind_arg_3] = np.true_divide(num_cube_grad,den_cube_grad)
-        neg_log_cube_grad = -log_cube_grad
+        # a feasible point
+        self.coefs[:] = np.ones(shape)
+        self.barrier_scale = barrier_scale
+
+    def smooth_objective(self, mean_param, mode='both', check_feasibility=False):
+        """
+        Evaluate the smooth objective, computing its value, gradient or both.
+        Parameters
+        ----------
+        mean_param : ndarray
+            The current parameter values.
+        mode : str
+            One of ['func', 'grad', 'both'].
+        check_feasibility : bool
+            Currently unused: the objective value is always
+            `np.inf` when some entry of `mean_param` (after the
+            offset is applied) is not strictly positive.
+        Returns
+        -------
+        If `mode` is 'func' returns just the objective value
+        at `mean_param`, else if `mode` is 'grad' returns the gradient
+        else returns both.
+        """
+
+        slack = self.apply_offset(mean_param)
+
+        if mode in ['both', 'func']:
+            if np.all(slack > 0):
+                f = self.scale(np.log((slack + self.barrier_scale) / slack).sum())
+            else:
+                f = np.inf
+        if mode in ['both', 'grad']:
+            g = self.scale(1. / (slack + self.barrier_scale) - 1. / slack)
 
-        if mode == 'func':
-            return self.scale(neg_log_cube_prob)
+        if mode == 'both':
+            return f, g
         elif mode == 'grad':
-            return self.scale(neg_log_cube_grad)
-        elif mode == 'both':
-            return self.scale(neg_log_cube_prob), self.scale(neg_log_cube_grad)
+            return g
+        elif mode == 'func':
+            return f
         else:
             raise ValueError("mode incorrectly specified")
 
 
 class neg_log_cube_probability(rr.smooth_atom):
     def __init__(self,
-                 q, #equals p - E in our case
+                 q,  # equals p - E in our case
                  lagrange,
-                 randomization_scale = 1., #equals the randomization variance in our case
+                 randomization_scale=1.,  # standard deviation of the Gaussian randomization (its square is the variance)
                  coef=1.,
                  offset=None,
                  quadratic=None):
@@ -97,41 +156,59 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
 
         arg = self.apply_offset(arg)
 
-        arg_u = (arg + self.lagrange)/self.randomization_scale
-        arg_l = (arg - self.lagrange)/self.randomization_scale
-        prod_arg = np.exp(-(2. * self.lagrange * arg)/(self.randomization_scale**2))
-        neg_prod_arg = np.exp((2. * self.lagrange * arg)/(self.randomization_scale**2))
-        cube_prob = norm.cdf(arg_u) - norm.cdf(arg_l)
-        log_cube_prob = -np.log(cube_prob).sum()
+        arg_u = (arg + self.lagrange) / self.randomization_scale
+        arg_l = (arg - self.lagrange) / self.randomization_scale
+        prod_arg = np.exp(-(2. * self.lagrange * arg) / (self.randomization_scale ** 2))
+        neg_prod_arg = np.exp((2. * self.lagrange * arg) / (self.randomization_scale ** 2))
+        cube_prob = normal.cdf(arg_u) - normal.cdf(arg_l)
+
         threshold = 10 ** -10
         indicator = np.zeros(self.q, bool)
         indicator[(cube_prob > threshold)] = 1
         positive_arg = np.zeros(self.q, bool)
-        positive_arg[(arg>0)] = 1
+        positive_arg[(arg > 0)] = 1
         pos_index = np.logical_and(positive_arg, ~indicator)
         neg_index = np.logical_and(~positive_arg, ~indicator)
-        log_cube_grad = np.zeros(self.q)
-        log_cube_grad[indicator] = (np.true_divide(-norm.pdf(arg_u[indicator]) + norm.pdf(arg_l[indicator]),
-                                        cube_prob[indicator]))/self.randomization_scale
 
-        log_cube_grad[pos_index] = ((-1. + prod_arg[pos_index])/
-                                     ((prod_arg[pos_index]/arg_u[pos_index])-
-                                      (1./arg_l[pos_index])))/self.randomization_scale
+        log_cube_prob = np.zeros(self.q)
+        log_cube_prob[indicator] = -np.log(cube_prob)[indicator]
 
-        log_cube_grad[neg_index] = ((arg_u[neg_index] -(arg_l[neg_index]*neg_prod_arg[neg_index]))
-                                    /self.randomization_scale)/(1.- neg_prod_arg[neg_index])
+        random_var = self.randomization_scale ** 2
+        log_cube_prob[neg_index] = (arg[neg_index] ** 2. / (2. * random_var)) + (
+        arg[neg_index] * self.lagrange[neg_index] / random_var) + \
+                                   (self.lagrange[neg_index] ** 2. / (2. * random_var)) \
+                                   - np.log(
+            1. / np.abs(arg_u[neg_index]) - neg_prod_arg[neg_index] / np.abs(arg_l[neg_index]))
 
+        log_cube_prob[pos_index] = (arg[pos_index] ** 2. / (2. * random_var)) - (
+        arg[pos_index] * self.lagrange[pos_index] / random_var) + \
+                                   (self.lagrange[pos_index] ** 2. / (2. * random_var)) \
+                                   - np.log(
+            1. / np.abs(arg_l[pos_index]) - prod_arg[pos_index] / np.abs(arg_u[pos_index]))
+
+        neg_log_cube_prob = log_cube_prob.sum()
+
+        log_cube_grad = np.zeros(self.q)
+        log_cube_grad[indicator] = (np.true_divide(-normal.pdf(arg_u[indicator]) + normal.pdf(arg_l[indicator]),
+                                                   cube_prob[indicator])) / self.randomization_scale
+
+        log_cube_grad[pos_index] = ((-1. + prod_arg[pos_index]) /
+                                    ((prod_arg[pos_index] / np.abs(arg_u[pos_index])) -
+                                     (1. / np.abs(arg_l[pos_index])))) / self.randomization_scale
+
+        log_cube_grad[neg_index] = ((-1. + neg_prod_arg[neg_index]) /
+                                    ((-neg_prod_arg[neg_index] / np.abs(arg_l[neg_index])) +
+                                     (1. / np.abs(arg_u[neg_index])))) / self.randomization_scale
 
         if mode == 'func':
-            return self.scale(log_cube_prob)
+            return self.scale(neg_log_cube_prob)
         elif mode == 'grad':
             return self.scale(log_cube_grad)
         elif mode == 'both':
-            return self.scale(log_cube_prob), self.scale(log_cube_grad)
+            return self.scale(neg_log_cube_prob), self.scale(log_cube_grad)
         else:
             raise ValueError("mode incorrectly specified")
 
-
 class approximate_conditional_prob(rr.smooth_atom):
 
     def __init__(self,
@@ -176,10 +253,8 @@ def sel_prob_smooth_objective(self, param, mode='both', check_feasibility=False)
         active_conj_loss = rr.affine_smooth(self.active_conjugate,
                                             rr.affine_transform(self.map.B_active, offset_active))
 
-        if self.map.randomizer == 'laplace':
-            cube_obj = neg_log_cube_probability_laplace(self.q, self.inactive_lagrange, randomization_scale = 1.)
-        elif self.map.randomizer == 'gaussian':
-            cube_obj = neg_log_cube_probability(self.q, self.inactive_lagrange, randomization_scale = 1.)
+
+        cube_obj = neg_log_cube_probability(self.q, self.inactive_lagrange, randomization_scale = self.map.randomization_scale)
 
         cube_loss = rr.affine_smooth(cube_obj, rr.affine_transform(self.map.B_inactive, offset_inactive))
 
@@ -269,8 +344,6 @@ def __init__(self, sel_alg,
                                 quadratic=quadratic,
                                 coef=coef)
 
-        self.coefs[:] = 0.
-
         self.target_observed = self.sel_alg.target_observed
         self.nactive = self.target_observed.shape[0]
         self.target_cov = self.sel_alg.target_cov
@@ -278,54 +351,58 @@ def __init__(self, sel_alg,
     def solve_approx(self):
 
         #defining the grid on which marginal conditional densities will be evaluated
-        grid_length = 301
-
-        #self.grid = np.linspace(-15,65, num=grid_length)
-        #self.grid = np.linspace(-5*np.amax(np.absolute(target_observed)), 5*np.amax(np.absolute(target_observed)), num=grid_length)
-        #s_obs = np.round(self.target_observed, decimals =1)
-        self.grid = np.zeros((self.nactive, grid_length))
+        self.grid_length = 241  # points per coordinate; second axis of self.grid and self.h_approx
 
-        print("observed values", self.target_observed)
+        #print("observed values", self.target_observed)
         self.ind_obs = np.zeros(self.nactive, int)
         self.norm = np.zeros(self.nactive)
-        self.h_approx = np.zeros((self.nactive, self.grid.shape[0]))
+        self.h_approx = np.zeros((self.nactive, self.grid_length))  # row j is filled by approx_conditional_prob(j)
+        self.grid = np.zeros((self.nactive, self.grid_length))  # one grid row per active coordinate
 
         for j in range(self.nactive):
             obs = self.target_observed[j]
-            self.grid[j, :] = np.linspace(self.target_observed[j] - 15., self.target_observed[j] + 15., num=grid_length)
+            # Row j of the grid: grid_length equally spaced points on [obs - 12, obs + 12].
+            self.grid[j,:] = np.linspace(self.target_observed[j]-12., self.target_observed[j]+12.,num=self.grid_length)
+
             self.norm[j] = self.target_cov[j,j]
-            if obs < self.grid[0]:
+            if obs < self.grid[j,0]:  # row j is centred on obs, so this guard and the next are not expected to fire
                 self.ind_obs[j] = 0
-            elif obs > np.max(self.grid):
-                self.ind_obs[j] = grid_length-1
+            elif obs > np.max(self.grid[j,:]):
+                self.ind_obs[j] = self.grid_length-1  # clamp to the last grid index
             else:
                 self.ind_obs[j] = np.argmin(np.abs(self.grid[j,:]-obs))
 
             sys.stderr.write("number of variable being computed: " + str(j) + "\n")
             self.h_approx[j, :] = self.approx_conditional_prob(j)
 
-
     def approx_conditional_prob(self, j):
         h_hat = []
 
         self.sel_alg.setup_map(j)
 
-        for i in xrange(self.grid[j, :].shape[0]):
+        for i in range(self.grid[j, :].shape[0]):  # one optimisation per grid point of row j
             approx = approximate_conditional_prob((self.grid[j, :])[i], self.sel_alg)
-            val = -(approx.minimize2(step=1, nstep=100)[::-1])[0]
+            val = -(approx.minimize2(step=1, nstep=200)[::-1])[0]  # negated last element of minimize2's return
 
             if val != -float('Inf'):
                 h_hat.append(val)
-            else:
+            elif val == -float('Inf') and i == 0:  # -inf at the first grid point: nothing earlier to reuse
+                h_hat.append(-500.)  # fixed finite floor used in place of -inf
+            elif val == -float('Inf') and i > 0:  # -inf later on: repeat the previous grid value
                 h_hat.append(h_hat[i - 1])
 
+            #sys.stderr.write("point on grid: " + str(i) + "\n")
+            #sys.stderr.write("value on grid: " + str(h_hat[i]) + "\n")
+
+        return np.array(h_hat)  # one entry per grid point, in grid order
+
     def area_normalized_density(self, j, mean):
 
         normalizer = 0.
-        grad_normalizer = 0.
         approx_nonnormalized = []
+        grad_normalizer = 0.
 
-        for i in range(self.grid.shape[0]):
+        for i in range(self.grid_length):
             approx_density = np.exp(-np.true_divide(((self.grid[j,:])[i] - mean) ** 2, 2 * self.norm[j])
                                     + (self.h_approx[j,:])[i])
             normalizer += approx_density
@@ -338,13 +415,13 @@ def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
 
         param = self.apply_offset(param)
 
-        approx_normalizer = self.area_normalized_density(j,param)
+        approx_normalizer = self.area_normalized_density(j, param)
 
-        f = (param**2)/(2*self.norm[j]) - (self.target_observed[j]*param)/self.norm[j] + \
+        f = (param ** 2) / (2 * self.norm[j]) - (self.target_observed[j] * param) / self.norm[j] + \
             log(approx_normalizer[1])
 
-        g = param/self.norm[j] - self.target_observed[j]/self.norm[j] + \
-            approx_normalizer[2]/approx_normalizer[1]
+        g = param / self.norm[j] - self.target_observed[j] / self.norm[j] + \
+            approx_normalizer[2] / approx_normalizer[1]
 
         if mode == 'func':
             return self.scale(f)
@@ -355,7 +432,7 @@ def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
         else:
             raise ValueError("mode incorrectly specified")
 
-    def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
+    def approx_MLE_solver(self, j, step=1, nstep=150, tol=1.e-5):
 
         current = self.target_observed[j]
         current_value = np.inf
@@ -391,11 +468,13 @@ def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
                 step *= 2
 
         value = objective(current)
+
         return current, value
 
     def approximate_ci(self, j):
 
-        param_grid = np.linspace(-15., 15., num=301)
+        grid_num = 301
+        param_grid = np.linspace(-10,10, num=grid_num)
         area = np.zeros(param_grid.shape[0])
 
         for k in range(param_grid.shape[0]):
@@ -413,4 +492,4 @@ def approximate_pvalue(self, j, param):
         area_vec = self.area_normalized_density(j, param)[0]
         area = area_vec[self.ind_obs[j]]
 
-        return 2*min(area, 1-area)
\ No newline at end of file
+        return 2*min(area, 1.-area)
\ No newline at end of file
diff --git a/selection/approx_ci/tests/plot_intervals.py b/selection/approx_ci/tests/plot_intervals.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index 8a007bd7b..699a62582 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -1,119 +1,122 @@
 from __future__ import print_function
 import numpy as np
-import time
+import sys
 import regreg.api as rr
-import selection.tests.reports as reports
 from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
-from selection.approx_ci.estimator_approx import M_estimator_approx
-
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+from selection.approx_ci.randomized_lasso import M_estimator_map, approximate_conditional_density
 from selection.randomized.query import naive_confidence_intervals
 from selection.randomized.query import naive_pvalues
 
-
-@register_report(['cover', 'ci_length', 'truth', 'naive_cover', 'naive_pvalues'])
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-@wait_for_return_value()
-def test_approximate_ci(n=100,
-                        p=10,
-                        s=3,
-                        snr=5,
-                        rho=0.1,
-                        lam_frac = 1.,
-                        loss='gaussian',
-                        randomizer='gaussian'):
+def test_approximate_inference(X,
+                               y,
+                               true_mean,
+                               sigma,
+                               seed_n = 0,
+                               lam_frac = 1.,
+                               loss='gaussian',
+                               randomization_scale = 1.):
 
     from selection.api import randomization
 
+    n, p = X.shape
+    np.random.seed(seed_n)  # seed NumPy's global RNG before lam is simulated below
     if loss == "gaussian":
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
         loss = rr.glm.gaussian(X, y)
     elif loss == "logistic":
-        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
-        loss = rr.glm.logistic(X, y)
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
+        loss = rr.glm.logistic(X, y)  # rebinds `loss` from the string flag to the regreg loss object
 
     epsilon = 1. / np.sqrt(n)
 
     W = np.ones(p) * lam
     penalty = rr.group_lasso(np.arange(p),
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
-    if randomizer=='gaussian':
-        randomization = randomization.isotropic_gaussian((p,), scale=1.)
-    elif randomizer=='laplace':
-        randomization = randomization.laplace((p,), scale=1.)
 
-    M_est = M_estimator_approx(loss, epsilon, penalty, randomization, randomizer)
-    M_est.solve_approx()
-    ci = approximate_conditional_density(M_est)
-    ci.solve_approx()
+    randomization = randomization.isotropic_gaussian((p,), scale=randomization_scale)  # rebinds the imported name locally
+    M_est = M_estimator_map(loss, epsilon, penalty, randomization, randomization_scale = randomization_scale)
 
+    M_est.solve_approx()
     active = M_est._overall
     active_set = np.asarray([i for i in range(p) if active[i]])
-
-    true_support = np.asarray([i for i in range(p) if i < s])
-
     nactive = np.sum(active)
+    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
+    sys.stderr.write("Active set selected by lasso" + str(active_set) + "\n")
+    sys.stderr.write("Observed target" + str(M_est.target_observed) + "\n")
 
-    print("active set, true_support", active_set, true_support)
-
-    true_vec = beta[active]
+    if nactive == 0:  # nothing selected: there is no target to do inference on
+        return None
 
-    print("true coefficients", true_vec)
+    else:
+        true_vec = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)  # least-squares coefficients of true_mean on the active columns
 
-    if (set(active_set).intersection(set(true_support)) == set(true_support))== True:
-
-        ci_active = np.zeros((nactive, 2))
-        #mle_active = np.zeros(nactive)
-        covered = np.zeros(nactive, np.bool)
-        ci_length = np.zeros(nactive)
-        pivots = np.zeros(nactive)
+        sys.stderr.write("True target to be covered" + str(true_vec) + "\n")
 
         class target_class(object):
             def __init__(self, target_cov):
                 self.target_cov = target_cov
                 self.shape = target_cov.shape
+
         target = target_class(M_est.target_cov)
 
         ci_naive = naive_confidence_intervals(target, M_est.target_observed)
         naive_pvals = naive_pvalues(target, M_est.target_observed, true_vec)
-        naive_covered = np.zeros(nactive)
-        toc = time.time()
-
-        for j in range(nactive):
-            ci_active[j, :] = np.array(ci.approximate_ci(j))
-            #mle_active[j] = ci.approx_MLE_solver(j, nstep= 100)[0]
-            if (ci_active[j, 0] <= true_vec[j]) and (ci_active[j,1] >= true_vec[j]):
-                covered[j] = 1
-            ci_length[j] = ci_active[j,1] - ci_active[j,0]
-            print(ci_active[j, :])
-            pivots[j] = ci.approximate_pvalue(j, true_vec[j])
 
-            # naive ci
-            if (ci_naive[j,0]<=true_vec[j]) and (ci_naive[j,1]>=true_vec[j]):
-                naive_covered[j]+=1
+        ci = approximate_conditional_density(M_est)
+        ci.solve_approx()
 
-        tic = time.time()
-        print('ci time now', tic - toc)
-        return covered, ci_length, pivots, naive_covered, naive_pvals
+        ci_sel = np.zeros((nactive, 2))  # columns: lower and upper end of the selective interval
+        sel_MLE = np.zeros(nactive)
+        sel_length = np.zeros(nactive)
 
+        for j in range(nactive):
+            ci_sel[j, :] = np.array(ci.approximate_ci(j))
+            sel_MLE[j] = ci.approx_MLE_solver(j, step=1, nstep=150)[0]  # first element of the solver's return; presumably the maximiser
+            sel_length[j] = ci_sel[j, 1] - ci_sel[j, 0]
 
-def report(niter=50, **kwargs):
-
-    kwargs = {'s': 0, 'n': 200, 'p': 30, 'snr': 7, 'loss': 'gaussian', 'randomizer':'gaussian'}
-    split_report = reports.reports['test_approximate_ci']
-    screened_results = reports.collect_multiple_runs(split_report['test'],
-                                                     split_report['columns'],
-                                                     niter,
-                                                     reports.summarize_all,
-                                                     **kwargs)
+        sel_covered = np.zeros(nactive, np.bool)  # NOTE(review): np.bool is missing from NumPy 1.24-1.26; builtin bool is portable
+        sel_risk = np.zeros(nactive)
+        naive_covered = np.zeros(nactive)
+        naive_risk = np.zeros(nactive)
 
-    fig = reports.pivot_plot_plus_naive(screened_results)
-    fig.savefig('approx_pivots_glm.pdf')
+        for j in range(nactive):
 
+            sel_risk[j] = (sel_MLE[j] - true_vec[j]) ** 2.  # squared error of the selective MLE
+            naive_risk[j] = (M_est.target_observed[j]- true_vec[j]) ** 2.  # squared error of the unadjusted estimate
+
+            if (ci_sel[j, 0] <= true_vec[j]) and (ci_sel[j, 1] >= true_vec[j]):
+                sel_covered[j] = 1
+            if (ci_naive[j, 0] <= true_vec[j]) and (ci_naive[j, 1] >= true_vec[j]):
+                naive_covered[j] = 1
+
+        print("lengths", sel_length.sum()/nactive)
+        print("selective intervals", ci_sel.T)
+        print("risks", sel_risk.sum() / nactive)
+
+        return np.transpose(np.vstack((ci_sel[:, 0],  # result: one row per active coordinate, ten columns in the order stacked here
+                                       ci_sel[:, 1],
+                                       ci_naive[:,0],
+                                       ci_naive[:, 1],
+                                       sel_MLE,
+                                       M_est.target_observed,
+                                       sel_covered,
+                                       naive_covered,
+                                       sel_risk,
+                                       naive_risk)))
+
+
+def test_lasso(n, p, s, signal):  # NOTE(review): no defaults, so pytest would look for fixtures named n, p, s, signal
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+    true_mean = X.dot(beta)  # X @ beta, passed on as the mean vector the target is built from
+    lasso = test_approximate_inference(X,
+                                       y,
+                                       true_mean,
+                                       sigma,
+                                       seed_n=0,
+                                       lam_frac=1.,
+                                       loss='gaussian')
+    # test_approximate_inference returns None when nothing is selected; this function then returns None as well.
+    if lasso is not None:
+        print("output of selection adjusted inference", lasso)
+        return(lasso)
 
-if __name__=='__main__':
-    report()
\ No newline at end of file

From 6320b82eac1de45a0d802b291f83b1d0871a100c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51sk9f.SUNet>
Date: Thu, 31 Aug 2017 22:27:31 -0700
Subject: [PATCH 156/617] updated gradient in log cube prob for fs

---
 selection/approx_ci/ci_approx_greedy_step.py  | 222 +++++++++++++++---
 selection/approx_ci/ci_via_approx_density.py  |  58 +++++
 selection/approx_ci/tests/test_glm.py         |   4 +-
 selection/approx_ci/tests/test_greedy_step.py |  35 ++-
 4 files changed, 261 insertions(+), 58 deletions(-)

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index b97e46f40..3fff7849d 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -1,10 +1,149 @@
 from math import log
 import numpy as np
+import sys
 import regreg.api as rr
-from selection.bayesian.selection_probability_rr import nonnegative_softmax_scaled
 from scipy.stats import norm
+from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
+from selection.randomized.greedy_step import greedy_score_step
 
 
+class greedy_score_step_map(greedy_score_step):
+    def __init__(self, loss,
+                 penalty,
+                 active_groups,
+                 inactive_groups,
+                 randomization,
+                 randomization_scale=1.):
+        # Construction is delegated to greedy_score_step; this subclass only stores the scale set below.
+        greedy_score_step.__init__(self, loss,
+                                   penalty,
+                                   active_groups,
+                                   inactive_groups,
+                                   randomization)
+
+        self.randomization_scale = randomization_scale  # read by the cube-probability objective via self.map.randomization_scale
+
+    def solve_approx(self):
+        self.solve()
+        self.setup_sampler()
+        p = self.inactive.sum()  # number of candidate (currently inactive) coordinates
+        self.feasible_point = self.observed_scaling
+        self._overall = np.zeros(p, dtype=bool)  # boolean mask over those p candidates
+        # print(self.selection_variable['variables'])
+        self._overall[self.selection_variable['variables']] = 1  # presumably the coordinate(s) chosen by this step
+
+        self.observed_opt_state = np.hstack([self.observed_scaling, self.observed_subgradients])
+
+        _opt_linear_term = np.concatenate((np.atleast_2d(self.maximizing_subgrad).T, self.losing_padding_map), 1)
+        self._opt_linear_term = np.concatenate(  # reorder rows: masked-in coordinates first, the rest after
+            (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
+
+        self.opt_transform = (self._opt_linear_term, np.zeros(p))  # (linear part, zero offset)
+
+        (self._score_linear_term, _) = self.score_transform  # keep the linear part, drop the second element
+
+        self.inactive_lagrange = self.observed_scaling * self.penalty.weights[0] * np.ones(p - 1)
+
+        X, _ = self.loss.data
+        n, p = X.shape  # NOTE: rebinds p (was self.inactive.sum()) to the column count of X
+        self.p = p
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self.active,
+                                              inactive=~self.active)[0]
+
+        bootstrap_target, target_observed = pairs_bootstrap_glm(self.loss,
+                                                                self._overall,
+                                                                beta_full=None,
+                                                                inactive=None)
+
+        sampler = lambda: np.random.choice(n, size=(n,), replace=True)  # n row indices drawn with replacement
+        self.target_cov, target_score_cov = bootstrap_cov(sampler, bootstrap_target, cross_terms=(bootstrap_score,))
+        self.score_target_cov = np.atleast_2d(target_score_cov).T  # transposed so column j pairs with target j in setup_map
+        self.target_observed = target_observed
+
+        nactive = self._overall.sum()
+        self.nactive = nactive
+
+        self.B_active = self._opt_linear_term[:nactive, :nactive]  # leading nactive rows of the reordered map
+        self.B_inactive = self._opt_linear_term[nactive:, :nactive]  # remaining rows, same first nactive columns
+
+    def setup_map(self, j):
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]  # column j of the cross term scaled by 1 / target_cov[j, j]
+        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]  # mapped observed score minus A * target_observed[j]
+
+        self.offset_active = self.null_statistic[:self.nactive]  # split matches the B_active / B_inactive row split
+        self.offset_inactive = self.null_statistic[self.nactive:]
+
+
+class nonnegative_softmax_scaled(rr.smooth_atom):
+    r"""
+    The nonnegative softmax objective, with barrier scale :math:`b`,
+    .. math::
+         \mu \mapsto
+         \sum_{i=1}^{m} \log \left(1 +
+         \frac{b}{\mu_i} \right)
+    """
+
+    objective_template = r"""\text{nonneg_softmax}\left(%(var)s\right)"""
+
+    def __init__(self,
+                 shape,
+                 barrier_scale=1.,
+                 coef=1.,
+                 offset=None,
+                 quadratic=None,
+                 initial=None):
+
+        rr.smooth_atom.__init__(self,
+                                shape,
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=initial,
+                                coef=coef)
+
+        # a feasible point
+        self.coefs[:] = np.ones(shape)
+        self.barrier_scale = barrier_scale  # the constant b added to the slack inside the log
+
+    def smooth_objective(self, mean_param, mode='both', check_feasibility=False):
+        """
+        Evaluate sum(log((slack + barrier_scale) / slack)), its gradient, or both.
+        Parameters
+        ----------
+        mean_param : ndarray
+            Point of evaluation; `slack` below is `self.apply_offset(mean_param)`.
+        mode : str
+            One of ['func', 'grad', 'both'].
+        check_feasibility : bool
+            Not read by this body: the value is `np.inf` whenever
+            some entry of `slack` is <= 0, whatever this flag says;
+            the gradient expression is evaluated without that check.
+        Returns
+        -------
+        If `mode` is 'func' returns just the objective value
+        at `mean_param`, else if `mode` is 'grad' returns the gradient
+        else returns both, as the tuple `(value, gradient)`.
+        """
+
+        slack = self.apply_offset(mean_param)
+
+        if mode in ['both', 'func']:
+            if np.all(slack > 0):
+                f = self.scale(np.log((slack + self.barrier_scale) / slack).sum())
+            else:
+                f = np.inf  # infeasible point: returned as-is, not passed through self.scale
+        if mode in ['both', 'grad']:
+            g = self.scale(1. / (slack + self.barrier_scale) - 1. / slack)  # d/dslack of log(slack + b) - log(slack)
+
+        if mode == 'both':
+            return f, g
+        elif mode == 'grad':
+            return g
+        elif mode == 'func':
+            return f
+        else:
+            raise ValueError("mode incorrectly specified")
+
 class neg_log_cube_probability_fs(rr.smooth_atom):
     def __init__(self,
                  q, #equals p - E in our case
@@ -51,10 +190,11 @@ def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6)
 
         log_cube_grad_vec[pos_index] = ((1. + prod_arg[pos_index]) /
                                     ((prod_arg[pos_index] / arg_u[pos_index]) +
-                                     (1. / arg_l[pos_index]))) / (self.randomization_scale ** 2)
+                                     (1. / arg_l[pos_index]))) / (self.randomization_scale)
 
-        log_cube_grad_vec[neg_index] = ((arg_u[neg_index] - (arg_l[neg_index] * neg_prod_arg[neg_index]))
-                                    / (self.randomization_scale ** 2)) / (1. + neg_prod_arg[neg_index])
+        log_cube_grad_vec[neg_index] = ((1. + neg_prod_arg[neg_index]) /
+                                    (-(neg_prod_arg[neg_index] / arg_l[neg_index]) +
+                                     (1. / arg_u[neg_index]))) / (self.randomization_scale)
 
         log_cube_grad = log_cube_grad_vec.sum()
 
@@ -169,10 +309,7 @@ def sel_prob_smooth_objective(self, param, mode='both', check_feasibility=False)
         active_conj_loss = rr.affine_smooth(self.active_conjugate,
                                             rr.affine_transform(self.map.B_active, offset_active))
 
-        #if self.map.randomizer == 'laplace':
-        #    cube_obj = neg_log_cube_probability_laplace(self.q, self.inactive_lagrange, randomization_scale = 1.)
-        #elif self.map.randomizer == 'gaussian':
-        cube_loss = neg_log_cube_probability_fs(self.q, offset_inactive, randomization_scale = 1.)
+        cube_loss = neg_log_cube_probability_fs(self.q, offset_inactive, randomization_scale = self.map.randomization_scale)
 
         total_loss = rr.smooth_sum([active_conj_loss,
                                     cube_loss,
@@ -268,52 +405,63 @@ def __init__(self, sel_alg,
 
     def solve_approx(self):
 
-        #defining the grid on which marginal conditional densities will be evaluated
-        grid_length = 201
-        self.grid = np.linspace(-5, 15, num=grid_length)
-        #self.grid = np.linspace(-5*np.amax(np.absolute(target_observed)), 5*np.amax(np.absolute(target_observed)), num=grid_length)
-        #s_obs = np.round(self.target_observed, decimals =1)
+        self.grid_length = 241  # points per coordinate; second axis of self.grid and self.h_approx
 
-        print("observed values", self.target_observed)
+        # print("observed values", self.target_observed)
         self.ind_obs = np.zeros(self.nactive, int)
         self.norm = np.zeros(self.nactive)
-        self.h_approx = np.zeros((self.nactive, self.grid.shape[0]))
+        self.h_approx = np.zeros((self.nactive, self.grid_length))  # row j is filled by approx_conditional_prob(j)
+        self.grid = np.zeros((self.nactive, self.grid_length))  # one grid row per active coordinate
 
         for j in range(self.nactive):
             obs = self.target_observed[j]
-            self.norm[j] = self.target_cov[j,j]
-            if obs < self.grid[0]:
+            # Row j of the grid: grid_length equally spaced points on [obs - 12, obs + 12].
+            self.grid[j, :] = np.linspace(self.target_observed[j] - 12., self.target_observed[j] + 12.,
+                                          num=self.grid_length)
+
+            self.norm[j] = self.target_cov[j, j]  # diagonal entry of target_cov; used as the variance in the Gaussian kernel
+            if obs < self.grid[j, 0]:  # row j is centred on obs, so this guard and the next are not expected to fire
                 self.ind_obs[j] = 0
-            elif obs > np.max(self.grid):
-                self.ind_obs[j] = grid_length-1
+            elif obs > np.max(self.grid[j, :]):
+                self.ind_obs[j] = self.grid_length - 1  # clamp to the last grid index
             else:
-                self.ind_obs[j] = np.argmin(np.abs(self.grid-obs))
-            self.h_approx[j, :] = self.approx_conditional_prob(j)
+                self.ind_obs[j] = np.argmin(np.abs(self.grid[j, :] - obs))  # index of the grid point nearest to obs
 
+            sys.stderr.write("number of variable being computed: " + str(j) + "\n")
+            self.h_approx[j, :] = self.approx_conditional_prob(j)
 
     def approx_conditional_prob(self, j):
         h_hat = []
 
         self.sel_alg.setup_map(j)
 
-        for i in range(self.grid.shape[0]):
+        for i in range(self.grid[j, :].shape[0]):  # one optimisation per grid point of row j
+            approx = approximate_conditional_prob_fs((self.grid[j, :])[i], self.sel_alg)
+            val = -(approx.minimize2(step=1, nstep=200)[::-1])[0]  # negated last element of minimize2's return
+
+            if val != -float('Inf'):
+                h_hat.append(val)
+            elif val == -float('Inf') and i == 0:  # -inf at the first grid point: nothing earlier to reuse
+                h_hat.append(-500.)  # fixed finite floor used in place of -inf
+            elif val == -float('Inf') and i > 0:  # -inf later on: repeat the previous grid value
+                h_hat.append(h_hat[i - 1])
 
-            approx = approximate_conditional_prob_fs(self.grid[i], self.sel_alg)
-            h_hat.append(-(approx.minimize2(j, nstep=50)[::-1])[0])
+        # sys.stderr.write("point on grid: " + str(i) + "\n")
+        # sys.stderr.write("value on grid: " + str(h_hat[i]) + "\n")
 
         return np.array(h_hat)
 
     def area_normalized_density(self, j, mean):
 
         normalizer = 0.
-        grad_normalizer = 0.
         approx_nonnormalized = []
+        grad_normalizer = 0.  # accumulates the derivative of `normalizer` with respect to `mean`
 
-        for i in range(self.grid.shape[0]):
-            approx_density = np.exp(-np.true_divide((self.grid[i] - mean) ** 2, 2 * self.norm[j])
+        for i in range(self.grid_length):
+            approx_density = np.exp(-np.true_divide(((self.grid[j,:])[i] - mean) ** 2, 2 * self.norm[j])  # Gaussian kernel at grid point i ...
                                     + (self.h_approx[j,:])[i])
             normalizer += approx_density
-            grad_normalizer +=  (-mean/self.norm[j] + self.grid[i]/self.norm[j])* approx_density
+            grad_normalizer += (-mean / self.norm[j] + (self.grid[j, :])[i] / self.norm[j]) * approx_density  # d(approx_density)/d(mean)
             approx_nonnormalized.append(approx_density)
 
         return np.cumsum(np.array(approx_nonnormalized / normalizer)), normalizer, grad_normalizer
@@ -322,13 +470,13 @@ def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
 
         param = self.apply_offset(param)
 
-        approx_normalizer = self.area_normalized_density(j,param)
+        approx_normalizer = self.area_normalized_density(j, param)
 
-        f = (param**2)/(2*self.norm[j]) - (self.target_observed[j]*param)/self.norm[j] + \
+        f = (param ** 2) / (2 * self.norm[j]) - (self.target_observed[j] * param) / self.norm[j] + \
             log(approx_normalizer[1])
 
-        g = param/self.norm[j] - self.target_observed[j]/self.norm[j] + \
-            approx_normalizer[2]/approx_normalizer[1]
+        g = param / self.norm[j] - self.target_observed[j] / self.norm[j] + \
+            approx_normalizer[2] / approx_normalizer[1]
 
         if mode == 'func':
             return self.scale(f)
@@ -339,7 +487,7 @@ def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
         else:
             raise ValueError("mode incorrectly specified")
 
-    def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
+    def approx_MLE_solver(self, j, step=1, nstep=150, tol=1.e-5):
 
         current = self.target_observed[j]
         current_value = np.inf
@@ -375,13 +523,13 @@ def approx_MLE_solver(self, j, step=1, nstep=100, tol=1.e-5):
                 step *= 2
 
         value = objective(current)
+
         return current, value
 
     def approximate_ci(self, j):
 
-        grid_length = 201
-        #param_grid = np.linspace(-5*np.amax(np.absolute(self.target_observed)), 5*np.amax(np.absolute(self.target_observed)), num=grid_length)
-        param_grid = np.linspace(-5, 15, num=201)
+        grid_num = 301
+        param_grid = np.linspace(-10,10, num=grid_num)
         area = np.zeros(param_grid.shape[0])
 
         for k in range(param_grid.shape[0]):
@@ -399,4 +547,4 @@ def approximate_pvalue(self, j, param):
         area_vec = self.area_normalized_density(j, param)[0]
         area = area_vec[self.ind_obs[j]]
 
-        return 2*min(area, 1-area)
+        return 2*min(area, 1.-area)
\ No newline at end of file
diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index 1b34448b4..45dbc1d23 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -131,6 +131,64 @@ def smooth_objective(self, mean_param, mode='both', check_feasibility=False):
         else:
             raise ValueError("mode incorrectly specified")
 
+class neg_log_cube_probability_laplace(rr.smooth_atom):
+    """Smooth atom: sum over coordinates of -log P(arg - lagrange < b*Z < arg + lagrange), Z ~ Laplace(0, 1)."""
+    def __init__(self,
+                 q, #equals p - E in our case
+                 lagrange,
+                 randomization_scale = 1., #stored as the Laplace scale b; arguments are divided by it
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+
+        self.b = randomization_scale
+        self.lagrange = lagrange
+        self.q = q
+
+        rr.smooth_atom.__init__(self,
+                                (self.q,),
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=None,
+                                coef=coef)
+
+    def smooth_objective(self, arg, mode='both', check_feasibility=False, tol=1.e-6):
+        """Evaluate the atom at `arg`; `mode` is 'func', 'grad' or 'both' (anything else raises ValueError)."""
+        arg = self.apply_offset(arg)
+
+        arg_u = (arg + self.lagrange)/self.b
+        arg_l = (arg - self.lagrange)/self.b
+        scaled_lagrange = (2* self.lagrange)/self.b
+
+        # three regimes per coordinate: interval left of 0, interval right of 0, interval containing 0
+        ind_arg_1 = arg_u < 0.
+        ind_arg_2 = arg_l > 0.
+        ind_arg_3 = np.logical_and(~ind_arg_1, ~ind_arg_2)
+        cube_prob = np.zeros(self.q)
+        cube_prob[ind_arg_1] = np.exp(arg_u[ind_arg_1])/2. - np.exp(arg_l[ind_arg_1])/2.
+        cube_prob[ind_arg_2] = -np.exp(-arg_u[ind_arg_2])/2. + np.exp(-arg_l[ind_arg_2])/2.
+        cube_prob[ind_arg_3] = 1- np.exp(-arg_u[ind_arg_3])/2. - np.exp(arg_l[ind_arg_3])/2.
+        neg_log_cube_prob = -np.log(cube_prob).sum()
+
+        log_cube_grad = np.zeros(self.q)
+        log_cube_grad[ind_arg_1] = 1./self.b
+        # right regime: prob = exp(-arg_l) * (1 - exp(-scaled_lagrange)) / 2, hence d log(prob)/d arg = -1/b
+        log_cube_grad[ind_arg_2] = -1./self.b
+        num_cube_grad = np.true_divide(np.exp(-scaled_lagrange[ind_arg_3]), 2 * self.b) - \
+                        np.true_divide(np.exp((2* arg_l[ind_arg_3])), 2 * self.b)
+        den_cube_grad = np.exp(arg_l[ind_arg_3]) - np.exp(-scaled_lagrange[ind_arg_3])/2. - \
+                        np.exp(2* arg_l[ind_arg_3])/2.
+        log_cube_grad[ind_arg_3] = np.true_divide(num_cube_grad,den_cube_grad)
+        neg_log_cube_grad = -log_cube_grad
+
+        if mode == 'func':
+            return self.scale(neg_log_cube_prob)
+        elif mode == 'grad':
+            return self.scale(neg_log_cube_grad)
+        elif mode == 'both':
+            return self.scale(neg_log_cube_prob), self.scale(neg_log_cube_grad)
+        else:
+            raise ValueError("mode incorrectly specified")
 
 class neg_log_cube_probability(rr.smooth_atom):
     def __init__(self,
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index 699a62582..fa90e7f0b 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -3,7 +3,8 @@
 import sys
 import regreg.api as rr
 from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.randomized_lasso import M_estimator_map, approximate_conditional_density
+from selection.approx_ci.randomized_lasso import (M_estimator_map,
+                                                  approximate_conditional_density)
 from selection.randomized.query import naive_confidence_intervals
 from selection.randomized.query import naive_pvalues
 
@@ -17,7 +18,6 @@ def test_approximate_inference(X,
                                randomization_scale = 1.):
 
     from selection.api import randomization
-
     n, p = X.shape
     np.random.seed(seed_n)
     if loss == "gaussian":
diff --git a/selection/approx_ci/tests/test_greedy_step.py b/selection/approx_ci/tests/test_greedy_step.py
index 9d50d3446..500918785 100644
--- a/selection/approx_ci/tests/test_greedy_step.py
+++ b/selection/approx_ci/tests/test_greedy_step.py
@@ -1,29 +1,29 @@
 from __future__ import print_function
 import numpy as np
-import time
 import regreg.api as rr
 from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_approx_greedy_step import neg_log_cube_probability_fs, approximate_conditional_prob_fs, \
-    approximate_conditional_density
-from selection.approx_ci.estimator_approx import greedy_score_step_approx
-
-def test_approximate_ci(n=100,
-                        p=10,
-                        s=0,
-                        snr=5,
-                        rho=0.1,
-                        lam_frac = 1.,
-                        loss='gaussian',
-                        randomizer='gaussian'):
+from selection.approx_ci.ci_approx_greedy_step import (greedy_score_step_map,
+                                                       approximate_conditional_density)
 
-    from selection.api import randomization
+from selection.randomized.query import naive_confidence_intervals
+from selection.randomized.query import naive_pvalues
+
+def test_approximate_inference(X,
+                               y,
+                               true_mean,
+                               sigma,
+                               seed_n = 0,
+                               lam_frac = 1.,
+                               loss='gaussian',
+                               randomization_scale = 1.):
 
+    from selection.api import randomization
+    n, p = X.shape
+    np.random.seed(seed_n)
     if loss == "gaussian":
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
         loss = rr.glm.gaussian(X, y)
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
     elif loss == "logistic":
-        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
         loss = rr.glm.logistic(X, y)
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
 
@@ -68,8 +68,6 @@ def test_approximate_ci(n=100,
         ci_length = np.zeros(nactive)
         pivots = np.zeros(nactive)
 
-        toc = time.time()
-
         for j in range(nactive):
             ci_active[j, :] = np.array(ci.approximate_ci(j))
             if (ci_active[j, 0] <= true_vec[j]) and (ci_active[j, 1] >= true_vec[j]):
@@ -79,7 +77,6 @@ def test_approximate_ci(n=100,
             pivots[j] = ci.approximate_pvalue(j, true_vec[j])
 
         print("confidence intervals", ci_active)
-        tic = time.time()
         print('ci time now', tic - toc)
 
 

From 239543e574e1a11854d43d652d399914ef4616a2 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51sk9f.SUNet>
Date: Thu, 31 Aug 2017 22:56:49 -0700
Subject: [PATCH 157/617] updated fs test

---
 selection/approx_ci/ci_approx_greedy_step.py  |  30 ++--
 selection/approx_ci/tests/test_glm.py         |   8 +-
 selection/approx_ci/tests/test_greedy_step.py | 130 +++++++++++-------
 3 files changed, 101 insertions(+), 67 deletions(-)

diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index 3fff7849d..317610936 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -7,13 +7,13 @@
 from selection.randomized.greedy_step import greedy_score_step
 
 
-class greedy_score_step_map(greedy_score_step):
+class greedy_score_map(greedy_score_step):
     def __init__(self, loss,
-                 penalty,
-                 active_groups,
-                 inactive_groups,
-                 randomization,
-                 randomization_scale=1.):
+                       penalty,
+                       active_groups,
+                       inactive_groups,
+                       randomization,
+                       randomization_scale=1.):
 
         greedy_score_step.__init__(self, loss,
                                    penalty,
@@ -26,7 +26,9 @@ def __init__(self, loss,
     def solve_approx(self):
         self.solve()
         self.setup_sampler()
-        p = self.inactive.sum()
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
         self.feasible_point = self.observed_scaling
         self._overall = np.zeros(p, dtype=bool)
         # print(self.selection_variable['variables'])
@@ -44,9 +46,6 @@ def solve_approx(self):
 
         self.inactive_lagrange = self.observed_scaling * self.penalty.weights[0] * np.ones(p - 1)
 
-        X, _ = self.loss.data
-        n, p = X.shape
-        self.p = p
         bootstrap_score = pairs_bootstrap_glm(self.loss,
                                               self.active,
                                               inactive=~self.active)[0]
@@ -405,7 +404,7 @@ def __init__(self, sel_alg,
 
     def solve_approx(self):
 
-        self.grid_length = 241
+        self.grid_length = 301
 
         # print("observed values", self.target_observed)
         self.ind_obs = np.zeros(self.nactive, int)
@@ -416,8 +415,7 @@ def solve_approx(self):
         for j in range(self.nactive):
             obs = self.target_observed[j]
 
-            self.grid[j, :] = np.linspace(self.target_observed[j] - 12., self.target_observed[j] + 12.,
-                                          num=self.grid_length)
+            self.grid[j, :] = np.linspace(-15.,15.,num=self.grid_length)
 
             self.norm[j] = self.target_cov[j, j]
             if obs < self.grid[j, 0]:
@@ -446,8 +444,8 @@ def approx_conditional_prob(self, j):
             elif val == -float('Inf') and i > 0:
                 h_hat.append(h_hat[i - 1])
 
-        # sys.stderr.write("point on grid: " + str(i) + "\n")
-        # sys.stderr.write("value on grid: " + str(h_hat[i]) + "\n")
+            sys.stderr.write("point on grid: " + str(i) + "\n")
+            sys.stderr.write("value on grid: " + str(h_hat[i]) + "\n")
 
         return np.array(h_hat)
 
@@ -529,7 +527,7 @@ def approx_MLE_solver(self, j, step=1, nstep=150, tol=1.e-5):
     def approximate_ci(self, j):
 
         grid_num = 301
-        param_grid = np.linspace(-10,10, num=grid_num)
+        param_grid = np.linspace(-15,15, num=grid_num)
         area = np.zeros(param_grid.shape[0])
 
         for k in range(param_grid.shape[0]):
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index fa90e7f0b..cab1f1dcd 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -6,7 +6,6 @@
 from selection.approx_ci.randomized_lasso import (M_estimator_map,
                                                   approximate_conditional_density)
 from selection.randomized.query import naive_confidence_intervals
-from selection.randomized.query import naive_pvalues
 
 def test_approximate_inference(X,
                                y,
@@ -60,7 +59,8 @@ def __init__(self, target_cov):
         target = target_class(M_est.target_cov)
 
         ci_naive = naive_confidence_intervals(target, M_est.target_observed)
-        naive_pvals = naive_pvalues(target, M_est.target_observed, true_vec)
+        naive_covered = np.zeros(nactive)
+        naive_risk = np.zeros(nactive)
 
         ci = approximate_conditional_density(M_est)
         ci.solve_approx()
@@ -76,8 +76,6 @@ def __init__(self, target_cov):
 
         sel_covered = np.zeros(nactive, np.bool)
         sel_risk = np.zeros(nactive)
-        naive_covered = np.zeros(nactive)
-        naive_risk = np.zeros(nactive)
 
         for j in range(nactive):
 
@@ -91,7 +89,7 @@ def __init__(self, target_cov):
 
         print("lengths", sel_length.sum()/nactive)
         print("selective intervals", ci_sel.T)
-        print("risks", sel_risk.sum() / nactive)
+        print("risks", sel_risk.sum()/nactive)
 
         return np.transpose(np.vstack((ci_sel[:, 0],
                                        ci_sel[:, 1],
diff --git a/selection/approx_ci/tests/test_greedy_step.py b/selection/approx_ci/tests/test_greedy_step.py
index 500918785..fd3fba50e 100644
--- a/selection/approx_ci/tests/test_greedy_step.py
+++ b/selection/approx_ci/tests/test_greedy_step.py
@@ -1,16 +1,16 @@
 from __future__ import print_function
+import sys
 import numpy as np
 import regreg.api as rr
 from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_approx_greedy_step import (greedy_score_step_map,
+from selection.approx_ci.ci_approx_greedy_step import (greedy_score_map,
                                                        approximate_conditional_density)
 
 from selection.randomized.query import naive_confidence_intervals
-from selection.randomized.query import naive_pvalues
 
 def test_approximate_inference(X,
                                y,
-                               true_mean,
+                               beta,
                                sigma,
                                seed_n = 0,
                                lam_frac = 1.,
@@ -27,57 +27,95 @@ def test_approximate_inference(X,
         loss = rr.glm.logistic(X, y)
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
 
-    if randomizer == 'gaussian':
-        randomization = randomization.isotropic_gaussian((p,), scale=1.)
-    elif randomizer == 'laplace':
-        randomization = randomization.laplace((p,), scale=1.)
+    randomization = randomization.isotropic_gaussian((p,), scale=randomization_scale)
 
     W = np.ones(p) * lam
     penalty = rr.group_lasso(np.arange(p),
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-    # active_bool = np.zeros(p, np.bool)
-    # active_bool[range(3)] = 1
-    # inactive_bool = ~active_bool
-
-    GS = greedy_score_step_approx(loss,
-                                  penalty,
-                                  np.zeros(p, dtype=bool),
-                                  np.ones(p, dtype=bool),
-                                  randomization,
-                                  randomizer)
+    GS = greedy_score_map(loss,
+                          penalty,
+                          np.zeros(p, dtype=bool),
+                          np.ones(p, dtype=bool),
+                          randomization,
+                          randomization_scale)
 
     GS.solve_approx()
     active = GS._overall
-    print("nactive", active.sum())
-
-    ci = approximate_conditional_density(GS)
-    ci.solve_approx()
-
-    active_set = np.asarray([i for i in range(p) if active[i]])
-    true_support = np.asarray([i for i in range(p) if i < s])
     nactive = np.sum(active)
-    print("active set, true_support", active_set, true_support)
-    true_vec = beta[active]
-    print("true coefficients", true_vec)
-
-    if (set(active_set).intersection(set(true_support)) == set(true_support)) == True:
-
-        ci_active = np.zeros((nactive, 2))
-        covered = np.zeros(nactive, np.bool)
-        ci_length = np.zeros(nactive)
-        pivots = np.zeros(nactive)
-
-        for j in range(nactive):
-            ci_active[j, :] = np.array(ci.approximate_ci(j))
-            if (ci_active[j, 0] <= true_vec[j]) and (ci_active[j, 1] >= true_vec[j]):
-                covered[j] = 1
-            ci_length[j] = ci_active[j, 1] - ci_active[j, 0]
-            # print(ci_active[j, :])
-            pivots[j] = ci.approximate_pvalue(j, true_vec[j])
-
-        print("confidence intervals", ci_active)
-        print('ci time now', tic - toc)
 
+    if nactive == 0:
+        return None
+    else:
+        active_set = np.asarray([i for i in range(p) if active[i]])
+        s = beta.sum()
+        true_support = np.asarray([i for i in range(p) if i < s])
+        true_vec = beta[active]
+
+        if (set(active_set).intersection(set(true_support)) == set(true_support)) == True:
+            ci = approximate_conditional_density(GS)
+            ci.solve_approx()
+            sys.stderr.write("True target to be covered" + str(true_vec) + "\n")
+
+            class target_class(object):
+                def __init__(self, target_cov):
+                    self.target_cov = target_cov
+                    self.shape = target_cov.shape
+
+            target = target_class(GS.target_cov)
+            ci_naive = naive_confidence_intervals(target, GS.target_observed)
+            naive_covered = np.zeros(nactive)
+            naive_risk = np.zeros(nactive)
+
+            ci_sel = np.zeros((nactive, 2))
+            sel_MLE = np.zeros(nactive)
+            sel_length = np.zeros(nactive)
+
+            for j in range(nactive):
+                ci_sel[j, :] = np.array(ci.approximate_ci(j))
+                sel_MLE[j] = ci.approx_MLE_solver(j, step=1, nstep=150)[0]
+                sel_length[j] = ci_sel[j, 1] - ci_sel[j, 0]
+
+            sel_covered = np.zeros(nactive, np.bool)
+            sel_risk = np.zeros(nactive)
+
+            for j in range(nactive):
+
+                sel_risk[j] = (sel_MLE[j] - true_vec[j]) ** 2.
+                naive_risk[j] = (GS.target_observed[j] - true_vec[j]) ** 2.
+
+                if (ci_sel[j, 0] <= true_vec[j]) and (ci_sel[j, 1] >= true_vec[j]):
+                    sel_covered[j] = 1
+                if (ci_naive[j, 0] <= true_vec[j]) and (ci_naive[j, 1] >= true_vec[j]):
+                    naive_covered[j] = 1
+
+            print("lengths", sel_length.sum() / nactive)
+            print("selective intervals", ci_sel.T)
+            print("risks", sel_risk.sum() / nactive)
+
+            return np.transpose(np.vstack((ci_sel[:, 0],
+                                           ci_sel[:, 1],
+                                           ci_naive[:, 0],
+                                           ci_naive[:, 1],
+                                           sel_MLE,
+                                           GS.target_observed,
+                                           sel_covered,
+                                           naive_covered,
+                                           sel_risk,
+                                           naive_risk)))
+
+
+def test_greedy_step(n, p, s, signal):
+    """Draw a Gaussian instance and run `test_approximate_inference` on it.
+
+    The instance uses rho=0 and sigma=1; inference uses the Gaussian loss.
+    Prints and returns the result, or returns None when that result is None.
+    """
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+    greedy_step = test_approximate_inference(X, y, beta, sigma,
+                                             seed_n=0, lam_frac=1., loss='gaussian')
+
+    if greedy_step is not None:
+        print("output of selection adjusted inference", greedy_step)
+        return(greedy_step)
 
-test_approximate_ci()

From 9ad1018564f24a2b2bac50223f5a8b7df62b5b65 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51sk9f.SUNet>
Date: Thu, 31 Aug 2017 23:11:12 -0700
Subject: [PATCH 158/617] made a test for HIV data

---
 selection/approx_ci/ci_via_approx_density.py  |   2 +-
 .../approx_ci/tests/inference_hiv_data.py     | 308 +++++-------------
 selection/approx_ci/tests/test_glm.py         |   1 +
 selection/approx_ci/tests/test_greedy_step.py |   1 +
 selection/approx_ci/tests/test_mle_approx.py  |  69 ----
 5 files changed, 92 insertions(+), 289 deletions(-)
 delete mode 100644 selection/approx_ci/tests/test_mle_approx.py

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index 45dbc1d23..3f59da487 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -532,7 +532,7 @@ def approx_MLE_solver(self, j, step=1, nstep=150, tol=1.e-5):
     def approximate_ci(self, j):
 
         grid_num = 301
-        param_grid = np.linspace(-10,10, num=grid_num)
+        param_grid = np.linspace(-15.,15., num=grid_num)
         area = np.zeros(param_grid.shape[0])
 
         for k in range(param_grid.shape[0]):
diff --git a/selection/approx_ci/tests/inference_hiv_data.py b/selection/approx_ci/tests/inference_hiv_data.py
index 3eb9fd2ca..49a311a7c 100644
--- a/selection/approx_ci/tests/inference_hiv_data.py
+++ b/selection/approx_ci/tests/inference_hiv_data.py
@@ -1,225 +1,95 @@
 from __future__ import print_function
 import os, numpy as np, pandas, statsmodels.api as sm
-import time
 import regreg.api as rr
-from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
-from selection.approx_ci.estimator_approx import M_estimator_approx
+from selection.approx_ci.ci_via_approx_density import (M_estimator_map,
+                                                      approximate_conditional_density)
 
 from selection.randomized.query import naive_confidence_intervals
-from selection.api import randomization
-import matplotlib.pyplot as plt
 
-
-if not os.path.exists("NRTI_DATA.txt"):
-    NRTI = pandas.read_table("http://hivdb.stanford.edu/pages/published_analysis/genophenoPNAS2006/DATA/NRTI_DATA.txt", na_values="NA")
-else:
-    NRTI = pandas.read_table("NRTI_DATA.txt")
-
-NRTI_specific = []
-NRTI_muts = []
-mixtures = np.zeros(NRTI.shape[0])
-for i in range(1,241):
-    d = NRTI['P%d' % i]
-    for mut in np.unique(d):
-        if mut not in ['-','.'] and len(mut) == 1:
-            test = np.equal(d, mut)
-            if test.sum() > 10:
-                NRTI_specific.append(np.array(np.equal(d, mut)))
-                NRTI_muts.append("P%d%s" % (i,mut))
-
-NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts)
-
-X_NRTI = np.array(NRTI_specific, np.float)
-Y = NRTI['3TC'] # shorthand
-keep = ~np.isnan(Y).astype(np.bool)
-X_NRTI = X_NRTI[np.nonzero(keep)]; Y=Y[keep]
-Y = np.array(np.log(Y), np.float); Y -= Y.mean()
-X_NRTI -= X_NRTI.mean(0)[None, :]; X_NRTI /= X_NRTI.std(0)[None,:]
-X = X_NRTI # shorthand
-n, p = X.shape
-X /= np.sqrt(n)
-
-ols_fit = sm.OLS(Y, X).fit()
-sigma_3TC = np.linalg.norm(ols_fit.resid) / np.sqrt(n-p-1)
-OLS_3TC = ols_fit.params
-
-lam_frac = 1.
-loss = rr.glm.gaussian(X, Y)
-epsilon = 1. / np.sqrt(n)
-lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_3TC
-print(lam)
-
-W = np.ones(p) * lam
-penalty = rr.group_lasso(np.arange(p),weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-randomization = randomization.isotropic_gaussian((p,), scale=1.)
-
-M_est = M_estimator_approx(loss, epsilon, penalty, randomization, randomizer='gaussian')
-M_est.solve_approx()
-active = M_est._overall
-active_set = np.asarray([i for i in range(p) if active[i]])
-nactive = np.sum(active)
-
-active_set_0 = [NRTI_muts[i] for i in range(p) if active[i]]
-
-ci_active = np.zeros((nactive, 2))
-ci_length = np.zeros(nactive)
-mle_active = np.zeros((nactive,1))
-
-ci = approximate_conditional_density(M_est)
-ci.solve_approx()
-
-class target_class(object):
-    def __init__(self, target_cov):
-        self.target_cov = target_cov
-        self.shape = target_cov.shape
-
-
-target = target_class(M_est.target_cov)
-ci_naive = naive_confidence_intervals(target, M_est.target_observed)
-
-for j in range(nactive):
-    ci_active[j, :] = np.array(ci.approximate_ci(j))
-    ci_length[j] = ci_active[j,1] - ci_active[j,0]
-    mle_active[j, :] = ci.approx_MLE_solver(j, nstep=100)[0]
-
-unadjusted_mle = np.zeros((nactive,1))
-for j in range(nactive):
-    unadjusted_mle[j, :] = ci.target_observed[j]
-
-adjusted_intervals = np.hstack([mle_active, ci_active]).T
-unadjusted_intervals = np.hstack([unadjusted_mle, ci_naive]).T
-
-print("adjusted confidence", adjusted_intervals)
-print("naive confidence", unadjusted_intervals)
-
-intervals = np.vstack([unadjusted_intervals, adjusted_intervals])
-
-un_mean = intervals[0,:]
-un_lower_error = list(un_mean-intervals[1,:])
-un_upper_error = list(intervals[2,:]-un_mean)
-unStd = [un_lower_error, un_upper_error]
-
-ad_mean = intervals[3,:]
-ad_lower_error = list(ad_mean-intervals[4,:])
-ad_upper_error = list(intervals[5,:]- ad_mean)
-adStd = [ad_lower_error, ad_upper_error]
-
-
-N = len(un_mean)               # number of data entries
-ind = np.arange(N)              # the x locations for the groups
-width = 0.35                    # bar width
-
-width_0 = 0.10
-
-print('here')
-
-fig, ax = plt.subplots()
-
-rects1 = ax.bar(ind, un_mean,                  # data
-                width,                          # bar width
-                color='darkgrey',        # bar colour
-                yerr=unStd,  # data for error bars
-                error_kw={'ecolor':'dimgrey',    # error-bars colour
-                          'linewidth':2})       # error-bar width
-
-rects2 = ax.bar(ind + width, ad_mean,
-                width,
-                color='thistle',
-                yerr=adStd,
-                error_kw={'ecolor':'darkmagenta',
-                          'linewidth':2})
-
-axes = plt.gca()
-axes.set_ylim([-6, 60])             # y-axis bounds
-
-ax.set_ylabel('Credible')
-ax.set_title('selected variables'.format(active_set))
-ax.set_xticks(ind + 1.2* width)
-
-ax.set_xticklabels(active_set_0, rotation=90)
-
-
-#ax.set_xticklabels(('Coef1', 'Coef2', 'Coef3', 'Coef4', 'Coef5', 'Coef6'))
-
-ax.legend((rects1[0], rects2[0]), ('Unadjusted', 'Adjusted'), loc='upper left')
-
-print('here')
-
-#def autolabel(rects):
-#    for rect in rects:
-#        height = rect.get_height()
-#        ax.text(rect.get_x() + rect.get_width()/2., 1.05*height,
-#                '%d' % int(height),
-#                ha='center',            # vertical alignment
-#                va='bottom'             # horizontal alignment
-#                )
-
-#autolabel(rects1)
-#autolabel(rects2)
-
-#plt.show()                              # render the plot
-
-plt.savefig('/Users/snigdhapanigrahi/Documents/Research/Python_plots/icml_hiv_plots.pdf', bbox_inches='tight')
-
-##################################################
-ind = np.zeros(len(active_set), np.bool)
-
-index = active_set_0.index('P184V')
-ind[index] = 1
-
-active_set_0.pop(index)
-
-active_set = [i for i in range(p) if active[i]]
-active_set.pop(index)
-
-intervals = intervals[:, ~ind]
-
-
-un_mean = intervals[0,:]
-un_lower_error = list(un_mean-intervals[1,:])
-un_upper_error = list(intervals[2,:]-un_mean)
-unStd = [un_lower_error, un_upper_error]
-ad_mean = intervals[3,:]
-ad_lower_error = list(ad_mean-intervals[4,:])
-ad_upper_error = list(intervals[5,:]- ad_mean)
-adStd = [ad_lower_error, ad_upper_error]
-
-
-N = len(un_mean)               # number of data entries
-ind = np.arange(N)              # the x locations for the groups
-width = 0.35                    # bar width
-
-print('here')
-
-fig, ax = plt.subplots()
-
-rects1 = ax.bar(ind, un_mean,                  # data
-                width,                          # bar width
-                color='darkgrey',        # bar colour
-                yerr=unStd,  # data for error bars
-                error_kw={'ecolor':'dimgrey',    # error-bars colour
-                          'linewidth':2})       # error-bar width
-
-rects2 = ax.bar(ind + width, ad_mean,
-                width,
-                color='thistle',
-                yerr=adStd,
-                error_kw={'ecolor':'darkmagenta',
-                          'linewidth':2})
-
-axes = plt.gca()
-axes.set_ylim([-6, 12])             # y-axis bounds
-
-ax.set_ylabel('Credible')
-ax.set_title('selected variables'.format(active_set))
-ax.set_xticks(ind + 1.2* width)
-
-ax.set_xticklabels(active_set_0, rotation=90)
-
-ax.legend((rects1[0], rects2[0]), ('Unadjusted', 'Adjusted'), loc='upper right')
-
-print('here')
-
-plt.savefig('/Users/snigdhapanigrahi/Documents/Research/Python_plots/icml_hiv_plots_0.pdf', bbox_inches='tight')
\ No newline at end of file
+def hiv_inference_test():
+    """Selection-adjusted inference for the 3TC response of the NRTI HIV data (local file, else download).
+
+    Returns a (6, nactive) array: rows 0-2 naive estimate/lower/upper, rows 3-5 adjusted MLE/lower/upper.
+    """
+    if not os.path.exists("NRTI_DATA.txt"):
+        NRTI = pandas.read_table(
+            "http://hivdb.stanford.edu/pages/published_analysis/genophenoPNAS2006/DATA/NRTI_DATA.txt", na_values="NA")
+    else:
+        NRTI = pandas.read_table("NRTI_DATA.txt")
+
+    # one indicator column per single-letter mutation seen more than 10 times at positions 1..240
+    NRTI_specific = []
+    NRTI_muts = []
+    for i in range(1, 241):
+        d = NRTI['P%d' % i]
+        for mut in np.unique(d):
+            if mut not in ['-', '.'] and len(mut) == 1:
+                test = np.equal(d, mut)
+                if test.sum() > 10:
+                    NRTI_specific.append(np.array(test))
+                    NRTI_muts.append("P%d%s" % (i, mut))
+
+    NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts)
+
+    X_NRTI = np.array(NRTI_specific, float)
+    Y = NRTI['3TC']  # shorthand
+    keep = ~np.isnan(Y).astype(bool)
+    X_NRTI = X_NRTI[np.nonzero(keep)]
+    Y = Y[keep]
+    Y = np.array(np.log(Y), float)
+    Y -= Y.mean()
+    X_NRTI -= X_NRTI.mean(0)[None, :]
+    X_NRTI /= X_NRTI.std(0)[None, :]
+    X = X_NRTI  # shorthand
+    n, p = X.shape
+    X /= np.sqrt(n)
+
+    ols_fit = sm.OLS(Y, X).fit()
+    sigma_3TC = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1)
+
+    lam_frac = 1.
+    loss = rr.glm.gaussian(X, Y)
+    epsilon = 1. / np.sqrt(n)
+    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_3TC
+    print(lam)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+    from selection.api import randomization
+    randomization = randomization.isotropic_gaussian((p,), scale=1.)
+
+    #change grid for parameter for HIV data
+    # NOTE(review): the randomization above uses scale=1. but randomization_scale=0.7 is passed here - confirm intended
+    M_est = M_estimator_map(loss, epsilon, penalty, randomization, randomization_scale=0.7)
+    M_est.solve_approx()
+    active = M_est._overall
+    nactive = np.sum(active)
+
+    ci_active = np.zeros((nactive, 2))
+    mle_active = np.zeros((nactive, 1))
+
+    ci = approximate_conditional_density(M_est)
+    ci.solve_approx()
+
+    class target_class(object):
+        def __init__(self, target_cov):
+            self.target_cov = target_cov
+            self.shape = target_cov.shape
+
+    target = target_class(M_est.target_cov)
+    ci_naive = naive_confidence_intervals(target, M_est.target_observed)
+
+    for j in range(nactive):
+        ci_active[j, :] = np.array(ci.approximate_ci(j))
+        mle_active[j, :] = ci.approx_MLE_solver(j, nstep=100)[0]
+
+    unadjusted_mle = np.zeros((nactive, 1))
+    for j in range(nactive):
+        unadjusted_mle[j, :] = ci.target_observed[j]
+
+    adjusted_intervals = np.hstack([mle_active, ci_active]).T
+    unadjusted_intervals = np.hstack([unadjusted_mle, ci_naive]).T
+    print("adjusted confidence", adjusted_intervals)
+    print("naive confidence", unadjusted_intervals)
+    return np.vstack([unadjusted_intervals, adjusted_intervals])
\ No newline at end of file
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index cab1f1dcd..0e0fee636 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -118,3 +118,4 @@ def test_lasso(n, p, s, signal):
         print("output of selection adjusted inference", lasso)
         return(lasso)
 
+test_lasso(n=100, p=200, s=5, signal=5.)
\ No newline at end of file
diff --git a/selection/approx_ci/tests/test_greedy_step.py b/selection/approx_ci/tests/test_greedy_step.py
index fd3fba50e..084e8a25b 100644
--- a/selection/approx_ci/tests/test_greedy_step.py
+++ b/selection/approx_ci/tests/test_greedy_step.py
@@ -119,3 +119,4 @@ def test_greedy_step(n, p, s, signal):
         print("output of selection adjusted inference", greedy_step)
         return(greedy_step)
 
+test_greedy_step(n=200, p=30, s=0, signal=5.)
\ No newline at end of file
diff --git a/selection/approx_ci/tests/test_mle_approx.py b/selection/approx_ci/tests/test_mle_approx.py
deleted file mode 100644
index 104f8d070..000000000
--- a/selection/approx_ci/tests/test_mle_approx.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from __future__ import print_function
-import numpy as np
-import time
-import regreg.api as rr
-
-from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
-from selection.approx_ci.estimator_approx import M_estimator_approx
-
-def test_approximate_mle(n=100,
-                         p=10,
-                         s=3,
-                         snr=5,
-                         rho=0.1,
-                         lam_frac = 1.,
-                         loss='gaussian',
-                         randomizer='gaussian'):
-
-    from selection.api import randomization
-
-    if loss == "gaussian":
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-        loss = rr.glm.gaussian(X, y)
-    elif loss == "logistic":
-        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
-        loss = rr.glm.logistic(X, y)
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
-
-    epsilon = 1. / np.sqrt(n)
-
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-    if randomizer == 'gaussian':
-        randomization = randomization.isotropic_gaussian((p,), scale=1.)
-    elif randomizer == 'laplace':
-        randomization = randomization.laplace((p,), scale=1.)
-
-    M_est = M_estimator_approx(loss, epsilon, penalty, randomization, randomizer)
-    M_est.solve_approx()
-
-    inf = approximate_conditional_density(M_est)
-    inf.solve_approx()
-
-    active = M_est._overall
-    active_set = np.asarray([i for i in range(p) if active[i]])
-
-    true_support = np.asarray([i for i in range(p) if i < s])
-
-    nactive = np.sum(active)
-
-    print("active set, true_support", active_set, true_support)
-
-    true_vec = beta[active]
-
-    print("true coefficients", true_vec)
-
-    if (set(active_set).intersection(set(true_support)) == set(true_support)) == True:
-
-        mle_active = np.zeros(nactive)
-
-        for j in range(nactive):
-            mle_active[j] = inf.approx_MLE_solver(j, nstep=100)[0]
-
-        print("mle for target", mle_active)
-
-test_approximate_mle()
-

From 91833c22a9759642e841d42060132ad52cbeef1e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51sk9f.SUNet>
Date: Thu, 31 Aug 2017 23:33:45 -0700
Subject: [PATCH 159/617] threshold map needs to be fixed as per master

---
 selection/approx_ci/ci_via_approx_density.py  |  59 +++++-
 ...inference_hiv_data.py => test_hiv_data.py} |   0
 .../approx_ci/tests/test_threshold_score.py   | 169 +++++++++---------
 3 files changed, 145 insertions(+), 83 deletions(-)
 rename selection/approx_ci/tests/{inference_hiv_data.py => test_hiv_data.py} (100%)

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_via_approx_density.py
index 3f59da487..9b14cbd5c 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_via_approx_density.py
@@ -5,7 +5,8 @@
 
 import numpy as np
 import regreg.api as rr
-
+from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
+from selection.randomized.threshold_score import threshold_score
 from selection.randomized.M_estimator import M_estimator
 
 class M_estimator_map(M_estimator):
@@ -61,6 +62,62 @@ def setup_map(self, j):
         self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
         self.offset_inactive = self.null_statistic[self.nactive:]
 
+class threshold_score_map(threshold_score):
+
+    def __init__(self, loss,
+                 threshold,
+                 randomization,
+                 active_bool,
+                 inactive_bool,
+                 randomization_scale=1.):
+
+        threshold_score.__init__(self, loss, threshold, randomization, active_bool, inactive_bool)
+        self.randomization_scale = randomization_scale
+
+    def solve_approx(self):
+        self.solve()
+        self.setup_sampler()
+        print("boundary", self.observed_opt_state, self.boundary)
+        self.feasible_point = self.observed_opt_state[self.boundary]
+        (_opt_linear_term, _opt_offset) = self.opt_transform
+        self._opt_linear_term = np.concatenate((_opt_linear_term[self.boundary, :], _opt_linear_term[self.interior, :]),
+                                               0)
+        self._opt_affine_term = np.concatenate((_opt_offset[self.boundary], _opt_offset[self.interior]), 0)
+        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
+
+        (_score_linear_term, _) = self.score_transform
+        self._score_linear_term = np.concatenate(
+            (_score_linear_term[self.boundary, :], _score_linear_term[self.interior, :]), 0)
+        self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
+        self._overall = self.boundary
+        self.inactive_lagrange = self.threshold[0] * np.ones(np.sum(~self.boundary))
+
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self._overall,
+                                              beta_full=self._beta_full,
+                                              inactive=~self._overall)[0]
+
+        score_cov = bootstrap_cov(lambda: np.random.choice(n, size=(n,), replace=True), bootstrap_score)
+        nactive = self._overall.sum()
+        self.score_target_cov = score_cov[:, :nactive]
+        self.target_cov = score_cov[:nactive, :nactive]
+        self.target_observed = self.observed_score_state[:nactive]
+        self.nactive = nactive
+
+        self.B_active = self._opt_linear_term[:nactive, :nactive]
+        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
+
+
+    def setup_map(self, j):
+
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
+        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
+
+        self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
+        self.offset_inactive = self.null_statistic[self.nactive:]
 
 class nonnegative_softmax_scaled(rr.smooth_atom):
     """
diff --git a/selection/approx_ci/tests/inference_hiv_data.py b/selection/approx_ci/tests/test_hiv_data.py
similarity index 100%
rename from selection/approx_ci/tests/inference_hiv_data.py
rename to selection/approx_ci/tests/test_hiv_data.py
diff --git a/selection/approx_ci/tests/test_threshold_score.py b/selection/approx_ci/tests/test_threshold_score.py
index 263c72a1e..db60a9529 100644
--- a/selection/approx_ci/tests/test_threshold_score.py
+++ b/selection/approx_ci/tests/test_threshold_score.py
@@ -1,76 +1,59 @@
 from __future__ import print_function
 import numpy as np
-import time
+import sys
 import regreg.api as rr
-import selection.tests.reports as reports
 from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_via_approx_density import approximate_conditional_density
-from selection.approx_ci.estimator_approx import threshold_score_approx
+from selection.approx_ci.ci_via_approx_density import (threshold_score_map,
+                                                       approximate_conditional_density)
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
 from selection.randomized.query import naive_confidence_intervals
-from selection.randomized.query import naive_pvalues
-
-
-@register_report(['cover', 'ci_length', 'truth', 'naive_cover', 'naive_pvalues'])
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-@wait_for_return_value()
-def test_approximate_ci(n=200,
-                        p=50,
-                        s=0,
-                        snr=5,
-                        threshold = 3.,
-                        rho=0.1,
-                        lam_frac = 1.,
-                        loss='gaussian',
-                        randomizer='gaussian'):
 
-    from selection.api import randomization
+def test_approximate_inference(X,
+                               y,
+                               true_mean,
+                               sigma,
+                               threshold = 3.,
+                               seed_n = 0,
+                               lam_frac = 1.,
+                               loss='gaussian',
+                               randomization_scale = 1.):
 
+    from selection.api import randomization
+    n, p = X.shape
+    np.random.seed(seed_n)
     if loss == "gaussian":
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr, sigma=1.)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
         loss = rr.glm.gaussian(X, y)
     elif loss == "logistic":
-        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
         loss = rr.glm.logistic(X, y)
 
-    if randomizer=='gaussian':
-        randomization = randomization.isotropic_gaussian((p,), scale=1.)
-    elif randomizer=='laplace':
-        randomization = randomization.laplace((p,), scale=1.)
-
     active_bool = np.zeros(p, np.bool)
-    #active_bool[range(3)] = 1
     inactive_bool = ~active_bool
 
-    TS = threshold_score_approx(loss,
-                                threshold,
-                                randomization,
-                                active_bool,
-                                inactive_bool,
-                                randomizer)
+    randomization = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+    TS = threshold_score_map(loss,
+                             threshold,
+                             randomization,
+                             active_bool,
+                             inactive_bool,
+                             randomization_scale)
 
     TS.solve_approx()
     active = TS._overall
-    print("nactive", active.sum())
-
-    ci = approximate_conditional_density(TS)
-    ci.solve_approx()
-
     active_set = np.asarray([i for i in range(p) if active[i]])
-    true_support = np.asarray([i for i in range(p) if i < s])
     nactive = np.sum(active)
-    print("active set, true_support", active_set, true_support)
-    true_vec = beta[active]
-    print("true coefficients", true_vec)
+    sys.stderr.write("number of active selected by thresholding" + str(nactive) + "\n")
+    sys.stderr.write("Active set selected by thresholding" + str(active_set) + "\n")
+    sys.stderr.write("Observed target" + str(TS.target_observed) + "\n")
+
+    if nactive == 0:
+        return None
 
-    if (set(active_set).intersection(set(true_support)) == set(true_support))== True:
+    else:
+        true_vec = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
 
-        ci_active = np.zeros((nactive, 2))
-        covered = np.zeros(nactive, np.bool)
-        ci_length = np.zeros(nactive)
-        pivots = np.zeros(nactive)
+        sys.stderr.write("True target to be covered" + str(true_vec) + "\n")
 
         class target_class(object):
             def __init__(self, target_cov):
@@ -78,43 +61,65 @@ def __init__(self, target_cov):
                 self.shape = target_cov.shape
 
         target = target_class(TS.target_cov)
+
         ci_naive = naive_confidence_intervals(target, TS.target_observed)
-        naive_pvals = naive_pvalues(target, TS.target_observed, true_vec)
         naive_covered = np.zeros(nactive)
-        toc = time.time()
+        naive_risk = np.zeros(nactive)
 
-        for j in range(nactive):
-            ci_active[j, :] = np.array(ci.approximate_ci(j))
-            if (ci_active[j, 0] <= true_vec[j]) and (ci_active[j,1] >= true_vec[j]):
-                covered[j] = 1
-            ci_length[j] = ci_active[j,1] - ci_active[j,0]
-            print(ci_active[j, :])
-            pivots[j] = ci.approximate_pvalue(j, true_vec[j])
-
-            # naive ci
-            if (ci_naive[j,0]<=true_vec[j]) and (ci_naive[j,1]>=true_vec[j]):
-                naive_covered[j]+=1
-
-        tic = time.time()
-        print('ci time now', tic - toc)
+        ci = approximate_conditional_density(TS)
+        ci.solve_approx()
 
-        return covered, ci_length, pivots, naive_covered, naive_pvals
-    #else:
-    #    return 0
+        ci_sel = np.zeros((nactive, 2))
+        sel_MLE = np.zeros(nactive)
+        sel_length = np.zeros(nactive)
 
-def report(niter=200, **kwargs):
-
-    kwargs = {'s': 0, 'n': 200, 'p': 20, 'snr': 7, 'loss': 'gaussian', 'randomizer': 'gaussian'}
-    split_report = reports.reports['test_approximate_ci']
-    screened_results = reports.collect_multiple_runs(split_report['test'],
-                                                     split_report['columns'],
-                                                     niter,
-                                                     reports.summarize_all,
-                                                     **kwargs)
+        for j in range(nactive):
+            ci_sel[j, :] = np.array(ci.approximate_ci(j))
+            sel_MLE[j] = ci.approx_MLE_solver(j, step=1, nstep=150)[0]
+            sel_length[j] = ci_sel[j, 1] - ci_sel[j, 0]
 
-    fig = reports.pivot_plot_plus_naive(screened_results)
-    fig.savefig('approx_pivots_threshold.pdf')
+        sel_covered = np.zeros(nactive, np.bool)
+        sel_risk = np.zeros(nactive)
 
+        for j in range(nactive):
 
-if __name__=='__main__':
-    report()
\ No newline at end of file
+            sel_risk[j] = (sel_MLE[j] - true_vec[j]) ** 2.
+            naive_risk[j] = (TS.target_observed[j]- true_vec[j]) ** 2.
+
+            if (ci_sel[j, 0] <= true_vec[j]) and (ci_sel[j, 1] >= true_vec[j]):
+                sel_covered[j] = 1
+            if (ci_naive[j, 0] <= true_vec[j]) and (ci_naive[j, 1] >= true_vec[j]):
+                naive_covered[j] = 1
+
+        print("lengths", sel_length.sum()/nactive)
+        print("selective intervals", ci_sel.T)
+        print("risks", sel_risk.sum()/nactive)
+
+        return np.transpose(np.vstack((ci_sel[:, 0],
+                                       ci_sel[:, 1],
+                                       ci_naive[:,0],
+                                       ci_naive[:, 1],
+                                       sel_MLE,
+                                       TS.target_observed,
+                                       sel_covered,
+                                       naive_covered,
+                                       sel_risk,
+                                       naive_risk)))
+
+
+def test_threshold(n, p, s, signal):
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+    true_mean = X.dot(beta)
+    threshold = test_approximate_inference(X,
+                                           y,
+                                           true_mean,
+                                           sigma,
+                                           seed_n=0,
+                                           lam_frac=1.,
+                                           loss='gaussian')
+
+    if threshold is not None:
+        print("output of selection adjusted inference", threshold)
+        return(threshold)
+
+test_threshold(n=100, p=50, s=0, signal=5.)
\ No newline at end of file

From 036330a405284848f669746882c2ce23f911fb8b Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51sk9f.SUNet>
Date: Thu, 31 Aug 2017 23:54:50 -0700
Subject: [PATCH 160/617] more restructuring

---
 ...approx_density.py => ci_approx_density.py} | 113 ------------
 selection/approx_ci/ci_approx_greedy_step.py  |  69 --------
 .../{estimator_approx.py => selection_map.py} | 163 +++++++++---------
 selection/approx_ci/tests/test_glm.py         |   5 +-
 selection/approx_ci/tests/test_greedy_step.py |   5 +-
 selection/approx_ci/tests/test_hiv_data.py    |   4 +-
 .../approx_ci/tests/test_threshold_score.py   |   4 +-
 7 files changed, 90 insertions(+), 273 deletions(-)
 rename selection/approx_ci/{ci_via_approx_density.py => ci_approx_density.py} (77%)
 rename selection/approx_ci/{estimator_approx.py => selection_map.py} (79%)

diff --git a/selection/approx_ci/ci_via_approx_density.py b/selection/approx_ci/ci_approx_density.py
similarity index 77%
rename from selection/approx_ci/ci_via_approx_density.py
rename to selection/approx_ci/ci_approx_density.py
index 9b14cbd5c..14d467b7b 100644
--- a/selection/approx_ci/ci_via_approx_density.py
+++ b/selection/approx_ci/ci_approx_density.py
@@ -5,119 +5,6 @@
 
 import numpy as np
 import regreg.api as rr
-from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
-from selection.randomized.threshold_score import threshold_score
-from selection.randomized.M_estimator import M_estimator
-
-class M_estimator_map(M_estimator):
-
-    def __init__(self, loss, epsilon, penalty, randomization, randomization_scale = 1.):
-        M_estimator.__init__(self, loss, epsilon, penalty, randomization)
-        self.randomization_scale = randomization_scale
-
-    def solve_approx(self):
-        self.solve()
-        (_opt_linear_term, _opt_affine_term) = self.opt_transform
-        self._opt_linear_term = np.concatenate(
-            (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
-        self._opt_affine_term = np.concatenate((_opt_affine_term[self._overall], _opt_affine_term[~self._overall]), 0)
-        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
-
-        (_score_linear_term, _) = self.score_transform
-        self._score_linear_term = np.concatenate(
-            (_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
-        self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
-        self.feasible_point = np.abs(self.initial_soln[self._overall])
-        lagrange = []
-        for key, value in self.penalty.weights.iteritems():
-            lagrange.append(value)
-        lagrange = np.asarray(lagrange)
-        self.inactive_lagrange = lagrange[~self._overall]
-
-        X, _ = self.loss.data
-        n, p = X.shape
-        self.p = p
-
-        nactive = self._overall.sum()
-        score_cov = np.zeros((p, p))
-        X_active_inv = np.linalg.inv(X[:,self._overall].T.dot(X[:,self._overall]))
-        projection_perp = np.identity(n) - X[:,self._overall].dot(X_active_inv).dot( X[:,self._overall].T)
-        score_cov[:nactive, :nactive] = X_active_inv
-        score_cov[nactive:, nactive:] = X[:,~self._overall].T.dot(projection_perp).dot(X[:,~self._overall])
-
-        self.score_target_cov = score_cov[:, :nactive]
-        self.target_cov = score_cov[:nactive, :nactive]
-        self.target_observed = self.observed_score_state[:nactive]
-        self.nactive = nactive
-
-        self.B_active = self._opt_linear_term[:nactive, :nactive]
-        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
-
-
-    def setup_map(self, j):
-
-        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
-        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
-
-        self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
-        self.offset_inactive = self.null_statistic[self.nactive:]
-
-class threshold_score_map(threshold_score):
-
-    def __init__(self, loss,
-                 threshold,
-                 randomization,
-                 active_bool,
-                 inactive_bool,
-                 randomization_scale=1.):
-
-        threshold_score.__init__(self, loss, threshold, randomization, active_bool, inactive_bool)
-        self.randomization_scale = randomization_scale
-
-    def solve_approx(self):
-        self.solve()
-        self.setup_sampler()
-        print("boundary", self.observed_opt_state, self.boundary)
-        self.feasible_point = self.observed_opt_state[self.boundary]
-        (_opt_linear_term, _opt_offset) = self.opt_transform
-        self._opt_linear_term = np.concatenate((_opt_linear_term[self.boundary, :], _opt_linear_term[self.interior, :]),
-                                               0)
-        self._opt_affine_term = np.concatenate((_opt_offset[self.boundary], _opt_offset[self.interior]), 0)
-        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
-
-        (_score_linear_term, _) = self.score_transform
-        self._score_linear_term = np.concatenate(
-            (_score_linear_term[self.boundary, :], _score_linear_term[self.interior, :]), 0)
-        self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
-        self._overall = self.boundary
-        self.inactive_lagrange = self.threshold[0] * np.ones(np.sum(~self.boundary))
-
-        X, _ = self.loss.data
-        n, p = X.shape
-        self.p = p
-        bootstrap_score = pairs_bootstrap_glm(self.loss,
-                                              self._overall,
-                                              beta_full=self._beta_full,
-                                              inactive=~self._overall)[0]
-
-        score_cov = bootstrap_cov(lambda: np.random.choice(n, size=(n,), replace=True), bootstrap_score)
-        nactive = self._overall.sum()
-        self.score_target_cov = score_cov[:, :nactive]
-        self.target_cov = score_cov[:nactive, :nactive]
-        self.target_observed = self.observed_score_state[:nactive]
-        self.nactive = nactive
-
-        self.B_active = self._opt_linear_term[:nactive, :nactive]
-        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
-
-
-    def setup_map(self, j):
-
-        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
-        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
-
-        self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
-        self.offset_inactive = self.null_statistic[self.nactive:]
 
 class nonnegative_softmax_scaled(rr.smooth_atom):
     """
diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index 317610936..d34fab7c0 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -3,75 +3,6 @@
 import sys
 import regreg.api as rr
 from scipy.stats import norm
-from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
-from selection.randomized.greedy_step import greedy_score_step
-
-
-class greedy_score_map(greedy_score_step):
-    def __init__(self, loss,
-                       penalty,
-                       active_groups,
-                       inactive_groups,
-                       randomization,
-                       randomization_scale=1.):
-
-        greedy_score_step.__init__(self, loss,
-                                   penalty,
-                                   active_groups,
-                                   inactive_groups,
-                                   randomization)
-
-        self.randomization_scale = randomization_scale
-
-    def solve_approx(self):
-        self.solve()
-        self.setup_sampler()
-        X, _ = self.loss.data
-        n, p = X.shape
-        self.p = p
-        self.feasible_point = self.observed_scaling
-        self._overall = np.zeros(p, dtype=bool)
-        # print(self.selection_variable['variables'])
-        self._overall[self.selection_variable['variables']] = 1
-
-        self.observed_opt_state = np.hstack([self.observed_scaling, self.observed_subgradients])
-
-        _opt_linear_term = np.concatenate((np.atleast_2d(self.maximizing_subgrad).T, self.losing_padding_map), 1)
-        self._opt_linear_term = np.concatenate(
-            (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
-
-        self.opt_transform = (self._opt_linear_term, np.zeros(p))
-
-        (self._score_linear_term, _) = self.score_transform
-
-        self.inactive_lagrange = self.observed_scaling * self.penalty.weights[0] * np.ones(p - 1)
-
-        bootstrap_score = pairs_bootstrap_glm(self.loss,
-                                              self.active,
-                                              inactive=~self.active)[0]
-
-        bootstrap_target, target_observed = pairs_bootstrap_glm(self.loss,
-                                                                self._overall,
-                                                                beta_full=None,
-                                                                inactive=None)
-
-        sampler = lambda: np.random.choice(n, size=(n,), replace=True)
-        self.target_cov, target_score_cov = bootstrap_cov(sampler, bootstrap_target, cross_terms=(bootstrap_score,))
-        self.score_target_cov = np.atleast_2d(target_score_cov).T
-        self.target_observed = target_observed
-
-        nactive = self._overall.sum()
-        self.nactive = nactive
-
-        self.B_active = self._opt_linear_term[:nactive, :nactive]
-        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
-
-    def setup_map(self, j):
-        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
-        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
-
-        self.offset_active = self.null_statistic[:self.nactive]
-        self.offset_inactive = self.null_statistic[self.nactive:]
 
 
 class nonnegative_softmax_scaled(rr.smooth_atom):
diff --git a/selection/approx_ci/estimator_approx.py b/selection/approx_ci/selection_map.py
similarity index 79%
rename from selection/approx_ci/estimator_approx.py
rename to selection/approx_ci/selection_map.py
index 5d1624af4..750787380 100644
--- a/selection/approx_ci/estimator_approx.py
+++ b/selection/approx_ci/selection_map.py
@@ -1,15 +1,14 @@
 import numpy as np
 from selection.randomized.M_estimator import M_estimator
 from selection.randomized.glm import pairs_bootstrap_glm, bootstrap_cov
-
-from selection.randomized.threshold_score import threshold_score
 from selection.randomized.greedy_step import greedy_score_step
+from selection.randomized.threshold_score import threshold_score
 
-class M_estimator_approx(M_estimator):
+class M_estimator_map(M_estimator):
 
-    def __init__(self, loss, epsilon, penalty, randomization, randomizer):
+    def __init__(self, loss, epsilon, penalty, randomization, randomization_scale = 1.):
         M_estimator.__init__(self, loss, epsilon, penalty, randomization)
-        self.randomizer = randomizer
+        self.randomization_scale = randomization_scale
 
     def solve_approx(self):
         self.solve()
@@ -33,13 +32,14 @@ def solve_approx(self):
         X, _ = self.loss.data
         n, p = X.shape
         self.p = p
-        bootstrap_score = pairs_bootstrap_glm(self.loss,
-                                              self._overall,
-                                              beta_full=self._beta_full,
-                                              inactive=~self._overall)[0]
 
-        score_cov = bootstrap_cov(lambda: np.random.choice(n, size=(n,), replace=True), bootstrap_score)
         nactive = self._overall.sum()
+        score_cov = np.zeros((p, p))
+        X_active_inv = np.linalg.inv(X[:,self._overall].T.dot(X[:,self._overall]))
+        projection_perp = np.identity(n) - X[:,self._overall].dot(X_active_inv).dot( X[:,self._overall].T)
+        score_cov[:nactive, :nactive] = X_active_inv
+        score_cov[nactive:, nactive:] = X[:,~self._overall].T.dot(projection_perp).dot(X[:,~self._overall])
+
         self.score_target_cov = score_cov[:, :nactive]
         self.target_cov = score_cov[:nactive, :nactive]
         self.target_observed = self.observed_score_state[:nactive]
@@ -57,22 +57,89 @@ def setup_map(self, j):
         self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
         self.offset_inactive = self.null_statistic[self.nactive:]
 
+class greedy_score_map(greedy_score_step):
+    def __init__(self, loss,
+                       penalty,
+                       active_groups,
+                       inactive_groups,
+                       randomization,
+                       randomization_scale=1.):
+
+        greedy_score_step.__init__(self, loss,
+                                   penalty,
+                                   active_groups,
+                                   inactive_groups,
+                                   randomization)
+
+        self.randomization_scale = randomization_scale
+
+    def solve_approx(self):
+        self.solve()
+        self.setup_sampler()
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+        self.feasible_point = self.observed_scaling
+        self._overall = np.zeros(p, dtype=bool)
+        # print(self.selection_variable['variables'])
+        self._overall[self.selection_variable['variables']] = 1
+
+        self.observed_opt_state = np.hstack([self.observed_scaling, self.observed_subgradients])
+
+        _opt_linear_term = np.concatenate((np.atleast_2d(self.maximizing_subgrad).T, self.losing_padding_map), 1)
+        self._opt_linear_term = np.concatenate(
+            (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
+
+        self.opt_transform = (self._opt_linear_term, np.zeros(p))
+
+        (self._score_linear_term, _) = self.score_transform
+
+        self.inactive_lagrange = self.observed_scaling * self.penalty.weights[0] * np.ones(p - 1)
+
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self.active,
+                                              inactive=~self.active)[0]
+
+        bootstrap_target, target_observed = pairs_bootstrap_glm(self.loss,
+                                                                self._overall,
+                                                                beta_full=None,
+                                                                inactive=None)
+
+        sampler = lambda: np.random.choice(n, size=(n,), replace=True)
+        self.target_cov, target_score_cov = bootstrap_cov(sampler, bootstrap_target, cross_terms=(bootstrap_score,))
+        self.score_target_cov = np.atleast_2d(target_score_cov).T
+        self.target_observed = target_observed
+
+        nactive = self._overall.sum()
+        self.nactive = nactive
+
+        self.B_active = self._opt_linear_term[:nactive, :nactive]
+        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
+
+    def setup_map(self, j):
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
+        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
+
+        self.offset_active = self.null_statistic[:self.nactive]
+        self.offset_inactive = self.null_statistic[self.nactive:]
+
 
-class threshold_score_approx(threshold_score):
+class threshold_score_map(threshold_score):
 
     def __init__(self, loss,
                  threshold,
                  randomization,
                  active_bool,
                  inactive_bool,
-                 randomizer):
+                 randomization_scale=1.):
 
         threshold_score.__init__(self, loss, threshold, randomization, active_bool, inactive_bool)
-        self.randomizer = randomizer
+        self.randomization_scale = randomization_scale
 
     def solve_approx(self):
         self.solve()
         self.setup_sampler()
+        print("boundary", self.observed_opt_state, self.boundary)
         self.feasible_point = self.observed_opt_state[self.boundary]
         (_opt_linear_term, _opt_offset) = self.opt_transform
         self._opt_linear_term = np.concatenate((_opt_linear_term[self.boundary, :], _opt_linear_term[self.interior, :]),
@@ -112,74 +179,4 @@ def setup_map(self, j):
         self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
 
         self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
-        self.offset_inactive = self.null_statistic[self.nactive:]
-
-class greedy_score_step_approx(greedy_score_step):
-
-    def __init__(self, loss,
-                 penalty,
-                 active_groups,
-                 inactive_groups,
-                 randomization,
-                 randomizer):
-
-        greedy_score_step.__init__(self, loss,
-                                 penalty,
-                                 active_groups,
-                                 inactive_groups,
-                                 randomization)
-        self.randomizer = randomizer
-
-
-    def solve_approx(self):
-
-        self.solve()
-        self.setup_sampler()
-        p = self.inactive.sum()
-        self.feasible_point = self.observed_scaling
-        self._overall = np.zeros(p, dtype=bool)
-        #print(self.selection_variable['variables'])
-        self._overall[self.selection_variable['variables']] = 1
-
-        self.observed_opt_state = np.hstack([self.observed_scaling, self.observed_subgradients])
-
-        _opt_linear_term = np.concatenate((np.atleast_2d(self.maximizing_subgrad).T, self.losing_padding_map), 1)
-        self._opt_linear_term = np.concatenate((_opt_linear_term[self._overall,:], _opt_linear_term[~self._overall,:]), 0)
-
-        self.opt_transform = (self._opt_linear_term, np.zeros(p))
-
-        (self._score_linear_term, _) = self.score_transform
-
-        self.inactive_lagrange = self.observed_scaling * self.penalty.weights[0] * np.ones(p-1)
-
-        X, _ = self.loss.data
-        n, p = X.shape
-        self.p = p
-        bootstrap_score = pairs_bootstrap_glm(self.loss,
-                                              self.active,
-                                              inactive=~self.active)[0]
-
-        bootstrap_target, target_observed = pairs_bootstrap_glm(self.loss,
-                                                             self._overall,
-                                                             beta_full=None,
-                                                             inactive=None)
-
-        sampler = lambda : np.random.choice(n, size=(n,), replace=True)
-        self.target_cov, target_score_cov = bootstrap_cov(sampler, bootstrap_target, cross_terms=(bootstrap_score,))
-        self.score_target_cov = np.atleast_2d(target_score_cov).T
-        self.target_observed = target_observed
-
-        nactive = self._overall.sum()
-        self.nactive = nactive
-
-        self.B_active = self._opt_linear_term[:nactive, :nactive]
-        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
-
-
-    def setup_map(self, j):
-
-        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
-        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
-
-        self.offset_active = self.null_statistic[:self.nactive]
         self.offset_inactive = self.null_statistic[self.nactive:]
\ No newline at end of file
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index 0e0fee636..a577b6376 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -3,8 +3,9 @@
 import sys
 import regreg.api as rr
 from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.randomized_lasso import (M_estimator_map,
-                                                  approximate_conditional_density)
+from selection.approx_ci.selection_map import M_estimator_map
+from selection.approx_ci.ci_approx_density import approximate_conditional_density
+
 from selection.randomized.query import naive_confidence_intervals
 
 def test_approximate_inference(X,
diff --git a/selection/approx_ci/tests/test_greedy_step.py b/selection/approx_ci/tests/test_greedy_step.py
index 084e8a25b..2b1b97ef8 100644
--- a/selection/approx_ci/tests/test_greedy_step.py
+++ b/selection/approx_ci/tests/test_greedy_step.py
@@ -3,8 +3,9 @@
 import numpy as np
 import regreg.api as rr
 from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_approx_greedy_step import (greedy_score_map,
-                                                       approximate_conditional_density)
+from selection.approx_ci.selection_map import greedy_score_map
+from selection.approx_ci.ci_approx_greedy_step import approximate_conditional_density
+
 
 from selection.randomized.query import naive_confidence_intervals
 
diff --git a/selection/approx_ci/tests/test_hiv_data.py b/selection/approx_ci/tests/test_hiv_data.py
index 49a311a7c..20593dc7e 100644
--- a/selection/approx_ci/tests/test_hiv_data.py
+++ b/selection/approx_ci/tests/test_hiv_data.py
@@ -1,8 +1,8 @@
 from __future__ import print_function
 import os, numpy as np, pandas, statsmodels.api as sm
 import regreg.api as rr
-from selection.approx_ci.ci_via_approx_density import (M_estimator_map,
-                                                      approximate_conditional_density)
+from selection.approx_ci.selection_map import M_estimator_map
+from selection.approx_ci.ci_approx_density import approximate_conditional_density
 
 from selection.randomized.query import naive_confidence_intervals
 
diff --git a/selection/approx_ci/tests/test_threshold_score.py b/selection/approx_ci/tests/test_threshold_score.py
index db60a9529..89cf494b0 100644
--- a/selection/approx_ci/tests/test_threshold_score.py
+++ b/selection/approx_ci/tests/test_threshold_score.py
@@ -3,8 +3,8 @@
 import sys
 import regreg.api as rr
 from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.ci_via_approx_density import (threshold_score_map,
-                                                       approximate_conditional_density)
+from selection.approx_ci.selection_map import threshold_score_map
+from selection.approx_ci.ci_approx_density import approximate_conditional_density
 
 from selection.randomized.query import naive_confidence_intervals
 

From d00127642157c1eda93d8f8a790d30ba885c2f75 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Fri, 1 Sep 2017 00:17:30 -0700
Subject: [PATCH 161/617] grad needs a minus

---
 selection/randomized/query.py               |  2 +-
 selection/randomized/tests/test_sampling.py | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 7eb5af32f..c55e98aa9 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1129,7 +1129,7 @@ def gradient(self, state):
             _, opt_grad[self.opt_slice[i]] = \
                 self.objectives[i].randomization_gradient(0., self.target_transform[i], opt_state[self.opt_slice[i]])
 
-        return opt_grad
+        return -opt_grad
 
 
     def sample(self, ndraw, burnin, stepsize=None):
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index b217b292a..0fc2dfd17 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -35,20 +35,21 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     active_set = np.where(active)[0]
 
     for i in range(nactive):
+        var = active_set[i]
         if signs[i]>0:
-            lower[i] = -np.dot(X[:, active_set[i]].T,y) + lam*signs[i]
+            lower[i] = -np.dot(X[:, var].T,y) + lam*signs[var]
             upper[i] = np.inf
         else:
             lower[i] = -np.inf
-            upper[i] = -np.dot(X[:,active_set[i]].T,y) + lam*signs[i]
+            upper[i] = -np.dot(X[:,var].T,y) + lam*signs[var]
 
     lower[range(nactive,p)] = -lam-np.dot(X[:, ~active].T, y)
     upper[range(nactive,p)]= lam-np.dot(X[:,~active].T, y)
 
     omega_samples = sampling_truncated_dist(lower, upper, randomization)
 
-    beta_samples = (omega_samples[:,:nactive]+np.dot(X[:,active].T, y))/(epsilon+1)
-    u_samples = (omega_samples[:, nactive:]+np.dot(X[:,~active].T, y))/lam
+    beta_samples = (omega_samples[:,:nactive]+np.dot(X[:,active].T, y)-lam*signs[active])/(epsilon+1)
+    u_samples = (omega_samples[:, nactive:]+np.dot(X[:,~active].T, y))
 
     return np.concatenate((beta_samples, u_samples), axis=1)
 
@@ -83,7 +84,7 @@ def _noise(n, df=np.inf):
 
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_optimization_sampler(ndraw=1000, burnin=200):
+def test_optimization_sampler(ndraw=10000, burnin=2000):
 
     cls = lasso
     for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace']):

From 7e0b403eb816e00a4be1a5582a2c1f8e5afff841 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Sun, 3 Sep 2017 19:30:36 -0700
Subject: [PATCH 162/617] weighted opt intervals added

---
 selection/randomized/query.py                 | 157 +++++++++++++++++-
 .../tests/test_opt_weighted_intervals.py      |  72 ++++++++
 selection/randomized/tests/test_sampling.py   |  14 +-
 3 files changed, 228 insertions(+), 15 deletions(-)
 create mode 100644 selection/randomized/tests/test_opt_weighted_intervals.py

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index c55e98aa9..786d27b76 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1058,7 +1058,7 @@ def __init__(self,
         # the corresponding block of `target_cov` is zeroed out
 
         # we need these attributes of multi_view
-
+        self.multi_view = multi_view
         self.nqueries = len(multi_view.objectives)
         self.opt_slice = multi_view.opt_slice
         self.objectives = multi_view.objectives
@@ -1173,6 +1173,25 @@ def sample(self, ndraw, burnin, stepsize=None):
                 samples.append(target_langevin.state.copy())
         return np.asarray(samples)
 
+
+    def setup_target(self,
+                     target_info,
+                     observed_target_state,
+                     form_covariances,
+                     target_set=None,
+                     parametric=False):
+
+        targeted_sampler.__init__(self,
+                                  self.multi_view,
+                                  target_info=target_info,
+                                  observed_target_state=observed_target_state,
+                                  form_covariances=form_covariances,
+                                  reference=None,
+                                  target_set=target_set,
+                                  parametric=parametric)
+        self._setup_target=True
+
+
     def hypothesis_test(self,
                         test_stat,
                         observed_value,
@@ -1231,7 +1250,6 @@ def hypothesis_test(self,
 
         sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
 
-
         delta = self.target_inv_cov.dot(parameter - self.reference)
         W = np.exp(sample.dot(delta))
 
@@ -1246,7 +1264,7 @@ def hypothesis_test(self,
             return 2 * min(pval, 1 - pval)
 
     def confidence_intervals(self,
-                             observed,
+                             observed_target,
                              ndraw=10000,
                              burnin=2000,
                              stepsize=None,
@@ -1287,13 +1305,20 @@ def confidence_intervals(self,
         if sample is None:
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
 
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
+        _intervals = opt_weighted_intervals(self,
+                                            sample,
+                                            observed_target)
+
+        limits = []
+
+        for i in range(observed_target.shape[0]):
+            print("ci", i)
+            keep = np.zeros_like(observed_target)
+            keep[i] = 1.
+            limits.append(_intervals.confidence_interval(keep, level=level))
+
+        return np.array(limits)
 
-        return intervals_instance.confidence_intervals_all(level=level)
 
     def coefficient_pvalues(self,
                             observed,
@@ -1838,3 +1863,117 @@ def _weights(self, candidate):
         return candidate_sample, np.exp(_logratio)
 
 
+class opt_weighted_intervals(object): # intervals_from_sample):
+
+    """
+    Location family based intervals... (cryptic)
+    randomization density should be `g` composed with the affine
+    mapping and take an argument like one row of sample
+    target_linear is the linear part of the affine mapping with
+    respect to target
+    weights for a given candidate will look like
+          randomization_density(sample + (candidate, 0, 0) - (reference, 0, 0)) /
+          randomization_density(sample)
+    if the samples are samples of \bar{\beta}. if we have samples of
+    \Delta from our reference, then the weights will look like
+    randomization_density(sample + (candidate, 0, 0))
+    randomization_density(sample + (reference, 0, 0))
+    WE ARE ASSUMING sample is sampled from targeted_sampler.reference
+    """
+
+    def __init__(self,
+                 targeted_sampler,
+                 sample,
+                 observed):
+
+        self.targeted_sampler = targeted_sampler
+        self.observed = observed.copy() # this is our observed unpenalized estimator
+        nactive = targeted_sampler.observed_target_state.shape[0]
+
+        self._normal_sample = np.random.multivariate_normal(mean=np.zeros(nactive), cov=targeted_sampler.target_cov, size =(sample.shape[0]))
+        print(self._normal_sample.shape)
+        self._sample = np.concatenate((sample, np.tile(self.observed, (sample.shape[0], 1))), axis=1)
+        self._logden = targeted_sampler.log_randomization_density(self._sample)
+        self._delta = np.concatenate((sample, self._normal_sample), axis=1)
+
+
+    def pivot(self,
+              linear_func,
+              candidate,
+              alternative='twosided'):
+        '''
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalue : np.float
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        observed_stat = self.targeted_sampler.observed_target_state.dot(linear_func)
+
+        candidate_sample, weights = self._weights(linear_func, candidate)
+        #print("candidate", candidate)
+        sample_stat = np.array([linear_func.dot(s) for s in candidate_sample[:, self.targeted_sampler.target_slice]])
+
+        pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
+
+        if alternative == 'twosided':
+            return 2 * min(pivot, 1 - pivot)
+        elif alternative == 'less':
+            return pivot
+        else:
+            return 1 - pivot
+
+    def confidence_interval(self, linear_func, level=0.90, how_many_sd=20):
+
+        target_delta = self._delta[:,self.targeted_sampler.target_slice]
+        projected_delta = target_delta.dot(linear_func)
+        projected_observed = self.observed.dot(linear_func)
+        std_projected_delta = np.sqrt(np.dot(linear_func.T, self.targeted_sampler.target_cov).dot(linear_func))
+
+        delta_min, delta_max = projected_delta.min(), projected_delta.max()
+
+        _norm = np.linalg.norm(linear_func)
+        grid_min, grid_max = -how_many_sd * np.std(projected_delta), how_many_sd * np.std(projected_delta)
+        print("grid", grid_min, grid_max)
+
+        def _rootU(gamma):
+            return self.pivot(linear_func,
+                              projected_observed + gamma,
+                              alternative='less') - (1 - level) / 2.
+        def _rootL(gamma):
+            return self.pivot(linear_func,
+                              projected_observed + gamma,
+                              alternative='less') - (1 + level) / 2.
+
+        upper = bisect(_rootU, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
+        lower = bisect(_rootL, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
+
+        return lower + projected_observed, upper + projected_observed
+
+    # Private methods
+
+    def _weights(self, linear_func, candidate):
+
+        candidate_sample = self._sample.copy()
+
+        _norm = np.linalg.norm(linear_func)
+        projection_matrix = np.true_divide(np.dot(linear_func, linear_func.T), _norm**2)
+        residual_matrix = np.identity(linear_func.shape[0])-projection_matrix
+        candidate_sample[:, self.targeted_sampler.target_slice] = \
+            candidate_sample[:, self.targeted_sampler.target_slice].dot(residual_matrix)
+
+        candidate_sample[:, self.targeted_sampler.target_slice] += \
+            (self._normal_sample+np.ones(self._normal_sample.shape)*candidate).dot(projection_matrix)
+
+        _lognum = self.targeted_sampler.log_randomization_density(candidate_sample)
+
+        _logratio = _lognum - self._logden
+        _logratio -= _logratio.max()
+
+        return candidate_sample, np.exp(_logratio)
+
+
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
new file mode 100644
index 000000000..69ee05aad
--- /dev/null
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -0,0 +1,72 @@
+from itertools import product
+import numpy as np
+import nose.tools as nt
+
+from selection.randomized.convenience import lasso, step, threshold
+from selection.randomized.query import optimization_sampler
+from selection.tests.instance import (gaussian_instance,
+                               logistic_instance,
+                               poisson_instance)
+from selection.tests.flags import SMALL_SAMPLES
+from selection.tests.decorators import set_sampling_params_iftrue
+from scipy.stats import t as tdist
+from selection.randomized.glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
+from selection.randomized.M_estimator import restricted_Mest
+
+
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
+
+    cls = lasso
+    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace']):
+
+        inst, const = const_info
+
+        X, Y = inst(n=100, p=10, s=0)[:2]
+        n, p = X.shape
+
+        W = np.ones(X.shape[1]) * 1
+        conv = const(X, Y, W, randomizer=rand)
+        signs = conv.fit()
+        print("signs", signs)
+
+        #marginalizing_groups = np.zeros(p, np.bool)
+        #marginalizing_groups[:int(p/2)] = True
+        #conditioning_groups = ~marginalizing_groups
+        #conditioning_groups[-int(p/4):] = False
+
+        selected_features = conv._view.selection_variable['variables']
+
+        #conv.summary(selected_features,
+        #             ndraw=ndraw,
+        #             burnin=burnin,
+        #             compute_intervals=True)
+
+        #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+        #                           conditioning_groups=conditioning_groups)
+
+        conv._queries.setup_sampler(form_covariances=None)
+        conv._queries.setup_opt_state()
+        opt_sampler = optimization_sampler(conv._queries)
+
+        S = opt_sampler.sample(ndraw,
+                               burnin,
+                               stepsize=1.e-3)
+        #print(S.shape)
+        #print([np.mean(S[:,i]) for i in range(p)])
+
+        unpenalized_mle = restricted_Mest(conv.loglike, selected_features)
+        form_covariances = glm_nonparametric_bootstrap(n, n)
+        conv._queries.setup_sampler(form_covariances)
+        boot_target, boot_target_observed = pairs_bootstrap_glm(conv.loglike, selected_features, inactive=None)
+        opt_sampler.setup_target(target_info=boot_target,
+                                 observed_target_state=unpenalized_mle,
+                                 form_covariances=form_covariances)
+
+        selective_CI = opt_sampler.confidence_intervals(opt_sampler.observed_target_state, sample=S)
+        print(selective_CI)
+
+        return selective_CI
+
+
+test_opt_weighted_intervals()
\ No newline at end of file
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 0fc2dfd17..1cf5ffc5e 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -27,7 +27,7 @@ def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
     return samples
 
 
-def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =1000):
+def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =10000):
     p = X.shape[1]
     nactive = active.sum()
     lower = np.zeros(p)
@@ -36,7 +36,7 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
 
     for i in range(nactive):
         var = active_set[i]
-        if signs[i]>0:
+        if signs[var]>0:
             lower[i] = -np.dot(X[:, var].T,y) + lam*signs[var]
             upper[i] = np.inf
         else:
@@ -46,12 +46,13 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     lower[range(nactive,p)] = -lam-np.dot(X[:, ~active].T, y)
     upper[range(nactive,p)]= lam-np.dot(X[:,~active].T, y)
 
-    omega_samples = sampling_truncated_dist(lower, upper, randomization)
+    omega_samples = sampling_truncated_dist(lower, upper, randomization, nsamples=nsamples)
 
-    beta_samples = (omega_samples[:,:nactive]+np.dot(X[:,active].T, y)-lam*signs[active])/(epsilon+1)
+    abs_beta_samples = np.true_divide(omega_samples[:,:nactive]+np.dot(X[:,active].T, y)-lam*signs[active], (epsilon+1)*signs[active])
     u_samples = (omega_samples[:, nactive:]+np.dot(X[:,~active].T, y))
 
-    return np.concatenate((beta_samples, u_samples), axis=1)
+    return np.concatenate((abs_beta_samples, u_samples), axis=1)
+
 
 def orthogonal_design(n, p, s, signal, sigma, df=np.inf, random_signs=False):
     X = np.identity(n)[:,:p]
@@ -84,7 +85,7 @@ def _noise(n, df=np.inf):
 
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_optimization_sampler(ndraw=10000, burnin=2000):
+def test_optimization_sampler(ndraw=20000, burnin=2000):
 
     cls = lasso
     for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace']):
@@ -113,6 +114,7 @@ def test_optimization_sampler(ndraw=10000, burnin=2000):
 
         #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
         #                           conditioning_groups=conditioning_groups)
+
         conv._queries.setup_sampler(form_covariances=None)
         conv._queries.setup_opt_state()
         target_sampler = optimization_sampler(conv._queries)

From 717654fd1df6bfb9b806aea53675e98a8391dbf6 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 6 Sep 2017 22:31:23 -0700
Subject: [PATCH 163/617] default args to decompose_subgradient, setup_target
 to compute quantities needed for linear decomposition

---
 selection/randomized/M_estimator.py           | 13 ++++-
 selection/randomized/query.py                 | 55 ++++++++++++++++---
 .../tests/test_optimization_sampler.py        | 38 +++++++------
 3 files changed, 77 insertions(+), 29 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 1616572be..6e238cfc8 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -359,20 +359,27 @@ def projection(self, opt_state):
 
     # optional things to condition on
 
-    def decompose_subgradient(self, conditioning_groups, marginalizing_groups=None):
+    def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=None):
         """
         ADD DOCSTRING
 
         conditioning_groups and marginalizing_groups should be disjoint
         """
 
-        if marginalizing_groups is not None and (conditioning_groups * marginalizing_groups).sum() > 0:
+        groups = np.unique(self.penalty.groups)
+
+        if conditioning_groups is None:
+            conditioning_groups = np.zeros_like(groups, np.bool)
+
+        if marginalizing_groups is None:
+            marginalizing_groups = np.zeros_like(groups, np.bool)
+
+        if (conditioning_groups * marginalizing_groups).sum() > 0:
             raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
 
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-        groups = np.unique(self.penalty.groups)
         condition_inactive_groups = np.zeros_like(groups, dtype=bool)
         condition_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
         moving_inactive_groups = np.zeros_like(groups, dtype=bool)
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 7eb5af32f..18b965381 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -268,7 +268,6 @@ def setup_target(self,
                      reference=None,
                      target_set=None,
                      parametric=False):
-
         '''
         Parameters
         ----------
@@ -1057,8 +1056,15 @@ def __init__(self,
         # is assumed to be independent of the rest
         # the corresponding block of `target_cov` is zeroed out
 
+        # make sure we setup the queries
+
+        multi_view.setup_sampler(form_covariances=None)
+        multi_view.setup_opt_state()
+
         # we need these attributes of multi_view
 
+        self.multi_view = multi_view
+
         self.nqueries = len(multi_view.objectives)
         self.opt_slice = multi_view.opt_slice
         self.objectives = multi_view.objectives
@@ -1129,8 +1135,7 @@ def gradient(self, state):
             _, opt_grad[self.opt_slice[i]] = \
                 self.objectives[i].randomization_gradient(0., self.target_transform[i], opt_state[self.opt_slice[i]])
 
-        return opt_grad
-
+        return -opt_grad
 
     def sample(self, ndraw, burnin, stepsize=None):
         '''
@@ -1173,6 +1178,33 @@ def sample(self, ndraw, burnin, stepsize=None):
                 samples.append(target_langevin.state.copy())
         return np.asarray(samples)
 
+    def setup_target(self, target_info, form_covariances, parametric=False):
+        """
+        This computes the matrices used in the linear decomposition
+        that will be used in computing weights for the sampler.
+        """
+
+        self.score_cov = []
+        target_cov_sum = 0
+
+        # we could pararallelize this over all views at once
+
+        for i in range(self.nqueries):
+            view = self.objectives[i]
+            score_info = view.setup_sampler(form_covariances)
+            if parametric == False:
+                target_cov, cross_cov = form_covariances(target_info,  
+                                                         cross_terms=[score_info],
+                                                         nsample=self.multi_view.nboot[i])
+            else:
+                target_cov, cross_cov = form_covariances(target_info, 
+                                                         cross_terms=[score_info])
+
+            target_cov_sum += target_cov
+            self.score_cov.append(cross_cov)
+
+        self.target_cov = target_cov_sum / self.nqueries
+
     def hypothesis_test(self,
                         test_stat,
                         observed_value,
@@ -1287,13 +1319,18 @@ def confidence_intervals(self,
         if sample is None:
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
 
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
+        _intervals = opt_weighted_intervals(self,
+                                            sample,
+                                            observed_target)
 
-        return intervals_instance.confidence_intervals_all(level=level)
+        limits = []
+
+        for i in range(observed_target.shape[0]):
+            keep = np.zeros_like(observed_target)
+            keep[i] = 1.
+            limits.append(_intervals.confidence_interval(keep, level=level))
+
+        return np.array(limits)
 
     def coefficient_pvalues(self,
                             observed,
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
index 337b6a042..e0eb58e81 100644
--- a/selection/randomized/tests/test_optimization_sampler.py
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -14,13 +14,15 @@
 def test_optimization_sampler(ndraw=1000, burnin=200):
 
     cls = lasso
-    for const_info, rand in product(zip([gaussian_instance,
-                                         logistic_instance,
-                                         poisson_instance],
-                                        [cls.gaussian,
-                                         cls.logistic,
-                                         cls.poisson]),
-                              ['gaussian', 'logistic', 'laplace']):
+    for const_info, rand, marginalize, condition in product(zip([gaussian_instance,
+                                                                 logistic_instance,
+                                                                 poisson_instance],
+                                                                [cls.gaussian,
+                                                                 cls.logistic,
+                                                                 cls.poisson]),
+                                                                ['gaussian', 'logistic', 'laplace'],
+                                                                [False, True],
+                                                                [False, True]):
 
         inst, const = const_info
         X, Y = inst()[:2]
@@ -30,20 +32,21 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
         conv = const(X, Y, W, randomizer=rand)
         signs = conv.fit()
 
-        marginalizing_groups = np.zeros(p, np.bool)
-        marginalizing_groups[:int(p/2)] = True
-        
-        conditioning_groups = ~marginalizing_groups
-        conditioning_groups[-int(p/4):] = False
+        if marginalize:
+            marginalizing_groups = np.zeros(p, np.bool)
+            marginalizing_groups[:int(p/2)] = True
+        else:
+            marginalizing_groups = None
+
+        if condition:
+            conditioning_groups = ~marginalizing_groups
+            conditioning_groups[-int(p/4):] = False
+        else:
+            conditioning_groups = None
 
         selected_features = np.zeros(p, np.bool)
         selected_features[:3] = True
 
-        conv.summary(selected_features,
-                     ndraw=ndraw,
-                     burnin=burnin,
-                     compute_intervals=True)
-
         conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
                                    conditioning_groups=conditioning_groups)
 
@@ -53,3 +56,4 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
                                   burnin,
                                   stepsize=1.e-3)
 
+        stop

From e3f48ac8381943cbfa24aa32e63f911aeb1c50c7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 6 Sep 2017 22:42:15 -0700
Subject: [PATCH 164/617] jelena's opt intervals

---
 selection/randomized/query.py | 114 +++++++++++++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 18b965381..6b6e5dfcd 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1430,13 +1430,11 @@ def reconstruction_map(self, state):
         '''
 
         state = np.atleast_2d(state)
-        #print(state.shape)
         if len(state.shape) > 2:
             raise ValueError('expecting at most 2-dimensional array')
 
         target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
-        #reconstructed = np.zeros((opt_state.shape[0],self.randomization_length_total))
 
         for i in range(self.nqueries):
             reconstructed[:, self.randomization_slice[i]] = self.objectives[i].reconstruction_map(target_state,
@@ -1875,3 +1873,115 @@ def _weights(self, candidate):
         return candidate_sample, np.exp(_logratio)
 
 
+class opt_weighted_intervals(object): # intervals_from_sample):
+
+    """
+    Location family based intervals... (cryptic)
+    randomization density should be `g` composed with the affine
+    mapping and take an argument like one row of sample
+    target_linear is the linear part of the affine mapping with
+    respect to target
+    weights for a given candidate will look like
+          randomization_density(sample + (candidate, 0, 0) - (reference, 0, 0)) /
+          randomization_density(sample)
+    if the samples are samples of \bar{\beta}. if we have samples of
+    \Delta from our reference, then the weights will look like
+    randomization_density(sample + (candidate, 0, 0))
+    randomization_density(sample + (reference, 0, 0))
+    WE ARE ASSUMING sample is sampled from targeted_sampler.reference
+    """
+
+    def __init__(self,
+                 targeted_sampler,
+                 sample,
+                 observed):
+
+        self.targeted_sampler = targeted_sampler
+        self.observed = observed.copy() # this is our observed unpenalized estimator
+        nactive = targeted_sampler.observed_target_state.shape[0]
+
+        self._normal_sample = np.random.multivariate_normal(mean=np.zeros(nactive), cov=targeted_sampler.target_cov, size =(sample.shape[0]))
+        print(self._normal_sample.shape)
+        self._sample = np.concatenate((sample, np.tile(self.observed, (sample.shape[0], 1))), axis=1)
+        self._logden = targeted_sampler.log_randomization_density(self._sample)
+        self._delta = np.concatenate((sample, self._normal_sample), axis=1)
+
+
+    def pivot(self,
+              linear_func,
+              candidate,
+              alternative='twosided'):
+        '''
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalue : np.float
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        observed_stat = self.targeted_sampler.observed_target_state.dot(linear_func)
+
+        candidate_sample, weights = self._weights(linear_func, candidate)
+
+        sample_stat = np.array([linear_func.dot(s) for s in candidate_sample[:, self.targeted_sampler.target_slice]])
+
+        pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
+
+        if alternative == 'twosided':
+            return 2 * min(pivot, 1 - pivot)
+        elif alternative == 'less':
+            return pivot
+        else:
+            return 1 - pivot
+
+    def confidence_interval(self, linear_func, level=0.90, how_many_sd=20):
+        '''
+        Bisect `self.pivot` for a two-sided interval along `linear_func`.
+
+        Endpoints are shifts `gamma` where the 'less' pivot equals
+        `(1 - level) / 2` (upper) or `(1 + level) / 2` (lower), searched within
+        +/- `how_many_sd` standard deviations of `self._delta` along `linear_func`.
+        '''
+        target_delta = self._delta[:,self.targeted_sampler.target_slice]
+        projected_observed = self.observed.dot(linear_func)
+
+        grid_max = how_many_sd * np.std(target_delta.dot(linear_func))
+        grid_min = -grid_max
+
+        def _rootU(gamma):
+            return self.pivot(linear_func,
+                              projected_observed + gamma,
+                              alternative='less') - (1 - level) / 2.
+        def _rootL(gamma):
+            return self.pivot(linear_func,
+                              projected_observed + gamma,
+                              alternative='less') - (1 + level) / 2.
+
+        upper = bisect(_rootU, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
+        lower = bisect(_rootL, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
+        return lower + projected_observed, upper + projected_observed
+
+    # Private methods
+
+    def _weights(self, linear_func, candidate):
+
+        candidate_sample = self._sample.copy()
+
+        _norm = np.linalg.norm(linear_func)
+        projection_matrix = np.true_divide(np.dot(linear_func, linear_func.T), _norm**2)
+        residual_matrix = np.identity(linear_func.shape[0])-projection_matrix
+        candidate_sample[:, self.targeted_sampler.target_slice] = \
+            candidate_sample[:, self.targeted_sampler.target_slice].dot(residual_matrix)
+
+        candidate_sample[:, self.targeted_sampler.target_slice] += \
+            (self._normal_sample+np.ones(self._normal_sample.shape)*candidate).dot(projection_matrix)
+
+        _lognum = self.targeted_sampler.log_randomization_density(candidate_sample)
+
+        _logratio = _lognum - self._logden
+        _logratio -= _logratio.max()
+
+        return candidate_sample, np.exp(_logratio)

From 2fb0c2b79e79a27c756f2f18dcd980db6df6b085 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 6 Sep 2017 22:59:18 -0700
Subject: [PATCH 165/617] removing translate_intervals

---
 selection/randomized/query.py | 468 +---------------------------------
 1 file changed, 2 insertions(+), 466 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index aa73d24a2..d12852e8b 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -851,177 +851,6 @@ def log_randomization_density(self, state):
             value += log_dens(reconstructed[:,self.opt_slice[i]])
         return np.squeeze(value)
 
-
-    def hypothesis_test_translate(self,
-                                  sample,
-                                  test_stat,
-                                  observed_target,
-                                  parameter=None,
-                                  alternative='twosided'):
-
-        '''
-        Carry out a hypothesis test
-        based on the distribution of the
-        residual `observed_target - target`
-        sampled at `self.reference`.
-        Parameters
-        ----------
-        sample : np.array
-           Sample of target and optimization variables drawn at `self.reference`.
-        test_stat : callable
-           Test statistic to evaluate on sample from
-           selective distribution.
-        observed_target : np.float
-           Observed value of target estimate.
-           Used in p-value calculation.
-        parameter : np.float (optional)
-           If not None, defaults to `self.reference`.
-           Otherwise, sample is reweighted using Gaussian tilting.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        _intervals = translate_intervals(self,
-                                         sample,
-                                         observed_target)
-
-        if parameter is None:
-            parameter = self.reference
-
-        return _intervals.pivot(test_stat,
-                                parameter,
-                                alternative=alternative)
-
-
-    def confidence_intervals_translate(self,
-                                       observed_target,
-                                       ndraw=10000,
-                                       burnin=2000,
-                                       stepsize=None,
-                                       sample=None,
-                                       level=0.9):
-        '''
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        level : float (optional)
-            Specify the
-            confidence level.
-        Notes
-        -----
-        Construct selective confidence intervals
-        for each parameter of the target.
-        Returns
-        -------
-        intervals : [(float, float)]
-            List of confidence intervals.
-        '''
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize, keep_opt=True)
-
-        _intervals = translate_intervals(self,
-                                         sample,
-                                         observed_target)
-
-        limits = []
-
-        for i in range(observed_target.shape[0]):
-            keep = np.zeros_like(observed_target)
-            keep[i] = 1.
-            limits.append(_intervals.confidence_interval(keep, level=level))
-
-        return np.array(limits)
-
-    def coefficient_pvalues_translate(self,
-                                      observed_target,
-                                      parameter=None,
-                                      ndraw=10000,
-                                      burnin=2000,
-                                      stepsize=None,
-                                      sample=None,
-                                      alternative='twosided'):
-        '''
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        parameter : np.float (optional)
-            A vector of parameters with shape `self.shape`
-            at which to evaluate p-values. Defaults
-            to `np.zeros(self.shape)`.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        pvalues : np.float
-            P values for each coefficient.
-
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize, keep_opt=True)
-
-        if parameter is None:
-            parameter = np.zeros_like(observed_target)
-
-        _intervals = translate_intervals(self,
-                                         sample,
-                                         observed_target)
-
-        pvalues = []
-
-        for i in range(observed_target.shape[0]):
-            keep = np.zeros_like(observed_target)
-            keep[i] = 1.
-
-            _parameter = self.reference.copy()
-            _parameter[i] = parameter[i]
-            pvalues.append(_intervals.pivot(lambda x: keep.dot(x),
-                                            _parameter,
-                                            alternative=alternative))
-
-        return np.array(pvalues)
-
-
 class optimization_sampler(targeted_sampler):
 
     '''
@@ -1463,178 +1292,6 @@ def log_randomization_density(self, state):
             value += log_dens(reconstructed[:,self.opt_slice[i]])
         return np.squeeze(value)
 
-
-    def hypothesis_test_translate(self,
-                                  sample,
-                                  test_stat,
-                                  observed_target,
-                                  parameter=None,
-                                  alternative='twosided'):
-
-        '''
-        Carry out a hypothesis test
-        based on the distribution of the
-        residual `observed_target - target`
-        sampled at `self.reference`.
-        Parameters
-        ----------
-        sample : np.array
-           Sample of target and optimization variables drawn at `self.reference`.
-        test_stat : callable
-           Test statistic to evaluate on sample from
-           selective distribution.
-        observed_target : np.float
-           Observed value of target estimate.
-           Used in p-value calculation.
-        parameter : np.float (optional)
-           If not None, defaults to `self.reference`.
-           Otherwise, sample is reweighted using Gaussian tilting.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        _intervals = translate_intervals(self,
-                                         sample,
-                                         observed_target)
-
-        if parameter is None:
-            parameter = self.reference
-
-        return _intervals.pivot(test_stat,
-                                parameter,
-                                alternative=alternative)
-
-
-    def confidence_intervals_translate(self,
-                                       observed_target,
-                                       ndraw=10000,
-                                       burnin=2000,
-                                       stepsize=None,
-                                       sample=None,
-                                       level=0.9):
-        '''
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        level : float (optional)
-            Specify the
-            confidence level.
-        Notes
-        -----
-        Construct selective confidence intervals
-        for each parameter of the target.
-        Returns
-        -------
-        intervals : [(float, float)]
-            List of confidence intervals.
-        '''
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize, keep_opt=True)
-
-        _intervals = translate_intervals(self,
-                                         sample,
-                                         observed_target)
-
-        limits = []
-
-        for i in range(observed_target.shape[0]):
-            keep = np.zeros_like(observed_target)
-            keep[i] = 1.
-            limits.append(_intervals.confidence_interval(keep, level=level))
-
-        return np.array(limits)
-
-    def coefficient_pvalues_translate(self,
-                                      observed_target,
-                                      parameter=None,
-                                      ndraw=10000,
-                                      burnin=2000,
-                                      stepsize=None,
-                                      sample=None,
-                                      alternative='twosided'):
-        '''
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        parameter : np.float (optional)
-            A vector of parameters with shape `self.shape`
-            at which to evaluate p-values. Defaults
-            to `np.zeros(self.shape)`.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        pvalues : np.float
-            P values for each coefficient.
-
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize, keep_opt=True)
-
-        if parameter is None:
-            parameter = np.zeros_like(observed_target)
-
-        _intervals = translate_intervals(self,
-                                         sample,
-                                         observed_target)
-
-        pvalues = []
-
-        for i in range(observed_target.shape[0]):
-            keep = np.zeros_like(observed_target)
-            keep[i] = 1.
-
-            _parameter = self.reference.copy()
-            _parameter[i] = parameter[i]
-            pvalues.append(_intervals.pivot(lambda x: keep.dot(x),
-                                            _parameter,
-                                            alternative=alternative))
-
-        return np.array(pvalues)
-
-
-
 class bootstrapped_target_sampler(targeted_sampler):
 
     # make one of these for each hypothesis test
@@ -1767,127 +1424,7 @@ def naive_pvalues(target, observed, parameter):
         pvalues[j] = 2*min(pval, 1-pval)
     return pvalues
 
-
-class translate_intervals(object): # intervals_from_sample):
-
-    """
-    Location family based intervals... (cryptic)
-    randomization density should be `g` composed with the affine
-    mapping and take an argument like one row of sample
-    target_linear is the linear part of the affine mapping with
-    respect to target
-    weights for a given candidate will look like
-          randomization_density(sample + (candidate, 0, 0) - (reference, 0, 0)) /
-          randomization_density(sample)
-    if the samples are samples of \bar{\beta}. if we have samples of
-    \Delta from our reference, then the weights will look like
-    randomization_density(sample + (candidate, 0, 0))
-    randomization_density(sample + (reference, 0, 0))
-    WE ARE ASSUMING sample is sampled from targeted_sampler.reference
-    """
-
-    def __init__(self,
-                 targeted_sampler,
-                 sample,
-                 observed):
-        self.targeted_sampler = targeted_sampler
-        self.observed = observed.copy() # this is our observed unpenalized estimator
-        self._logden = targeted_sampler.log_randomization_density(sample)
-        self._delta = sample.copy()
-        self._delta[:, targeted_sampler.target_slice] -= targeted_sampler.reference[None, :]
-
-    def pivot(self,
-              test_statistic,
-              candidate,
-              alternative='twosided'):
-        '''
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        pvalue : np.float
-
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        observed_delta = self.observed - candidate
-        observed_stat = test_statistic(observed_delta)
-
-        candidate_sample, weights = self._weights(candidate)
-        #sample_stat = np.array([test_statistic(s) for s in candidate_sample[:, self.targeted_sampler.target_slice]])
-        sample_stat = np.array([test_statistic(s) for s in self._delta[:, self.targeted_sampler.target_slice]])
-
-        pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
-
-        if alternative == 'twosided':
-            return 2 * min(pivot, 1 - pivot)
-        elif alternative == 'less':
-            return pivot
-        else:
-            return 1 - pivot
-
-    def confidence_interval(self, linear_func, level=0.95, how_many_sd=20):
-
-        target_delta = self._delta[:,self.targeted_sampler.target_slice]
-        projected_delta = target_delta.dot(linear_func)
-        projected_observed = self.observed.dot(linear_func)
-
-        delta_min, delta_max = projected_delta.min(), projected_delta.max()
-
-        _norm = np.linalg.norm(linear_func)
-        grid_min, grid_max = -how_many_sd * np.std(projected_delta), how_many_sd * np.std(projected_delta)
-
-        reference = self.targeted_sampler.reference
-
-        def _rootU(gamma):
-            return self.pivot(lambda x: linear_func.dot(x),
-                              reference + gamma * linear_func / _norm**2,
-                              alternative='less') - (1 - level) / 2.
-
-
-        def _rootL(gamma):
-            return self.pivot(lambda x: linear_func.dot(x),
-                              reference + gamma * linear_func / _norm**2,
-                              alternative='less') - (1 + level) / 2.
-
-        upper = bisect(_rootU, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
-        lower = bisect(_rootL, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
-
-        return lower + projected_observed, upper + projected_observed
-
-    # Private methods
-
-    def _weights(self, candidate):
-
-        candidate_sample = self._delta.copy()
-        candidate_sample[:, self.targeted_sampler.target_slice] += candidate[None, :]
-        _lognum = self.targeted_sampler.log_randomization_density(candidate_sample)
-
-        _logratio = _lognum - self._logden
-        _logratio -= _logratio.max()
-
-        return candidate_sample, np.exp(_logratio)
-
-
-class opt_weighted_intervals(object): # intervals_from_sample):
-
-    """
-    Location family based intervals... (cryptic)
-    randomization density should be `g` composed with the affine
-    mapping and take an argument like one row of sample
-    target_linear is the linear part of the affine mapping with
-    respect to target
-    weights for a given candidate will look like
-          randomization_density(sample + (candidate, 0, 0) - (reference, 0, 0)) /
-          randomization_density(sample)
-    if the samples are samples of \bar{\beta}. if we have samples of
-    \Delta from our reference, then the weights will look like
-    randomization_density(sample + (candidate, 0, 0))
-    randomization_density(sample + (reference, 0, 0))
-    WE ARE ASSUMING sample is sampled from targeted_sampler.reference
-    """
+class opt_weighted_intervals(object):
 
     def __init__(self,
                  targeted_sampler,
@@ -1899,12 +1436,11 @@ def __init__(self,
         nactive = targeted_sampler.observed_target_state.shape[0]
 
         self._normal_sample = np.random.multivariate_normal(mean=np.zeros(nactive), cov=targeted_sampler.target_cov, size =(sample.shape[0]))
-        print(self._normal_sample.shape)
+
         self._sample = np.concatenate((sample, np.tile(self.observed, (sample.shape[0], 1))), axis=1)
         self._logden = targeted_sampler.log_randomization_density(self._sample)
         self._delta = np.concatenate((sample, self._normal_sample), axis=1)
 
-
     def pivot(self,
               linear_func,
               candidate,

From b3f6ea854bed3a868fd0ab467e41dfdb99010c6c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 6 Sep 2017 23:19:01 -0700
Subject: [PATCH 166/617] WIP -- rewriting the weights method

---
 selection/randomized/query.py | 44 +++++++++++++++--------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index d12852e8b..83d579bea 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -823,9 +823,10 @@ def reconstruction_map(self, state):
         #reconstructed = np.zeros((opt_state.shape[0],self.randomization_length_total))
 
         for i in range(self.nqueries):
-            reconstructed[:, self.randomization_slice[i]] = self.objectives[i].reconstruction_map(target_state,
-                                                                                        self.target_transform[i],
-                                                                                        opt_state[:, self.opt_slice[i]])
+            reconstructed[:, self.randomization_slice[i]] = \
+                   self.objectives[i].reconstruction_map(target_state,
+                                                         self.target_transform[i],
+                                                         opt_state[:, self.opt_slice[i]])
 
         return np.squeeze(reconstructed)
 
@@ -1146,7 +1147,7 @@ def confidence_intervals(self,
         if sample is None:
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
 
-        _intervals = opt_weighted_intervals(self,
+        _intervals = optimization_intervals(self,
                                             sample,
                                             observed_target)
 
@@ -1424,21 +1425,22 @@ def naive_pvalues(target, observed, parameter):
         pvalues[j] = 2*min(pval, 1-pval)
     return pvalues
 
-class opt_weighted_intervals(object):
+class optimization_intervals(object):
 
     def __init__(self,
-                 targeted_sampler,
+                 opt_sampler,
                  sample,
                  observed):
 
-        self.targeted_sampler = targeted_sampler
+        self.opt_sampler = opt_sampler
         self.observed = observed.copy() # this is our observed unpenalized estimator
-        nactive = targeted_sampler.observed_target_state.shape[0]
 
-        self._normal_sample = np.random.multivariate_normal(mean=np.zeros(nactive), cov=targeted_sampler.target_cov, size =(sample.shape[0]))
+        # zero-mean Gaussian draws with covariance target_cov, one row per row of sample
+        self._normal_sample = np.random.multivariate_normal(mean=np.zeros(opt_sampler.target_cov.shape[0]),
+                                                            cov=opt_sampler.target_cov, size=(sample.shape[0],))
 
-        self._sample = np.concatenate((sample, np.tile(self.observed, (sample.shape[0], 1))), axis=1)
-        self._logden = targeted_sampler.log_randomization_density(self._sample)
+        self._sample = sample
+        self._logden = opt_sampler.log_randomization_density(self._sample)
         self._delta = np.concatenate((sample, self._normal_sample), axis=1)
 
     def pivot(self,
@@ -1456,12 +1458,10 @@ def pivot(self,
         if alternative not in ['greater', 'less', 'twosided']:
             raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
 
-        observed_stat = self.targeted_sampler.observed_target_state.dot(linear_func)
+        observed_stat = self.observed.dot(linear_func)
+        sample_stat = self._normal_sample.dot(linear_func)
 
         candidate_sample, weights = self._weights(linear_func, candidate)
-
-        sample_stat = np.array([linear_func.dot(s) for s in candidate_sample[:, self.targeted_sampler.target_slice]])
-
         pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
 
         if alternative == 'twosided':
@@ -1504,16 +1504,10 @@ def _weights(self, linear_func, candidate):
 
         candidate_sample = self._sample.copy()
 
-        _norm = np.linalg.norm(linear_func)
-        projection_matrix = np.true_divide(np.dot(linear_func, linear_func.T), _norm**2)
-        residual_matrix = np.identity(linear_func.shape[0])-projection_matrix
-        candidate_sample[:, self.targeted_sampler.target_slice] = \
-            candidate_sample[:, self.targeted_sampler.target_slice].dot(residual_matrix)
-
-        candidate_sample[:, self.targeted_sampler.target_slice] += \
-            (self._normal_sample+np.ones(self._normal_sample.shape)*candidate).dot(projection_matrix)
-
-        _lognum = self.targeted_sampler.log_randomization_density(candidate_sample)
+        # Here we should loop through the views
+        # and move the score of each view 
+        # for each projected (through linear_func) normal sample
+        # using the linear decomposition
 
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()

From e65c4ab4822c3ec81a7fd0486fd27887b4c868d5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 6 Sep 2017 23:43:34 -0700
Subject: [PATCH 167/617] storing necessary quantities to form weights --
 removing ppf

---
 selection/randomized/query.py         | 51 ++++++++++++++++++++++-----
 selection/randomized/randomization.py |  4 ---
 2 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 83d579bea..132effa1a 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -102,13 +102,12 @@ def linear_decomposition(self, target_score_cov, target_cov, observed_target_sta
 
         return (composition_linear_part, composition_offset)
 
-
     def reconstruction_map(self, data_state, data_transform, opt_state):
 
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-        # reconstruction of randoimzation omega
+        # reconstruction of randomization omega
 
         data_state = np.atleast_2d(data_state)
         opt_linear, opt_offset = self.opt_transform
@@ -897,6 +896,7 @@ def __init__(self,
         self.nqueries = len(multi_view.objectives)
         self.opt_slice = multi_view.opt_slice
         self.objectives = multi_view.objectives
+        self.nboot = multi_view.nboot
 
         self.total_randomization_length = multi_view.total_randomization_length
         self.randomization_slice = multi_view.randomization_slice
@@ -1016,23 +1016,25 @@ def setup_target(self, target_info, form_covariances, parametric=False):
         self.score_cov = []
         target_cov_sum = 0
 
-        # we could pararallelize this over all views at once
-
+        # we should pararallelize this over all views at once ?
+        self.observed_score = []
         for i in range(self.nqueries):
             view = self.objectives[i]
             score_info = view.setup_sampler(form_covariances)
             if parametric == False:
                 target_cov, cross_cov = form_covariances(target_info,  
                                                          cross_terms=[score_info],
-                                                         nsample=self.multi_view.nboot[i])
+                                                         nsample=self.nboot[i])
             else:
                 target_cov, cross_cov = form_covariances(target_info, 
                                                          cross_terms=[score_info])
 
             target_cov_sum += target_cov
             self.score_cov.append(cross_cov)
+            self.observed_score.append(view.observed_score_state)
 
         self.target_cov = target_cov_sum / self.nqueries
+        self.target_invcov = np.linalg.inv(self.target_cov)
 
     def hypothesis_test(self,
                         test_stat,
@@ -1461,7 +1463,23 @@ def pivot(self,
         observed_stat = self.observed.dot(linear_func)
         sample_stat = self._normal_sample.dot(linear_func)
 
-        candidate_sample, weights = self._weights(linear_func, candidate)
+        target_cov = linear_func.dot(self.target_cov.dot(linear_func))
+
+        nuisance = []
+        score_cov = []
+        for i in range(len(self.objectives)):
+            cur_score_cov = linear_func.dot(self.score_cov[i])
+            cur_nuisance = self.observed_score[i] - cur_score_cov * observed_stat / target_cov
+            nuisance.append(cur_nuisance)
+            score_cov.append(cur_score_cov)
+
+        candidate_sample, weights = self._weights(linear_func, 
+                                                  candidate, 
+                                                  observed_stat, 
+                                                  sample_stat, 
+                                                  nuisance,
+                                                  score_cov)
+
         pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
 
         if alternative == 'twosided':
@@ -1500,15 +1518,32 @@ def _rootL(gamma):
 
     # Private methods
 
-    def _weights(self, linear_func, candidate):
+    def _weights(self, 
+                 linear_func, 
+                 candidate, 
+                 observed_stat, 
+                 sample_stat,
+                 nuisance,
+                 score_cov):
 
-        candidate_sample = self._sample.copy()
+        candidate_sample = sample_stat.copy()
 
         # Here we should loop through the views
         # and move the score of each view 
         # for each projected (through linear_func) normal sample
         # using the linear decomposition
 
+        # We need access to the map that takes observed_score for each view
+        # and constructs the full randomization -- this is the reconstruction map
+        # for each view
+
+        # The data state for each view will be set to be N_i + A_i \hat{\theta}_i
+        # where N_i is the nuisance sufficient stat for the i-th view's
+        # data with respect to \hat{\theta} and N_i  will not change because
+        # it depends on the observed \hat{\theta} and observed score of i-th view
+
+        # In this function, \hat{\theta}_i will change with the Monte Carlo sample
+
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
 
diff --git a/selection/randomized/randomization.py b/selection/randomized/randomization.py
index 8104a834d..debd91781 100644
--- a/selection/randomized/randomization.py
+++ b/selection/randomized/randomization.py
@@ -16,7 +16,6 @@ def __init__(self,
                  density,
                  cdf,
                  pdf,
-                 ppf,
                  derivative_log_density,
                  grad_negative_log_density,
                  sampler,
@@ -31,7 +30,6 @@ def __init__(self,
         self._density = density
         self._cdf = cdf
         self._pdf = pdf
-        self._ppf = ppf
         self._derivative_log_density = derivative_log_density
         self._grad_negative_log_density = grad_negative_log_density
         self._sampler = sampler
@@ -179,7 +177,6 @@ def laplace(shape, scale):
         sampler = lambda size: rv.rvs(size=shape + size)
         cdf = lambda x: laplace.cdf(x, loc=0., scale = scale)
         pdf = lambda x: laplace.pdf(x, loc=0., scale = scale)
-        ppf = lambda x: laplace.ppf(x, loc=0, scale=scale)
         derivative_log_density = lambda x: -np.sign(x)/scale
         grad_negative_log_density = lambda x: np.sign(x) / scale
         sampler = lambda size: rv.rvs(size=shape + size)
@@ -191,7 +188,6 @@ def laplace(shape, scale):
                              density,
                              cdf,
                              pdf,
-                             ppf,
                              derivative_log_density,
                              grad_negative_log_density,
                              sampler,

From 9c51987a80882b9ecaa7fb58121f53231dc302a4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 7 Sep 2017 00:37:02 -0700
Subject: [PATCH 168/617] WIP: first draft of _weights

---
 selection/randomized/query.py                 | 55 ++++++++++---------
 .../tests/test_optimization_sampler.py        | 16 ++++--
 2 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 132effa1a..6cb028749 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -113,7 +113,11 @@ def reconstruction_map(self, data_state, data_transform, opt_state):
         opt_linear, opt_offset = self.opt_transform
 
         data_linear, data_offset = data_transform
-        data_piece = data_linear.dot(data_state.T) + data_offset[:, None]
+        if data_linear is not None:
+            data_piece = data_linear.dot(data_state) + data_offset
+        else:
+            data_piece = np.multiply.outer(data_offset, np.ones(opt_state.shape[0]))
+
         if opt_linear is not None:
             opt_state = np.atleast_2d(opt_state)
             opt_piece = opt_linear.dot(opt_state.T) + opt_offset[:, None]
@@ -1014,12 +1018,15 @@ def setup_target(self, target_info, form_covariances, parametric=False):
         """
 
         self.score_cov = []
+        self.observed_score = []
+        self.log_density = []
+
         target_cov_sum = 0
 
         # we should pararallelize this over all views at once ?
-        self.observed_score = []
         for i in range(self.nqueries):
             view = self.objectives[i]
+            self.log_density.append(view.log_randomization_density)
             score_info = view.setup_sampler(form_covariances)
             if parametric == False:
                 target_cov, cross_cov = form_covariances(target_info,  
@@ -1242,7 +1249,6 @@ def crude_lipschitz(self):
             lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
         return lipschitz
 
-
     def reconstruction_map(self, state):
         '''
         Reconstruction of randomization at current state.
@@ -1263,13 +1269,13 @@ def reconstruction_map(self, state):
         if len(state.shape) > 2:
             raise ValueError('expecting at most 2-dimensional array')
 
-        target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
 
         for i in range(self.nqueries):
-            reconstructed[:, self.randomization_slice[i]] = self.objectives[i].reconstruction_map(target_state,
-                                                                                        self.target_transform[i],
-                                                                                        opt_state[:, self.opt_slice[i]])
+            reconstructed[:,self.randomization_slice[i]] = self.objectives[i].reconstruction_map(  
+                0.,
+                self.target_transform[i],
+                state[:,self.opt_slice[i]])
 
         return np.squeeze(reconstructed)
 
@@ -1431,19 +1437,17 @@ class optimization_intervals(object):
 
     def __init__(self,
                  opt_sampler,
-                 sample,
+                 opt_sample,
                  observed):
 
-        self.opt_sampler = opt_sampler
+        self.reconstructed_sample = opt_sampler.reconstruction_map(opt_sample) 
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
         self._normal_sample = np.random.multivariate_normal(mean=np.zeros(nactive), 
                                                             cov=opt_sampler.target_cov, 
                                                             size=(sample.shape[0],))
 
-        self._sample = sample
-        self._logden = opt_sampler.log_randomization_density(self._sample)
-        self._delta = np.concatenate((sample, self._normal_sample), axis=1)
+        self._logden = opt_sampler.log_randomization_density(self.reconstructed_sample)
 
     def pivot(self,
               linear_func,
@@ -1467,18 +1471,17 @@ def pivot(self,
 
         nuisance = []
         score_cov = []
-        for i in range(len(self.objectives)):
-            cur_score_cov = linear_func.dot(self.score_cov[i])
+        for i in range(len(self.opt_sampler.objectives)):
+            cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
             cur_nuisance = self.observed_score[i] - cur_score_cov * observed_stat / target_cov
             nuisance.append(cur_nuisance)
             score_cov.append(cur_score_cov)
 
-        candidate_sample, weights = self._weights(linear_func, 
-                                                  candidate, 
-                                                  observed_stat, 
-                                                  sample_stat, 
-                                                  nuisance,
-                                                  score_cov)
+        candidate_sample, weights = self._weights(self.opt_sample,          # sample of optimization variables
+                                                  sample_stat + candidate,  # normal sample under candidate
+                                                  nuisance,                 # nuisance sufficient stats for each view
+                                                  score_cov,                # points will be moved like sample * score_cov
+                                                  self.opt_sampler.log_density)
 
         pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
 
@@ -1519,14 +1522,10 @@ def _rootL(gamma):
     # Private methods
 
     def _weights(self, 
-                 linear_func, 
-                 candidate, 
-                 observed_stat, 
                  sample_stat,
                  nuisance,
-                 score_cov):
-
-        candidate_sample = sample_stat.copy()
+                 score_cov,
+                 log_density):
 
         # Here we should loop through the views
         # and move the score of each view 
@@ -1544,6 +1543,10 @@ def _weights(self,
 
         # In this function, \hat{\theta}_i will change with the Monte Carlo sample
 
+        _lognum = 0
+        for i in range(len(log_density)):
+            density_arg = nuisance[i] + score_cov[i].dot(sample_stat)  
+            _lognum += log_density[i](density_arg)
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
 
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
index e0eb58e81..7341572e2 100644
--- a/selection/randomized/tests/test_optimization_sampler.py
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -39,7 +39,10 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
             marginalizing_groups = None
 
         if condition:
-            conditioning_groups = ~marginalizing_groups
+            if marginalizing_groups is not None:
+                conditioning_groups = ~marginalizing_groups
+            else:
+                conditioning_groups = np.ones(p, np.bool)
             conditioning_groups[-int(p/4):] = False
         else:
             conditioning_groups = None
@@ -50,10 +53,11 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
         conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
                                    conditioning_groups=conditioning_groups)
 
-        target_sampler = optimization_sampler(conv._queries)
+        opt_sampler = optimization_sampler(conv._queries)
 
-        S = target_sampler.sample(ndraw,
-                                  burnin,
-                                  stepsize=1.e-3)
+        S = opt_sampler.sample(ndraw,
+                               burnin,
+                               stepsize=1.e-3)
 
-        stop
+        opt_sampler.reconstruction_map(S)
+        

From 544020835ae396e3a83d5ef3a8a318ab3a87f631 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 7 Sep 2017 00:40:11 -0700
Subject: [PATCH 169/617] comment describing reconstructed_sample

---
 selection/randomized/query.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 6cb028749..1a9b9048d 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1440,7 +1440,8 @@ def __init__(self,
                  opt_sample,
                  observed):
 
-        self.reconstructed_sample = opt_sampler.reconstruction_map(opt_sample) 
+        self.reconstructed_sample = opt_sampler.reconstruction_map(opt_sample) # observed_score + affine(opt_sample)
+
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
         self._normal_sample = np.random.multivariate_normal(mean=np.zeros(nactive), 

From 6dd45ccf77ddba814f78ba2131243d7c6c1c20f9 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 7 Sep 2017 00:44:35 -0700
Subject: [PATCH 170/617] optimization_sampler is no longer a subclass

---
 selection/randomized/query.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 1a9b9048d..e173e1c43 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -855,7 +855,7 @@ def log_randomization_density(self, state):
             value += log_dens(reconstructed[:,self.opt_slice[i]])
         return np.squeeze(value)
 
-class optimization_sampler(targeted_sampler):
+class optimization_sampler(object):
 
     '''
     Object to sample only optimization variables of a selective sampler

From e66fee1658b29aab5e97201e25e453eaba2a9b87 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 7 Sep 2017 01:05:19 -0700
Subject: [PATCH 171/617] removing translate

---
 selection/randomized/tests/test_cv.py         |  39 ++--
 selection/randomized/tests/test_intervals.py  |  46 ++---
 .../tests/test_marginalize_subgrad.py         |  40 +---
 .../randomized/tests/test_multiple_queries.py | 171 ++----------------
 .../tests/test_multiple_queries_CI.py         |  58 ++----
 .../randomized/tests/test_multiple_splits.py  |  57 ++----
 .../tests/test_opt_weighted_intervals.py      |  17 +-
 .../randomized/tests/test_split_compare.py    |  57 ++----
 selection/randomized/tests/test_sqrt_lasso.py |  48 ++---
 9 files changed, 126 insertions(+), 407 deletions(-)

diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 11369632c..9d8563247 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -39,7 +39,6 @@ def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
             lam_frac = 1.,
             glmnet = True,
             loss = 'gaussian',
-            intervals = 'old',
             bootstrap = False,
             condition_on_CVR = True,
             marginalize_subgrad = True,
@@ -137,32 +136,18 @@ def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
                                                      mv,
                                                      bootstrap=bootstrap)
 
-        if intervals == 'old':
-            target_sample = target_sampler.sample(ndraw=ndraw,
-                                                  burnin=burnin)
-            LU = target_sampler.confidence_intervals(target_observed,
-                                                     sample=target_sample,
-                                                     level=0.9)
-
-            pivots_truth = target_sampler.coefficient_pvalues(target_observed,
-                                                              parameter=true_vec,
-                                                              sample=target_sample)
-            pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                         parameter=np.zeros_like(true_vec),
-                                                         sample=target_sample)
-        else:
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-            LU = target_sampler.confidence_intervals_translate(target_observed,
-                                                               sample=full_sample,
-                                                               level=0.9)
-            pivots_truth = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                        parameter=true_vec,
-                                                                        sample=full_sample)
-            pvalues = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                   parameter=np.zeros_like(true_vec),
-                                                                   sample=full_sample)
+        target_sample = target_sampler.sample(ndraw=ndraw,
+                                              burnin=burnin)
+        LU = target_sampler.confidence_intervals(target_observed,
+                                                 sample=target_sample,
+                                                 level=0.9)
+
+        pivots_truth = target_sampler.coefficient_pvalues(target_observed,
+                                                          parameter=true_vec,
+                                                          sample=target_sample)
+        pvalues = target_sampler.coefficient_pvalues(target_observed,
+                                                     parameter=np.zeros_like(true_vec),
+                                                     sample=target_sample)
 
         L, U = LU.T
         sel_covered = np.zeros(nactive, np.bool)
diff --git a/selection/randomized/tests/test_intervals.py b/selection/randomized/tests/test_intervals.py
index 7ab3deebe..903794b67 100644
--- a/selection/randomized/tests/test_intervals.py
+++ b/selection/randomized/tests/test_intervals.py
@@ -30,7 +30,6 @@ def test_intervals(s=0,
                    burnin=2000, 
                    bootstrap=True,
                    loss='gaussian',
-                   intervals='old',
                    randomizer = 'laplace',
                    solve_args={'min_its':50, 'tol':1.e-10}):
 
@@ -87,37 +86,20 @@ def test_intervals(s=0,
                                                      mv,
                                                      bootstrap=bootstrap)
 
-        if intervals == 'old':
-            target_sample = target_sampler.sample(ndraw=ndraw,
-                                                  burnin=burnin)
-            LU = target_sampler.confidence_intervals(target_observed,
-                                                     sample=target_sample,
-                                                     level=0.9)
-            pivots_mle = target_sampler.coefficient_pvalues(target_observed,
-                                                            parameter=target_sampler.reference,
-                                                            sample=target_sample)
-            pivots_truth = target_sampler.coefficient_pvalues(target_observed,
-                                                          parameter=true_vec,
-                                                          sample=target_sample)
-            pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                     parameter=np.zeros_like(true_vec),
-                                                     sample=target_sample)
-        else:
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-            LU = target_sampler.confidence_intervals_translate(target_observed,
-                                                               sample=full_sample,
-                                                               level=0.9)
-            pivots_mle = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                            parameter=target_sampler.reference,
-                                                            sample=full_sample)
-            pivots_truth = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                          parameter=true_vec,
-                                                          sample=full_sample)
-            pvalues = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                     parameter=np.zeros_like(true_vec),
-                                                     sample=full_sample)
+        target_sample = target_sampler.sample(ndraw=ndraw,
+                                              burnin=burnin)
+        LU = target_sampler.confidence_intervals(target_observed,
+                                                 sample=target_sample,
+                                                 level=0.9)
+        pivots_mle = target_sampler.coefficient_pvalues(target_observed,
+                                                        parameter=target_sampler.reference,
+                                                        sample=target_sample)
+        pivots_truth = target_sampler.coefficient_pvalues(target_observed,
+                                                      parameter=true_vec,
+                                                      sample=target_sample)
+        pvalues = target_sampler.coefficient_pvalues(target_observed,
+                                                 parameter=np.zeros_like(true_vec),
+                                                 sample=target_sample)
 
         LU_naive = naive_confidence_intervals(target_sampler, target_observed)
 
diff --git a/selection/randomized/tests/test_marginalize_subgrad.py b/selection/randomized/tests/test_marginalize_subgrad.py
index 967ba0a82..3c1c8bf3c 100644
--- a/selection/randomized/tests/test_marginalize_subgrad.py
+++ b/selection/randomized/tests/test_marginalize_subgrad.py
@@ -45,8 +45,7 @@ def test_marginalize(s=4,
                      nviews=3,
                      scalings=True,
                      subgrad =True,
-                     parametric=False,
-                     intervals='old'):
+                     parametric=False):
     print(n,p,s)
 
     if randomizer == 'laplace':
@@ -121,35 +120,14 @@ def test_marginalize(s=4,
                                                      parametric=parametric)
                                                      #reference= beta[active_union])
 
-        if intervals=='old':
-            target_sample = target_sampler.sample(ndraw=ndraw,
-                                                  burnin=burnin)
-            LU = target_sampler.confidence_intervals(target_observed,
-                                                     sample=target_sample,
-                                                     level=0.9)
-            pivots = target_sampler.coefficient_pvalues(target_observed,
-                                                        parameter=true_vec,
-                                                        sample=target_sample)
-        elif intervals=='new':
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-            LU = target_sampler.confidence_intervals_translate(target_observed,
-                                                           sample=full_sample,
-                                                           level=0.9)
-            pivots = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                    parameter=true_vec,
-                                                                    sample=full_sample)
-
-        #test_stat = lambda x: np.linalg.norm(x - beta[active_union])
-        #observed_test_value = test_stat(target_observed)
-        #pivots = target_sampler.hypothesis_test(test_stat,
-        #                                       observed_test_value,
-        #                                       alternative='twosided',
-        #                                       parameter = beta[active_union],
-        #                                       ndraw=ndraw,
-        #                                       burnin=burnin,
-        #                                       stepsize=None)
+        target_sample = target_sampler.sample(ndraw=ndraw,
+                                              burnin=burnin)
+        LU = target_sampler.confidence_intervals(target_observed,
+                                                 sample=target_sample,
+                                                 level=0.9)
+        pivots = target_sampler.coefficient_pvalues(target_observed,
+                                                    parameter=true_vec,
+                                                    sample=target_sample)
 
         def coverage(LU):
             L, U = LU[:, 0], LU[:, 1]
diff --git a/selection/randomized/tests/test_multiple_queries.py b/selection/randomized/tests/test_multiple_queries.py
index 27d17fdec..85ce218ac 100644
--- a/selection/randomized/tests/test_multiple_queries.py
+++ b/selection/randomized/tests/test_multiple_queries.py
@@ -111,155 +111,14 @@ def test_multiple_queries(s=3,
                                        burnin=burnin,
                                        keep_opt=True)
 
-        pivot = target_sampler.hypothesis_test_translate(full_sample,
-                                                         test_stat,
-                                                         target_observed,
-                                                         alternative='twosided')
+        pivot = target_sampler.hypothesis_test(full_sample,
+                                               test_stat,
+                                               target_observed,
+                                               alternative='twosided')
 
         return [pivot], [False]
 
-@register_report(['pvalue', 'active'])
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-@set_seed_iftrue(SET_SEED)
-@wait_for_return_value()
-def test_multiple_queries_translate(s=3, n=200, p=20,
-                                    signal=7,
-                                    rho=0.1,
-                                    lam_frac=0.7,
-                                    nview=4,
-                                    ndraw=10000, burnin=2000,
-                                    bootstrap=True):
-
-    randomizer = randomization.laplace((p,), scale=1)
-    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal)
-
-    nonzero = np.where(beta)[0]
-    lam_frac = 1.
-
-    loss = rr.glm.logistic(X, y)
-    epsilon = 1.
-
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
-    W = np.ones(p)*lam
-    W[0] = 0 # use at least some unpenalized
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    view = []
-    for i in range(nview):
-        view.append(glm_group_lasso(loss, epsilon, penalty, randomizer))
-
-    mv = multiple_queries(view)
-    mv.solve()
 
-    active_union = np.zeros(p, np.bool)
-    for i in range(nview):
-        active_union += view[i].selection_variable['variables']
-
-    nactive = np.sum(active_union)
-    print("nactive", nactive)
-
-    if set(nonzero).issubset(np.nonzero(active_union)[0]):
-        if nactive==s:
-            return None
-
-        active_set = np.nonzero(active_union)[0]
-
-        inactive_selected = np.array([active_union[i] and i not in nonzero for i in range(p)])
-        true_active = (beta != 0)
-        reference = np.zeros(inactive_selected.sum())
-        target_sampler, target_observed = glm_target(loss,
-                                                     active_union,
-                                                     mv,
-                                                     subset=inactive_selected,
-                                                     bootstrap=bootstrap,
-                                                     reference=reference)
-
-        test_stat = lambda x: np.linalg.norm(x)
-        observed_test_value = test_stat(target_observed)
-
-        full_sample = target_sampler.sample(ndraw=ndraw,
-                                            burnin=burnin,
-                                            keep_opt=True)
-
-        pivot = target_sampler.hypothesis_test_translate(full_sample,
-                                                         test_stat,
-                                                         target_observed,
-                                                         alternative='twosided')
-
-        return [pivot], [False]
-
-@register_report(['truth', 'active'])
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=100, burnin=100)
-@set_seed_iftrue(SET_SEED)
-@wait_for_return_value()
-def test_multiple_queries_individual_coeff(s=3,
-                                           n=100,
-                                           p=10,
-                                           signal=7,
-                                           rho=0.1,
-                                           lam_frac=0.7,
-                                           nview=4,
-                                           ndraw=10000, burnin=2000,
-                                           bootstrap=True):
-
-    randomizer = randomization.laplace((p,), scale=1)
-    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal)
-
-    nonzero = np.where(beta)[0]
-
-    loss = rr.glm.logistic(X, y)
-    epsilon = 1.
-
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
-    W = np.ones(p)*lam
-    #W[0] = 0 # use at least some unpenalized
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    view = []
-    for i in range(nview):
-        view.append(glm_group_lasso(loss, epsilon, penalty, randomizer))
-
-    mv = multiple_queries(view)
-    mv.solve()
-
-    active_union = np.zeros(p, np.bool)
-    for i in range(nview):
-        active_union += view[i].selection_variable['variables']
-
-    nactive = np.sum(active_union)
-    print("nactive", nactive)
-    active_set = np.nonzero(active_union)[0]
-
-    pvalues = []
-    true_beta = beta[active_union]
-    if set(nonzero).issubset(np.nonzero(active_union)[0]):
-        for j in range(nactive):
-
-            subset = np.zeros(p, np.bool)
-            subset[active_set[j]] = True
-            target_sampler, target_observed = glm_target(loss,
-                                                         active_union,# * ~subset,
-                                                         mv,
-                                                         subset=subset,
-                                                         reference = true_beta[j],
-                                                         #reference=np.zeros((1,)),
-                                                         bootstrap=bootstrap)
-            test_stat = lambda x: np.atleast_1d(x-true_beta[j])
-
-            pval = target_sampler.hypothesis_test(test_stat,
-                                                  np.atleast_1d(target_observed-true_beta[j]),
-                                                  alternative='twosided',
-                                                  ndraw=ndraw,
-                                                  burnin=burnin)
-            pvalues.append(pval)
-
-        active_var = np.zeros_like(pvalues, np.bool)
-        _nonzero = np.array([i in nonzero for i in active_set])
-        active_var[_nonzero] = True
-
-        return pvalues, [active_set[j] in nonzero for j in range(nactive)]
 
 
 @register_report(['pvalue', 'active'])
@@ -329,13 +188,13 @@ def test_parametric_covariance(ndraw=10000, burnin=2000):
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @set_seed_iftrue(SET_SEED)
 @wait_for_return_value()
-def test_multiple_queries_translate(s=3, n=200, p=20,
-                                    signal=7,
-                                    rho=0.1,
-                                    lam_frac=0.7,
-                                    nview=4,
-                                    ndraw=10000, burnin=2000,
-                                    bootstrap=True):
+def test_multiple_queries(s=3, n=200, p=20,
+                          signal=7,
+                          rho=0.1,
+                          lam_frac=0.7,
+                          nview=4,
+                          ndraw=10000, burnin=2000,
+                          bootstrap=True):
 
     randomizer = randomization.laplace((p,), scale=1)
     X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal)
@@ -389,10 +248,10 @@ def test_multiple_queries_translate(s=3, n=200, p=20,
                                             burnin=burnin,
                                             keep_opt=True)
 
-        pivot = target_sampler.hypothesis_test_translate(full_sample,
-                                                         test_stat,
-                                                         target_observed,
-                                                         alternative='twosided')
+        pivot = target_sampler.hypothesis_test(full_sample,
+                                               test_stat,
+                                               target_observed,
+                                               alternative='twosided')
 
         return [pivot], [False]
 
diff --git a/selection/randomized/tests/test_multiple_queries_CI.py b/selection/randomized/tests/test_multiple_queries_CI.py
index b421aefbf..4fa9cd10c 100644
--- a/selection/randomized/tests/test_multiple_queries_CI.py
+++ b/selection/randomized/tests/test_multiple_queries_CI.py
@@ -27,7 +27,6 @@ def test_multiple_queries(s=3,
                          rho=0.1,
                          lam_frac=0.7,
                          nviews=4,
-                         intervals ='new',
                          ndraw=10000, burnin=2000,
                          solve_args={'min_its':50, 'tol':1.e-10}, check_screen =True):
 
@@ -77,50 +76,29 @@ def test_multiple_queries(s=3,
                                                           mv,
                                                           bootstrap=True)
 
-        if intervals == 'old':
-            target_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
-                                                            burnin=burnin)
-            LU_boot = target_sampler_boot.confidence_intervals(target_observed,
-                                                               sample=target_sample_boot,
-                                                               level=0.9)
-            pivots_boot = target_sampler_boot.coefficient_pvalues(target_observed,
-                                                                  parameter=true_vec,
-                                                                  sample=target_sample_boot)
-        else:
-            full_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
-                                                          burnin=burnin,
-                                                          keep_opt=True)
-            LU_boot = target_sampler_boot.confidence_intervals_translate(target_observed,
-                                                                         sample=full_sample_boot,
-                                                                         level=0.9)
-            pivots_boot = target_sampler_boot.coefficient_pvalues_translate(target_observed,
-                                                                            parameter=true_vec,
-                                                                            sample=full_sample_boot)
+        target_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
+                                                        burnin=burnin)
+        LU_boot = target_sampler_boot.confidence_intervals(target_observed,
+                                                           sample=target_sample_boot,
+                                                           level=0.9)
+        pivots_boot = target_sampler_boot.coefficient_pvalues(target_observed,
+                                                              parameter=true_vec,
+                                                              sample=target_sample_boot)
+
         ## CLT plugin
         target_sampler, _ = glm_target(loss,
                                        active_union,
                                        mv,
                                        bootstrap=False)
 
-        if intervals == 'old':
-            target_sample = target_sampler.sample(ndraw=ndraw,
-                                                  burnin=burnin)
-            LU = target_sampler.confidence_intervals(target_observed,
-                                                     sample=target_sample,
-                                                     level=0.9)
-            pivots = target_sampler.coefficient_pvalues(target_observed,
-                                                        parameter=true_vec,
-                                                        sample=target_sample)
-        else:
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-            LU = target_sampler.confidence_intervals_translate(target_observed,
-                                                               sample=full_sample,
-                                                               level=0.9)
-            pivots = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                  parameter=true_vec,
-                                                                  sample=full_sample)
+        target_sample = target_sampler.sample(ndraw=ndraw,
+                                              burnin=burnin)
+        LU = target_sampler.confidence_intervals(target_observed,
+                                                 sample=target_sample,
+                                                 level=0.9)
+        pivots = target_sampler.coefficient_pvalues(target_observed,
+                                                    parameter=true_vec,
+                                                    sample=target_sample)
 
         LU_naive = naive_confidence_intervals(target_sampler, target_observed)
 
@@ -153,7 +131,7 @@ def coverage(LU):
 
 def report(niter=10, **kwargs):
 
-    kwargs = {'s': 0, 'n': 300, 'p': 10, 'signal': 7, 'nviews':3, 'intervals':'old'}
+    kwargs = {'s': 0, 'n': 300, 'p': 10, 'signal': 7, 'nviews':3}
     split_report = reports.reports['test_multiple_queries']
     screened_results = reports.collect_multiple_runs(split_report['test'],
                                                      split_report['columns'],
diff --git a/selection/randomized/tests/test_multiple_splits.py b/selection/randomized/tests/test_multiple_splits.py
index da199bd1d..2e5d9e7fc 100644
--- a/selection/randomized/tests/test_multiple_splits.py
+++ b/selection/randomized/tests/test_multiple_splits.py
@@ -29,7 +29,6 @@ def test_multiple_splits(s=3,
                          split_frac=0.8,
                          lam_frac=0.7,
                          nsplits=4,
-                         intervals ='new',
                          ndraw=10000, burnin=2000,
                          solve_args={'min_its':50, 'tol':1.e-10}, check_screen =True):
 
@@ -79,50 +78,28 @@ def test_multiple_splits(s=3,
                                                           mv,
                                                           bootstrap=True)
 
-        if intervals == 'old':
-            target_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
-                                                            burnin=burnin)
-            LU_boot = target_sampler_boot.confidence_intervals(target_observed,
-                                                               sample=target_sample_boot,
-                                                               level=0.9)
-            pivots_boot = target_sampler_boot.coefficient_pvalues(target_observed,
-                                                                  parameter=true_vec,
-                                                                  sample=target_sample_boot)
-        else:
-            full_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
-                                                          burnin=burnin,
-                                                          keep_opt=True)
-            LU_boot = target_sampler_boot.confidence_intervals_translate(target_observed,
-                                                                         sample=full_sample_boot,
-                                                                         level=0.9)
-            pivots_boot = target_sampler_boot.coefficient_pvalues_translate(target_observed,
-                                                                            parameter=true_vec,
-                                                                            sample=full_sample_boot)
+        target_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
+                                                        burnin=burnin)
+        LU_boot = target_sampler_boot.confidence_intervals(target_observed,
+                                                           sample=target_sample_boot,
+                                                           level=0.9)
+        pivots_boot = target_sampler_boot.coefficient_pvalues(target_observed,
+                                                              parameter=true_vec,
+                                                              sample=target_sample_boot)
         ## CLT plugin
         target_sampler, _ = glm_target(loss,
                                        active_union,
                                        mv,
                                        bootstrap=False)
 
-        if intervals == 'old':
-            target_sample = target_sampler.sample(ndraw=ndraw,
-                                                  burnin=burnin)
-            LU = target_sampler.confidence_intervals(target_observed,
-                                                     sample=target_sample,
-                                                     level=0.9)
-            pivots = target_sampler.coefficient_pvalues(target_observed,
-                                                        parameter=true_vec,
-                                                        sample=target_sample)
-        else:
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-            LU = target_sampler.confidence_intervals_translate(target_observed,
-                                                               sample=full_sample,
-                                                               level=0.9)
-            pivots = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                  parameter=true_vec,
-                                                                  sample=full_sample)
+        target_sample = target_sampler.sample(ndraw=ndraw,
+                                              burnin=burnin)
+        LU = target_sampler.confidence_intervals(target_observed,
+                                                 sample=target_sample,
+                                                 level=0.9)
+        pivots = target_sampler.coefficient_pvalues(target_observed,
+                                                    parameter=true_vec,
+                                                    sample=target_sample)
 
         LU_naive = naive_confidence_intervals(target_sampler, target_observed)
 
@@ -155,7 +132,7 @@ def coverage(LU):
 
 def report(niter=3, **kwargs):
 
-    kwargs = {'s': 0, 'n': 300, 'p': 20, 'signal': 7, 'split_frac': 0.5, 'nsplits':3, 'intervals':'old'}
+    kwargs = {'s': 0, 'n': 300, 'p': 20, 'signal': 7, 'split_frac': 0.5, 'nsplits':3}
     split_report = reports.reports['test_multiple_splits']
     screened_results = reports.collect_multiple_runs(split_report['test'],
                                                      split_report['columns'],
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 69ee05aad..eeb08bd31 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -2,16 +2,17 @@
 import numpy as np
 import nose.tools as nt
 
-from selection.randomized.convenience import lasso, step, threshold
-from selection.randomized.query import optimization_sampler
-from selection.tests.instance import (gaussian_instance,
+from ..convenience import lasso, step, threshold
+from ..query import optimization_sampler
+from ...tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
-from selection.tests.flags import SMALL_SAMPLES
-from selection.tests.decorators import set_sampling_params_iftrue
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue
+
 from scipy.stats import t as tdist
-from selection.randomized.glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
-from selection.randomized.M_estimator import restricted_Mest
+from ..glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
+from ..M_estimator import restricted_Mest
 
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
@@ -69,4 +70,4 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         return selective_CI
 
 
-test_opt_weighted_intervals()
\ No newline at end of file
+test_opt_weighted_intervals()
diff --git a/selection/randomized/tests/test_split_compare.py b/selection/randomized/tests/test_split_compare.py
index fabadd0cd..85a39b0b0 100644
--- a/selection/randomized/tests/test_split_compare.py
+++ b/selection/randomized/tests/test_split_compare.py
@@ -31,7 +31,6 @@ def test_split_compare(s=3,
                        split_frac=0.8,
                        lam_frac=0.7,
                        ndraw=10000, burnin=2000,
-                       intervals = 'new',
                        solve_args={'min_its':50, 'tol':1.e-10}, check_screen =True):
 
     X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal)
@@ -76,25 +75,14 @@ def test_split_compare(s=3,
                                                           mv,
                                                           bootstrap=True)
 
-        if intervals == 'old':
-            target_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
-                                                  burnin=burnin)
-            LU_boot = target_sampler_boot.confidence_intervals(target_observed,
-                                                     sample=target_sample_boot,
-                                                     level=0.9)
-            pivots_boot = target_sampler_boot.coefficient_pvalues(target_observed,
-                                                              parameter=true_vec,
-                                                              sample=target_sample_boot)
-        else:
-            full_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-            LU_boot = target_sampler_boot.confidence_intervals_translate(target_observed,
-                                                               sample=full_sample_boot,
-                                                               level=0.9)
-            pivots_boot = target_sampler_boot.coefficient_pvalues_translate(target_observed,
-                                                                        parameter=true_vec,
-                                                                        sample=full_sample_boot)
+        target_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
+                                              burnin=burnin)
+        LU_boot = target_sampler_boot.confidence_intervals(target_observed,
+                                                 sample=target_sample_boot,
+                                                 level=0.9)
+        pivots_boot = target_sampler_boot.coefficient_pvalues(target_observed,
+                                                          parameter=true_vec,
+                                                          sample=target_sample_boot)
 
         ## CLT plugin
         target_sampler, _ = glm_target(loss,
@@ -102,25 +90,14 @@ def test_split_compare(s=3,
                                        mv,
                                        bootstrap=False)
 
-        if intervals == 'old':
-            target_sample = target_sampler.sample(ndraw=ndraw,
-                                                  burnin=burnin)
-            LU = target_sampler.confidence_intervals(target_observed,
-                                                     sample=target_sample,
-                                                     level=0.9)
-            pivots = target_sampler.coefficient_pvalues(target_observed,
-                                                        parameter=true_vec,
-                                                        sample=target_sample)
-        else:
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-            LU = target_sampler.confidence_intervals_translate(target_observed,
-                                                               sample=full_sample,
-                                                               level=0.9)
-            pivots = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                  parameter=true_vec,
-                                                                  sample=full_sample)
+        target_sample = target_sampler.sample(ndraw=ndraw,
+                                              burnin=burnin)
+        LU = target_sampler.confidence_intervals(target_observed,
+                                                 sample=target_sample,
+                                                 level=0.9)
+        pivots = target_sampler.coefficient_pvalues(target_observed,
+                                                    parameter=true_vec,
+                                                    sample=target_sample)
 
         LU_naive = naive_confidence_intervals(target_sampler, target_observed)
 
@@ -158,7 +135,7 @@ def coverage(LU):
 
 def report(niter=3, **kwargs):
 
-    kwargs = {'s': 0, 'n': 300, 'p': 20, 'signal': 7, 'split_frac': 0.8, 'intervals':'old'}
+    kwargs = {'s': 0, 'n': 300, 'p': 20, 'signal': 7, 'split_frac': 0.8}
     split_report = reports.reports['test_split_compare']
     screened_results = reports.collect_multiple_runs(split_report['test'],
                                                      split_report['columns'],
diff --git a/selection/randomized/tests/test_sqrt_lasso.py b/selection/randomized/tests/test_sqrt_lasso.py
index 59299d8cc..99a859606 100644
--- a/selection/randomized/tests/test_sqrt_lasso.py
+++ b/selection/randomized/tests/test_sqrt_lasso.py
@@ -35,7 +35,6 @@ def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
                     scale1 = 0.1,
                     scale2 = 0.2,
                     lam_frac = 1.,
-                    intervals = 'old',
                     bootstrap = False,
                     condition_on_CVR = False,
                     marginalize_subgrad = True,
@@ -98,38 +97,21 @@ def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
                                                      mv,
                                                      bootstrap=bootstrap)
 
-        if intervals == 'old':
-            target_sample = target_sampler.sample(ndraw=ndraw,
-                                                  burnin=burnin)
-            LU = target_sampler.confidence_intervals(target_observed,
-                                                     sample=target_sample,
-                                                     level=0.9)
-
-            #pivots_mle = target_sampler.coefficient_pvalues(target_observed,
-            #                                                parameter=target_sampler.reference,
-            #                                                sample=target_sample)
-            pivots_truth = target_sampler.coefficient_pvalues(target_observed,
-                                                              parameter=true_vec,
-                                                              sample=target_sample)
-            pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                         parameter=np.zeros_like(true_vec),
-                                                         sample=target_sample)
-        else:
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-            LU = target_sampler.confidence_intervals_translate(target_observed,
-                                                               sample=full_sample,
-                                                               level=0.9)
-            #pivots_mle = target_sampler.coefficient_pvalues_translate(target_observed,
-            #                                                          parameter=target_sampler.reference,
-            #                                                          sample=full_sample)
-            pivots_truth = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                        parameter=true_vec,
-                                                                        sample=full_sample)
-            pvalues = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                   parameter=np.zeros_like(true_vec),
-                                                                   sample=full_sample)
+        target_sample = target_sampler.sample(ndraw=ndraw,
+                                              burnin=burnin)
+        LU = target_sampler.confidence_intervals(target_observed,
+                                                 sample=target_sample,
+                                                 level=0.9)
+
+        #pivots_mle = target_sampler.coefficient_pvalues(target_observed,
+        #                                                parameter=target_sampler.reference,
+        #                                                sample=target_sample)
+        pivots_truth = target_sampler.coefficient_pvalues(target_observed,
+                                                          parameter=true_vec,
+                                                          sample=target_sample)
+        pvalues = target_sampler.coefficient_pvalues(target_observed,
+                                                     parameter=np.zeros_like(true_vec),
+                                                     sample=target_sample)
 
         L, U = LU.T
         sel_covered = np.zeros(nactive, np.bool)

From a7001bd5188539e27fcf51a617d1253cc8f957af Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 07:33:44 -0700
Subject: [PATCH 172/617] BF: old translate tests were broken

---
 .../randomized/tests/test_multiple_queries.py | 24 +++++++++----------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/selection/randomized/tests/test_multiple_queries.py b/selection/randomized/tests/test_multiple_queries.py
index 85ce218ac..ef38f1ddd 100644
--- a/selection/randomized/tests/test_multiple_queries.py
+++ b/selection/randomized/tests/test_multiple_queries.py
@@ -88,7 +88,6 @@ def test_multiple_queries(s=3,
                                                          bootstrap=bootstrap,
                                                          reference=reference)
             test_stat = lambda x: np.linalg.norm(x-reference)
-            observed_test_value = test_stat(target_observed)
 
         else:
             reference = beta[active_union]
@@ -98,8 +97,8 @@ def test_multiple_queries(s=3,
                                                          bootstrap=bootstrap,
                                                          reference = reference)
             test_stat = lambda x: np.linalg.norm(x-beta[active_union])
-            observed_test_value = test_stat(target_observed)
 
+        observed_test_value = test_stat(target_observed)
         pivot = target_sampler.hypothesis_test(test_stat,
                                                observed_test_value,
                                                alternative='twosided',
@@ -108,13 +107,8 @@ def test_multiple_queries(s=3,
                                                parameter=reference)
 
         full_sample = target_sampler.sample(ndraw=ndraw,
-                                       burnin=burnin,
-                                       keep_opt=True)
-
-        pivot = target_sampler.hypothesis_test(full_sample,
-                                               test_stat,
-                                               target_observed,
-                                               alternative='twosided')
+                                            burnin=burnin,
+                                            keep_opt=True)
 
         return [pivot], [False]
 
@@ -248,13 +242,17 @@ def test_multiple_queries(s=3, n=200, p=20,
                                             burnin=burnin,
                                             keep_opt=True)
 
-        pivot = target_sampler.hypothesis_test(full_sample,
-                                               test_stat,
-                                               target_observed,
-                                               alternative='twosided')
+        pivot = target_sampler.hypothesis_test(test_stat,
+                                               observed_test_value,
+                                               alternative='twosided',
+                                               ndraw=ndraw,
+                                               burnin=burnin,
+                                               parameter=reference)
 
         return [pivot], [False]
 
+
+
 def report(niter=1, **kwargs):
 
     #kwargs = {'s':3, 'n':300, 'p':20, 'signal':7, 'nview':4, 'test': 'global'}

From d324409f300113c28b88430f3cb6a25b5ac251a9 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 08:47:02 -0700
Subject: [PATCH 173/617] RF: reworked the reconstruction maps

---
 selection/randomized/query.py                 | 205 +++++++++---------
 selection/randomized/tests/test_Mest.py       |  36 +--
 .../randomized/tests/test_convenience.py      |   4 +-
 .../randomized/tests/test_greedy_step.py      |  15 +-
 .../tests/test_optimization_sampler.py        |  10 +-
 .../randomized/tests/test_randomized_lasso.py |   2 +-
 .../randomized/tests/test_reconstruction.py   |   2 +-
 7 files changed, 147 insertions(+), 127 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index e173e1c43..dceaa5906 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -26,48 +26,6 @@ def randomize(self):
             self.randomized_loss = self.randomization.randomize(self.loss, self.epsilon)
         self._randomized = True
 
-
-    def randomization_gradient(self, data_state, data_transform, opt_state):
-        """
-        Randomization derivative at full state.
-        """
-
-        # reconstruction of randomization omega
-
-        opt_linear, opt_offset = self.opt_transform
-
-        data_linear, data_offset = data_transform
-        if data_linear is not None:
-            data_piece = data_linear.dot(data_state) + data_offset
-        else: # this can be none if we are not moving a target
-            data_piece = data_offset
-
-        # value of the randomization omega
-
-        if opt_linear is not None: # this can happen if we marginalize all of omega!
-            opt_piece = opt_linear.dot(opt_state) + opt_offset
-            full_state = (data_piece + opt_piece)
-        else:
-            full_state = data_piece
-
-        # gradient of negative log density of randomization at omega
-        # we may have marginalized over some optimization variables here
-
-        randomization_derivative = self.construct_weights(full_state)
-
-        # chain rule for data, optimization parts
-
-        if data_linear is not None:
-            data_grad = data_linear.T.dot(randomization_derivative)
-        else:
-            data_grad = None
-
-        if opt_linear is not None:
-            opt_grad = opt_linear.T.dot(randomization_derivative)
-        else:
-            opt_grad = None
-        return data_grad, opt_grad #- self.grad_log_jacobian(opt_state)
-
     def construct_weights(self, full_state):
         return self.randomization.gradient(full_state)
 
@@ -102,33 +60,26 @@ def linear_decomposition(self, target_score_cov, target_cov, observed_target_sta
 
         return (composition_linear_part, composition_offset)
 
-    def reconstruction_map(self, data_state, data_transform, opt_state):
+    # Reconstruct different parts of 
+    # randomization: optimization, data and full
+
+    def reconstruct_opt(self, opt_state):
 
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-        # reconstruction of randomization omega
-
-        data_state = np.atleast_2d(data_state)
         opt_linear, opt_offset = self.opt_transform
-
-        data_linear, data_offset = data_transform
-        if data_linear is not None:
-            data_piece = data_linear.dot(data_state) + data_offset
-        else:
-            data_piece = np.multiply.outer(data_offset, np.ones(opt_state.shape[0]))
-
         if opt_linear is not None:
             opt_state = np.atleast_2d(opt_state)
-            opt_piece = opt_linear.dot(opt_state.T) + opt_offset[:, None]
-            return (data_piece + opt_piece).T
+            return np.squeeze(opt_linear.dot(opt_state.T) + opt_offset[:, None]).T
         else:
-            return data_piece.T
+            return opt_offset
 
     def log_density(self, data_state, data_transform, opt_state):
 
-        full_data = self.reconstruction_map(data_state, data_transform, opt_state)
+        full_data = reconstruct_full(data_state, data_transform, self, opt_state)
         return self.randomization.log_density(full_data)
+
      # implemented by subclasses
 
     def grad_log_jacobian(self, opt_state):
@@ -140,7 +91,6 @@ def grad_log_jacobian(self, opt_state):
         # needs to be implemented for group lasso
         return self.derivative_logdet_jacobian(opt_state[self.scaling_slice])
 
-
     def jacobian(self, opt_state):
         """
         log_jacobian depends only on data through
@@ -172,6 +122,25 @@ def projection(self, opt_state):
 
         raise NotImplementedError('abstract method -- projection of optimization variables')
 
+def reconstruct_data(data_state, data_transform):
+    """Affine image of `data_state` under `data_transform`; only the squeezed offset if the linear part is None."""
+    data_state = np.atleast_2d(data_state)  # a single state becomes a one-row array
+    data_linear, data_offset = data_transform  # (linear part, offset) of the affine map
+    if data_linear is not None:
+        return np.squeeze(data_linear.dot(data_state.T) + data_offset[:,None]).T
+    else:
+        return np.squeeze(data_offset)
+
+def reconstruct_full(data_state, data_transform, query, opt_state):
+    """Squeezed sum of the data piece and `query`'s optimization piece; raises ValueError if `query._setup` is false."""
+    if not query._setup:
+        raise ValueError('setup_sampler should be called before using this function')
+
+    data_piece = reconstruct_data(data_state, data_transform)  # affine map applied to the data state
+    opt_piece =  query.reconstruct_opt(opt_state)  # the query's own reconstruction of its optimization variables
+
+    return np.squeeze((data_piece + opt_piece))
+
 class multiple_queries(object):
 
     '''
@@ -539,11 +508,21 @@ def gradient(self, state):
         # randomization_gradient are gradients of a CONVEX function
 
         for i in range(self.nqueries):
-            target_grad_curr, opt_grad[self.opt_slice[i]] = \
-                self.objectives[i].randomization_gradient(target_state, self.target_transform[i], opt_state[self.opt_slice[i]])
-            target_grad += target_grad_curr.copy()
 
-        target_grad = - target_grad
+            randomization_state = reconstruct_full(target_state, 
+                                                   self.target_transform[i], 
+                                                   self.objectives[i],
+                                                   opt_state[self.opt_slice[i]])
+
+            grad = self.objectives[i].construct_weights(randomization_state)
+            target_linear, target_offset = self.target_transform[i]
+            opt_linear, opt_offset = self.objectives[i].opt_transform
+            if target_linear is not None:
+                target_grad += target_linear.T.dot(grad)
+            if opt_linear is not None:
+                opt_grad[self.opt_slice[i]] = opt_linear.T.dot(grad)  # pull back through the linear map, not the offset
+
+        target_grad = -target_grad
         target_grad += self._reference_inv - self.target_inv_cov.dot(target_state)
         full_grad[self.target_slice] = target_grad
         full_grad[self.overall_opt_slice] = -opt_grad
@@ -800,7 +779,7 @@ def crude_lipschitz(self):
         return lipschitz
 
 
-    def reconstruction_map(self, state):
+    def reconstruct(self, state):
         '''
         Reconstruction of randomization at current state.
         Parameters
@@ -817,19 +796,17 @@ def reconstruction_map(self, state):
         '''
 
         state = np.atleast_2d(state)
-        #print(state.shape)
         if len(state.shape) > 2:
             raise ValueError('expecting at most 2-dimensional array')
 
         target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
-        #reconstructed = np.zeros((opt_state.shape[0],self.randomization_length_total))
 
         for i in range(self.nqueries):
-            reconstructed[:, self.randomization_slice[i]] = \
-                   self.objectives[i].reconstruction_map(target_state,
-                                                         self.target_transform[i],
-                                                         opt_state[:, self.opt_slice[i]])
+            reconstructed[:, self.randomization_slice[i]] = reconstruct_full(target_state,
+                                                                             self.target_transform[i],
+                                                                             self.objectives[i],
+                                                                             opt_state[:, self.opt_slice[i]])
 
         return np.squeeze(reconstructed)
 
@@ -847,7 +824,7 @@ def log_randomization_density(self, state):
             Has number of rows as `state` if 2-dimensional.
         '''
 
-        reconstructed = self.reconstruction_map(state)
+        reconstructed = self.reconstruct(state)
         value = np.zeros(reconstructed.shape[0])
 
         for i in range(self.nqueries):
@@ -874,8 +851,6 @@ def __init__(self,
            `objectives`, `score_info` are key
            attributed. (Should maybe change constructor
            to reflect only what is needed.)
-
-
         '''
 
         # sampler will draw samples for bootstrap
@@ -924,14 +899,11 @@ def __init__(self,
         # We implicitly assume that we are sampling a target
         # independent of the data in each view
 
-        self.target_transform = []
+        self.observed_scores = []
         for i in range(self.nqueries):
             obj = self.objectives[i]
-            
-            _, observed_score = obj.linear_decomposition(np.zeros(obj.ndim),
-                                                         np.array([[1.]]),
-                                                         0.)
-            self.target_transform.append((None, observed_score)) 
+            score_linear, score_offset = obj.score_transform
+            self.observed_scores.append(score_linear.dot(obj.observed_score_state) + score_offset)
 
     def projection(self, state):
         '''
@@ -964,10 +936,10 @@ def gradient(self, state):
         # randomization_gradient are gradients of a CONVEX function
 
         for i in range(self.nqueries):
-            # the 0 is our fictitious target independent of all the data
-            _, opt_grad[self.opt_slice[i]] = \
-                self.objectives[i].randomization_gradient(0., self.target_transform[i], opt_state[self.opt_slice[i]])
-
+            reconstructed_opt_state = self.objectives[i].reconstruct_opt(opt_state[self.opt_slice[i]])
+            opt_linear, opt_offset = self.objectives[i].opt_transform
+            opt_grad[self.opt_slice[i]] = \
+                opt_linear.T.dot(self.objectives[i].construct_weights(reconstructed_opt_state + self.observed_scores[i]))
         return -opt_grad
 
     def sample(self, ndraw, burnin, stepsize=None):
@@ -1249,7 +1221,7 @@ def crude_lipschitz(self):
             lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
         return lipschitz
 
-    def reconstruction_map(self, state):
+    def reconstruct(self, state):
         '''
         Reconstruction of randomization at current state.
         Parameters
@@ -1257,6 +1229,7 @@ def reconstruction_map(self, state):
         state : np.float
            State of sampler made up of `(target, opt_vars)`.
            Can be array with each row a state.
+
         Returns
         -------
         reconstructed : np.float
@@ -1266,15 +1239,42 @@ def reconstruction_map(self, state):
         '''
 
         state = np.atleast_2d(state)
-        if len(state.shape) > 2:
+        if state.ndim > 2:
+            raise ValueError('expecting at most 2-dimensional array')
+
+        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
+
+        for i in range(self.nqueries):
+            reconstructed[:,self.randomization_slice[i]] = self.objectives[i].reconstruct_opt(  
+                state[:,self.opt_slice[i]]) + self.observed_scores[i]
+
+        return np.squeeze(reconstructed)
+
+    def reconstruct_opt(self, state):
+        '''
+        Per-query reconstruction of the optimization variables only (no observed score added).
+        Parameters
+        ----------
+        state : np.float
+           State of sampler; its columns are indexed by `opt_slice`.
+           Can be array with each row a state.
+
+        Returns
+        -------
+        reconstructed : np.float
+           Has `total_randomization_length` columns and same number
+           of rows as `state`; singleton axes are squeezed.
+
+        '''
+
+        state = np.atleast_2d(state)
+        if state.ndim > 2:
             raise ValueError('expecting at most 2-dimensional array')
 
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
 
         for i in range(self.nqueries):
-            reconstructed[:,self.randomization_slice[i]] = self.objectives[i].reconstruction_map(  
-                0.,
-                self.target_transform[i],
+            reconstructed[:,self.randomization_slice[i]] = self.objectives[i].reconstruct_opt(  
                 state[:,self.opt_slice[i]])
 
         return np.squeeze(reconstructed)
@@ -1293,7 +1293,7 @@ def log_randomization_density(self, state):
             Has number of rows as `state` if 2-dimensional.
         '''
 
-        reconstructed = self.reconstruction_map(state)
+        reconstructed = self.reconstruct(state)
         value = np.zeros(reconstructed.shape[0])
 
         for i in range(self.nqueries):
@@ -1330,7 +1330,6 @@ def __init__(self,
         self.target_alpha = target_alpha
         self.boot_transform = []
 
-
         for i in range(self.nqueries):
             composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
                                                                                                   self.target_cov,
@@ -1356,10 +1355,19 @@ def gradient(self, state):
         # randomization_gradient are gradients of a CONVEX function
 
         for i in range(self.nqueries):
-            boot_grad_curr, opt_grad[self.opt_slice[i]] = \
-                self.objectives[i].randomization_gradient(boot_state, self.boot_transform[i],
-                                                          opt_state[self.opt_slice[i]])
-            boot_grad += boot_grad_curr.copy()
+
+            randomization_state = reconstruct_full(boot_state, 
+                                                   self.boot_transform[i], 
+                                                   self.objectives[i],
+                                                   opt_state[self.opt_slice[i]])
+
+            grad = self.objectives[i].construct_weights(randomization_state)
+            boot_linear, boot_offset = self.boot_transform[i]
+            opt_linear, opt_offset = self.objectives[i].opt_transform
+            if boot_linear is not None:
+                boot_grad += boot_linear.T.dot(grad)
+            if opt_linear is not None:
+                opt_grad[self.opt_slice[i]] = opt_linear.T.dot(grad)  # pull back through the linear map, not the offset
 
         boot_grad = -boot_grad
         boot_grad -= boot_state
@@ -1440,16 +1448,17 @@ def __init__(self,
                  opt_sample,
                  observed):
 
-        self.reconstructed_sample = opt_sampler.reconstruction_map(opt_sample) # observed_score + affine(opt_sample)
+        full_sample = opt_sampler.reconstruct_full(opt_sample) # observed_score + affine(opt_sample)
+        self._logden = opt_sampler.log_randomization_density(full_sample)
 
+        # we now remove the observed_score from full_sample
+        self.reconstructed_sample = opt_sampler.reconstruct_opt(opt_sample) # affine(opt_sample)
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
         self._normal_sample = np.random.multivariate_normal(mean=np.zeros(nactive), 
                                                             cov=opt_sampler.target_cov, 
                                                             size=(sample.shape[0],))
 
-        self._logden = opt_sampler.log_randomization_density(self.reconstructed_sample)
-
     def pivot(self,
               linear_func,
               candidate,
@@ -1546,8 +1555,8 @@ def _weights(self,
 
         _lognum = 0
         for i in range(len(log_density)):
-            density_arg = nuisance[i] + score_cov[i].dot(sample_stat)  
-            _lognum += log_density[i](density_arg)
+            density_arg = score_cov[i].dot(sample_stat) + nuisance[i][:,None]
+            _lognum += log_density[i](density_arg + self.reconstructed_sample)
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
 
diff --git a/selection/randomized/tests/test_Mest.py b/selection/randomized/tests/test_Mest.py
index cf3ba294a..8ba805543 100644
--- a/selection/randomized/tests/test_Mest.py
+++ b/selection/randomized/tests/test_Mest.py
@@ -8,15 +8,15 @@
 
 import regreg.api as rr
 
-from selection.tests.decorators import wait_for_return_value, register_report
+from ...tests.decorators import wait_for_return_value, register_report
 import selection.tests.reports as reports
+from ...tests.instance import logistic_instance
 
-from selection.randomized.api import randomization, multiple_queries, pairs_bootstrap_glm, glm_group_lasso, glm_nonparametric_bootstrap 
-from selection.randomized.glm import bootstrap_cov
-from selection.distributions.discrete_family import discrete_family
-from selection.sampling.langevin import projected_langevin
-
-from selection.randomized.tests import logistic_instance
+from ..api import randomization, multiple_queries, pairs_bootstrap_glm, glm_group_lasso, glm_nonparametric_bootstrap 
+from ..glm import bootstrap_cov
+from ...distributions.discrete_family import discrete_family
+from ...sampling.langevin import projected_langevin
+from ..query import reconstruct_full
 
 @register_report(['pvalue', 'active'])
 @wait_for_return_value()
@@ -92,13 +92,16 @@ def target_gradient(state):
             target = state[target_slice]
             opt_state1 = state[opt_slice1]
             opt_state2 = state[opt_slice2]
-            target_grad1 = M_est1.randomization_gradient(target, (A1, b1), opt_state1)
-            target_grad2 = M_est2.randomization_gradient(target, (A2, b2), opt_state2)
+            opt_linear1 = M_est1.opt_transform[0]
+            arg1 = reconstruct_full(target, (A1, b1), M_est1, opt_state1); grad1 = M_est1.construct_weights(arg1)
+
+            opt_linear2 = M_est2.opt_transform[0]
+            arg2 = reconstruct_full(target, (A2, b2), M_est2, opt_state2); grad2 = M_est2.construct_weights(arg2)
 
             full_grad = np.zeros_like(state)
-            full_grad[opt_slice1] = -target_grad1[1]
-            full_grad[opt_slice2] = -target_grad2[1]
-            full_grad[target_slice] -= target_grad1[0] + target_grad2[0]
+            full_grad[opt_slice1] = -opt_linear1.T.dot(grad1)
+            full_grad[opt_slice2] = -opt_linear2.T.dot(grad2)
+            full_grad[target_slice] -= A1.T.dot(grad1) + A2.T.dot(grad2)
             full_grad[target_slice] -= target_inv_cov.dot(target)
 
             return full_grad
@@ -201,11 +204,14 @@ def target_gradient(state):
 
             target = state[target_slice]
             opt_state1 = state[opt_slice1]
-            target_grad1 = M_est1.randomization_gradient(target, (A1, b1), opt_state1)
+
+
+            opt_linear1 = M_est1.opt_transform[0]
+            arg1 = reconstruct_full(target, (A1, b1), M_est1, opt_state1); grad1 = M_est1.construct_weights(arg1)
 
             full_grad = np.zeros_like(state)
-            full_grad[opt_slice1] = -target_grad1[1]
-            full_grad[target_slice] -= target_grad1[0] 
+            full_grad[opt_slice1] = -opt_linear1.T.dot(grad1)
+            full_grad[target_slice] -= A1.T.dot(grad1)
             full_grad[target_slice] -= target_inv_cov.dot(target)
 
             return full_grad
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index ae08e7608..5943437d7 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -56,8 +56,8 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
                                                      conv._queries,
                                                      bootstrap=False)
 
-        S = target_sampler.sample_opt(ndraw,
-                                      burnin)
+        S = target_sampler.sample(ndraw,
+                                  burnin)
 
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
diff --git a/selection/randomized/tests/test_greedy_step.py b/selection/randomized/tests/test_greedy_step.py
index fc40a8677..d193702e0 100644
--- a/selection/randomized/tests/test_greedy_step.py
+++ b/selection/randomized/tests/test_greedy_step.py
@@ -24,6 +24,7 @@
 from ..glm import bootstrap_cov
 from ...distributions.discrete_family import discrete_family
 from ...sampling.langevin import projected_langevin
+from ..query import reconstruct_full
 
 @register_report(['pvalue', 'active'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
@@ -110,13 +111,17 @@ def target_gradient(state):
             target = state[target_slice]
             opt_state1 = state[opt_slice1]
             opt_state2 = state[opt_slice2]
-            target_grad1 = M_est1.randomization_gradient(target, (A1, b1), opt_state1)
-            target_grad2 = step.randomization_gradient(target, (A2, b2), opt_state2)
+
+            opt_linear1 = M_est1.opt_transform[0]
+            arg1 = reconstruct_full(target, (A1, b1), M_est1, opt_state1); grad1 = M_est1.construct_weights(arg1)
+
+            opt_linear2 = step.opt_transform[0]
+            arg2 = reconstruct_full(target, (A2, b2), step, opt_state2); grad2 = step.construct_weights(arg2)
 
             full_grad = np.zeros_like(state)
-            full_grad[opt_slice1] = -target_grad1[1]
-            full_grad[opt_slice2] = -target_grad2[1]
-            full_grad[target_slice] -= target_grad1[0] + target_grad2[0]
+            full_grad[opt_slice1] = -opt_linear1.T.dot(grad1)
+            full_grad[opt_slice2] = -opt_linear2.T.dot(grad2)
+            full_grad[target_slice] -= A1.T.dot(grad1) + A2.T.dot(grad2)
             full_grad[target_slice] -= target_inv_cov.dot(target)
 
             return full_grad
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
index 7341572e2..46a28c100 100644
--- a/selection/randomized/tests/test_optimization_sampler.py
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -50,14 +50,14 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
         selected_features = np.zeros(p, np.bool)
         selected_features[:3] = True
 
-        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
-                                   conditioning_groups=conditioning_groups)
+        print(const_info, condition, marginalize, rand)
 
-        opt_sampler = optimization_sampler(conv._queries)
+        conv.decompose_subgradient(conditioning_groups, marginalizing_groups)
 
+        opt_sampler = optimization_sampler(conv._queries)
         S = opt_sampler.sample(ndraw,
                                burnin,
-                               stepsize=1.e-3)
+                               stepsize=1.e-10)
 
-        opt_sampler.reconstruction_map(S)
+        opt_sampler.reconstruct(S)
         
diff --git a/selection/randomized/tests/test_randomized_lasso.py b/selection/randomized/tests/test_randomized_lasso.py
index a7a25fc3a..daa77b2ce 100644
--- a/selection/randomized/tests/test_randomized_lasso.py
+++ b/selection/randomized/tests/test_randomized_lasso.py
@@ -13,7 +13,7 @@ def test_randomized_lasso(n=300, p=500, s=5, signal=7.5, rho=0.2):
 
     print(np.nonzero(signs != 0)[0])
     print(np.nonzero(beta != 0)[0])
-    print(L.summary(signs != 0, ndraw=10000, burnin=2000, reference_type='tilt', compute_intervals=False))
+    print(L.summary(signs != 0, ndraw=1000, burnin=200, compute_intervals=False))
 
 
 if __name__ == "__main__":
diff --git a/selection/randomized/tests/test_reconstruction.py b/selection/randomized/tests/test_reconstruction.py
index 0c0bbd3e0..da92fe698 100644
--- a/selection/randomized/tests/test_reconstruction.py
+++ b/selection/randomized/tests/test_reconstruction.py
@@ -60,6 +60,6 @@ def test_reconstruction(s=3,
                                               burnin=burnin,
                                               keep_opt=True)
         
-        reconstruction = target_sampler.reconstruction_map(target_sample)
+        reconstruction = target_sampler.reconstruct(target_sample)
         logdens = target_sampler.log_randomization_density(target_sample)
         return logdens.shape

From 694e03d03f05d94a5b12ae45141dcb9638d0c65d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 08:54:38 -0700
Subject: [PATCH 174/617] BF: removing translate option, giving threshold a
 trivial affine transform

---
 selection/randomized/convenience.py         | 43 ++++++---------------
 selection/randomized/tests/test_sampling.py | 33 ++++------------
 selection/randomized/threshold_score.py     |  2 +-
 3 files changed, 20 insertions(+), 58 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index f4445855a..641faaafd 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -169,7 +169,6 @@ def summary(self, selected_features,
                 level=0.9,
                 ndraw=10000, 
                 burnin=2000,
-                reference_type='translate',
                 compute_intervals=False,
                 bootstrap=False):
         """
@@ -195,9 +194,6 @@ def summary(self, selected_features,
         burnin : int (optional)
             Defaults to 1000.
 
-        reference_type : str
-            One of ['translate', 'tilt']. 
-
         bootstrap : bool
             Use wild bootstrap instead of Gaussian plugin.
 
@@ -205,9 +201,6 @@ def summary(self, selected_features,
         if not hasattr(self, "_queries"):
             raise ValueError('run `fit` method before producing summary.')
 
-        if reference_type not in ['translate', 'tilt']:
-            raise ValueError('reference_type must be one of ["translate", "tilt"]')
-
         target_sampler, target_observed = glm_target(self.loglike,
                                                      selected_features,
                                                      self._queries,
@@ -217,31 +210,17 @@ def summary(self, selected_features,
             null_value = np.zeros(self.loglike.shape[0])
 
         intervals = None
-        if reference_type == 'translate':
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=True)
-
-            pvalues = target_sampler.coefficient_pvalues_translate(target_observed,
-                                                                   parameter=null_value,
-                                                                   sample=full_sample)
-
-            if compute_intervals:
-                intervals = target_sampler.confidence_intervals_translate(target_observed,
-                                                                          sample=full_sample,
-                                                                          level=level)
-        else:
-            full_sample = target_sampler.sample(ndraw=ndraw,
-                                                burnin=burnin,
-                                                keep_opt=False)
-            pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                         parameter=null_value,
-                                                         sample=full_sample)
-            if compute_intervals:
-                intervals = target_sampler.confidence_intervals(target_observed,
-                                                                sample=full_sample,
-                                                                level=level)
-            
+        full_sample = target_sampler.sample(ndraw=ndraw,
+                                            burnin=burnin,
+                                            keep_opt=False)
+        pvalues = target_sampler.coefficient_pvalues(target_observed,
+                                                     parameter=null_value,
+                                                     sample=full_sample)
+        if compute_intervals:
+            intervals = target_sampler.confidence_intervals(target_observed,
+                                                            sample=full_sample,
+                                                            level=level)
+
         return intervals, pvalues
 
     @staticmethod
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 1cf5ffc5e..a51e701e7 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -1,16 +1,16 @@
 from itertools import product
-import numpy as np
 import nose.tools as nt
 
-from selection.randomized.convenience import lasso, step, threshold
-from selection.randomized.query import optimization_sampler
-from selection.tests.instance import (gaussian_instance,
-                               logistic_instance,
-                               poisson_instance)
-from selection.tests.flags import SMALL_SAMPLES
-from selection.tests.decorators import set_sampling_params_iftrue
+import numpy as np
 from scipy.stats import t as tdist
 
+from ..convenience import lasso, step, threshold
+from ..query import optimization_sampler
+from ...tests.instance import (gaussian_instance,
+                               logistic_instance,
+                               poisson_instance)
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue
 
 def inverse_truncated_cdf(x, lower, upper, randomization):
     #if (x<0 or x>1):
@@ -18,7 +18,6 @@ def inverse_truncated_cdf(x, lower, upper, randomization):
     arg = randomization._cdf(lower) + np.multiply(x, randomization._cdf(upper) - randomization._cdf(lower))
     return randomization._ppf(arg)
 
-
 def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
     uniform_samples = np.random.uniform(0,1, size=(nsamples,randomization.shape[0]))
     samples = np.zeros((nsamples, randomization.shape[0]))
@@ -26,7 +25,6 @@ def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
         samples[i,:] = inverse_truncated_cdf(uniform_samples[i,:], lower, upper, randomization)
     return samples
 
-
 def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =10000):
     p = X.shape[1]
     nactive = active.sum()
@@ -100,21 +98,8 @@ def test_optimization_sampler(ndraw=20000, burnin=2000):
         signs = conv.fit()
         print("signs", signs)
 
-        marginalizing_groups = np.zeros(p, np.bool)
-        #marginalizing_groups[:int(p/2)] = True
-        conditioning_groups = ~marginalizing_groups
-        #conditioning_groups[-int(p/4):] = False
-
         selected_features = conv._view.selection_variable['variables']
 
-        #conv.summary(selected_features,
-        #             ndraw=ndraw,
-        #             burnin=burnin,
-        #             compute_intervals=True)
-
-        #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
-        #                           conditioning_groups=conditioning_groups)
-
         conv._queries.setup_sampler(form_covariances=None)
         conv._queries.setup_opt_state()
         target_sampler = optimization_sampler(conv._queries)
@@ -131,5 +116,3 @@ def test_optimization_sampler(ndraw=20000, burnin=2000):
         print([np.mean(opt_samples[:,i]) for i in range(p)])
 
 
-
-test_optimization_sampler()
\ No newline at end of file
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index cb54898a0..ce43f86ca 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -150,7 +150,7 @@ def setup_sampler(self):
 
         p = self.boundary.shape[0]  # shorthand
         self.num_opt_var = 0
-        self.opt_transform = (None, None)
+        self.opt_transform = (np.array([], np.float), np.zeros(p, np.float))
         self.observed_opt_state = np.array([])
         _score_linear_term = -np.identity(p)
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))

From 61512daef6a49ea721cdfcd0481e64325009580c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 09:18:29 -0700
Subject: [PATCH 175/617] WIP: opt_weighted_intervals tests runs, not clear
 they cover 0...

---
 selection/randomized/query.py                 | 65 +++++++++----------
 .../tests/test_opt_weighted_intervals.py      |  8 +--
 2 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index dceaa5906..db8e94388 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -75,9 +75,7 @@ def reconstruct_opt(self, opt_state):
         else:
             return opt_offset
 
-    def log_density(self, data_state, data_transform, opt_state):
-
-        full_data = reconstruct_full(data_state, data_transform, self, opt_state)
+    def log_density(self, full_data):
         return self.randomization.log_density(full_data)
 
      # implemented by subclasses
@@ -810,7 +808,7 @@ def reconstruct(self, state):
 
         return np.squeeze(reconstructed)
 
-    def log_randomization_density(self, state):
+    def log_density(self, state):
         '''
         Log of randomization density at current state.
         Parameters
@@ -983,7 +981,10 @@ def sample(self, ndraw, burnin, stepsize=None):
                 samples.append(target_langevin.state.copy())
         return np.asarray(samples)
 
-    def setup_target(self, target_info, form_covariances, parametric=False):
+    def setup_target(self, 
+                     target_info, 
+                     form_covariances, 
+                     parametric=False):
         """
         This computes the matrices used in the linear decomposition
         that will be used in computing weights for the sampler.
@@ -991,14 +992,14 @@ def setup_target(self, target_info, form_covariances, parametric=False):
 
         self.score_cov = []
         self.observed_score = []
-        self.log_density = []
+        self.log_densities = []
 
         target_cov_sum = 0
 
         # we should pararallelize this over all views at once ?
         for i in range(self.nqueries):
             view = self.objectives[i]
-            self.log_density.append(view.log_randomization_density)
+            self.log_densities.append(view.log_density)
             score_info = view.setup_sampler(form_covariances)
             if parametric == False:
                 target_cov, cross_cov = form_covariances(target_info,  
@@ -1279,7 +1280,7 @@ def reconstruct_opt(self, state):
 
         return np.squeeze(reconstructed)
 
-    def log_randomization_density(self, state):
+    def log_density(self, state):
         '''
         Log of randomization density at current state.
         Parameters
@@ -1448,17 +1449,21 @@ def __init__(self,
                  opt_sample,
                  observed):
 
-        full_sample = opt_sampler.reconstruct_full(opt_sample) # observed_score + affine(opt_sample)
-        self._logden = opt_sampler.log_randomization_density(full_sample)
+        full_sample = opt_sampler.reconstruct(opt_sample) # observed_score + affine(opt_sample)
+        self._logden = opt_sampler.log_density(full_sample)
 
         # we now remove the observed_score from full_sample
         self.reconstructed_sample = opt_sampler.reconstruct_opt(opt_sample) # affine(opt_sample)
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
-        self._normal_sample = np.random.multivariate_normal(mean=np.zeros(nactive), 
-                                                            cov=opt_sampler.target_cov, 
-                                                            size=(sample.shape[0],))
+        # setup_target has been called on opt_sampler
+        self.opt_sampler = opt_sampler
+        self.opt_sample = opt_sample
 
+        self.target_cov = opt_sampler.target_cov
+        self._normal_sample = np.random.multivariate_normal(mean=np.zeros(self.target_cov.shape[0]), 
+                                                            cov=self.target_cov, 
+                                                            size=(opt_sample.shape[0],))
     def pivot(self,
               linear_func,
               candidate,
@@ -1483,16 +1488,15 @@ def pivot(self,
         score_cov = []
         for i in range(len(self.opt_sampler.objectives)):
             cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
-            cur_nuisance = self.observed_score[i] - cur_score_cov * observed_stat / target_cov
+            cur_nuisance = self.opt_sampler.observed_score[i] - cur_score_cov * observed_stat / target_cov
             nuisance.append(cur_nuisance)
             score_cov.append(cur_score_cov)
 
-        candidate_sample, weights = self._weights(self.opt_sample,          # sample of optimization variables
-                                                  sample_stat + candidate,  # normal sample under candidate
-                                                  nuisance,                 # nuisance sufficient stats for each view
-                                                  score_cov,                # points will be moved like sample * score_cov
-                                                  self.opt_sampler.log_density)
-
+        weights = self._weights(sample_stat + candidate,  # normal sample under candidate
+                                nuisance,                 # nuisance sufficient stats for each view
+                                score_cov,                # points will be moved like sample * score_cov
+                                self.opt_sampler.log_densities)
+        
         pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
 
         if alternative == 'twosided':
@@ -1504,16 +1508,11 @@ def pivot(self,
 
     def confidence_interval(self, linear_func, level=0.90, how_many_sd=20):
 
-        target_delta = self._delta[:,self.targeted_sampler.target_slice]
-        projected_delta = target_delta.dot(linear_func)
+        sample_stat = self._normal_sample.dot(linear_func)
         projected_observed = self.observed.dot(linear_func)
-        std_projected_delta = np.sqrt(np.dot(linear_func.T, self.targeted_sampler.target_cov).dot(linear_func))
-
-        delta_min, delta_max = projected_delta.min(), projected_delta.max()
-
+        
         _norm = np.linalg.norm(linear_func)
-        grid_min, grid_max = -how_many_sd * np.std(projected_delta), how_many_sd * np.std(projected_delta)
-        print("grid", grid_min, grid_max)
+        grid_min, grid_max = -how_many_sd * np.std(sample_stat), how_many_sd * np.std(sample_stat)
 
         def _rootU(gamma):
             return self.pivot(linear_func,
@@ -1535,7 +1534,7 @@ def _weights(self,
                  sample_stat,
                  nuisance,
                  score_cov,
-                 log_density):
+                 log_densities):
 
         # Here we should loop through the views
         # and move the score of each view 
@@ -1554,11 +1553,11 @@ def _weights(self,
         # In this function, \hat{\theta}_i will change with the Monte Carlo sample
 
         _lognum = 0
-        for i in range(len(log_density)):
-            density_arg = score_cov[i].dot(sample_stat) + nuisance[i][:,None]
-            _lognum += log_density[i](density_arg + self.reconstructed_sample)
+        for i in range(len(log_densities)):
+            density_arg = np.multiply.outer(score_cov[i], sample_stat) + nuisance[i][:,None]
+            _lognum += log_densities[i](density_arg.T + self.reconstructed_sample)
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
 
-        return candidate_sample, np.exp(_logratio)
+        return np.exp(_logratio)
 
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index eeb08bd31..25be5bdb8 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -60,14 +60,12 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         form_covariances = glm_nonparametric_bootstrap(n, n)
         conv._queries.setup_sampler(form_covariances)
         boot_target, boot_target_observed = pairs_bootstrap_glm(conv.loglike, selected_features, inactive=None)
-        opt_sampler.setup_target(target_info=boot_target,
-                                 observed_target_state=unpenalized_mle,
-                                 form_covariances=form_covariances)
+        opt_sampler.setup_target(boot_target,
+                                 form_covariances)
 
-        selective_CI = opt_sampler.confidence_intervals(opt_sampler.observed_target_state, sample=S)
+        selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
         print(selective_CI)
 
         return selective_CI
 
 
-test_opt_weighted_intervals()

From 698188badbb02c0c0cac2066ca838d91ec0f1f18 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 09:38:05 -0700
Subject: [PATCH 176/617] have to set seed for test to pass for the moment

---
 selection/randomized/tests/test_opt_weighted_intervals.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 25be5bdb8..d9e5a9048 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -8,14 +8,14 @@
                                logistic_instance,
                                poisson_instance)
 from ...tests.flags import SMALL_SAMPLES
-from ...tests.decorators import set_sampling_params_iftrue
+from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
 
 from scipy.stats import t as tdist
 from ..glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
 from ..M_estimator import restricted_Mest
 
-
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+@set_seed_iftrue(True, 200)
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100)
 def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
     cls = lasso

From 70a7f92aa8c0b8a41712e639368f77047ff10efd Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 09:48:58 -0700
Subject: [PATCH 177/617] BF: rename

---
 selection/randomized/tests/test_reconstruction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/randomized/tests/test_reconstruction.py b/selection/randomized/tests/test_reconstruction.py
index da92fe698..c99379f4d 100644
--- a/selection/randomized/tests/test_reconstruction.py
+++ b/selection/randomized/tests/test_reconstruction.py
@@ -61,5 +61,5 @@ def test_reconstruction(s=3,
                                               keep_opt=True)
         
         reconstruction = target_sampler.reconstruct(target_sample)
-        logdens = target_sampler.log_randomization_density(target_sample)
+        logdens = target_sampler.log_density(target_sample)
         return logdens.shape

From fed1250636fb006a8d7648b9470413eb7da7e435 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 10:28:26 -0700
Subject: [PATCH 178/617] whitespace

---
 selection/randomized/M_estimator.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 6e238cfc8..301eac291 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -323,9 +323,6 @@ def derivative_logdet_jacobian(self, scalings):
         der[self.scaling_slice] = np.array([np.matrix.trace(jacobian_inv_blocks[i]) for i in range(scalings.shape[0])])
         return der
 
-
-
-
     def setup_sampler(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
         pass
 
@@ -339,7 +336,6 @@ def projection(self, opt_state):
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-
         if ('subgradient' not in self.selection_variable and 
             'scaling' not in self.selection_variable): # have not conditioned on any thing else
             new_state = opt_state.copy() # not really necessary to copy

From 9e2eae837b99c767516ebebd9f80918ddaf75602 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 10:28:41 -0700
Subject: [PATCH 179/617] BF: need to set the _setup bit

---
 selection/randomized/cv_view.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/selection/randomized/cv_view.py b/selection/randomized/cv_view.py
index 52d3b28fb..3baca0928 100644
--- a/selection/randomized/cv_view.py
+++ b/selection/randomized/cv_view.py
@@ -68,6 +68,7 @@ def solve(self, glmnet=False, K=5):
             self._solved = True
 
     def setup_sampler(self):
+        self._setup = True
         return self.CV1_boot
 
     def one_SD_rule(self, direction="up"):

From 01744434390327bcc569b8ef0b81b11f845cb5b6 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 10:29:05 -0700
Subject: [PATCH 180/617] removing print statement

---
 selection/randomized/glm.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index b4a59870c..baa0a73d6 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -601,8 +601,6 @@ def bootstrap_cov(sampler, boot_target, cross_terms=(), nsample=2000):
     _outer_target = 0.
 
     for j in range(nsample):
-        #if j % 100==0:
-        #    print(j)
         indices = sampler()
         _boot_target = boot_target(indices)
 

From 28dea7c7a0cf5a19d75a08a361a79e24d43ab27d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 10:31:48 -0700
Subject: [PATCH 181/617] using only raw score in multiple_views

---
 selection/randomized/query.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index db8e94388..2d06383f8 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -897,11 +897,12 @@ def __init__(self,
         # We implicitly assume that we are sampling a target
         # independent of the data in each view
 
-        self.observed_scores = []
+        self.observed_raw_score = [] # in the data coordinates, not the view's coordinates
+                                     # will typically be \nabla \ell(\bar{\beta}_E) - \nabla^2 \ell(\bar{\beta}_E) \bar{\beta}_E
         for i in range(self.nqueries):
             obj = self.objectives[i]
             score_linear, score_offset = obj.score_transform
-            self.observed_scores.append(score_linear.dot(obj.observed_score_state) + score_offset)
+            self.observed_raw_score.append(score_linear.dot(obj.observed_score_state) + score_offset)
 
     def projection(self, state):
         '''
@@ -937,7 +938,7 @@ def gradient(self, state):
             reconstructed_opt_state = self.objectives[i].reconstruct_opt(opt_state[self.opt_slice[i]])
             opt_linear, opt_offset = self.objectives[i].opt_transform
             opt_grad[self.opt_slice[i]] = \
-                opt_linear.T.dot(self.objectives[i].construct_weights(reconstructed_opt_state + self.observed_scores[i]))
+                opt_linear.T.dot(self.objectives[i].construct_weights(reconstructed_opt_state + self.observed_raw_score[i]))
         return -opt_grad
 
     def sample(self, ndraw, burnin, stepsize=None):
@@ -991,7 +992,6 @@ def setup_target(self,
         """
 
         self.score_cov = []
-        self.observed_score = []
         self.log_densities = []
 
         target_cov_sum = 0
@@ -1011,7 +1011,6 @@ def setup_target(self,
 
             target_cov_sum += target_cov
             self.score_cov.append(cross_cov)
-            self.observed_score.append(view.observed_score_state)
 
         self.target_cov = target_cov_sum / self.nqueries
         self.target_invcov = np.linalg.inv(self.target_cov)
@@ -1247,7 +1246,7 @@ def reconstruct(self, state):
 
         for i in range(self.nqueries):
             reconstructed[:,self.randomization_slice[i]] = self.objectives[i].reconstruct_opt(  
-                state[:,self.opt_slice[i]]) + self.observed_scores[i]
+                state[:,self.opt_slice[i]]) + self.observed_raw_score[i]
 
         return np.squeeze(reconstructed)
 
@@ -1488,7 +1487,7 @@ def pivot(self,
         score_cov = []
         for i in range(len(self.opt_sampler.objectives)):
             cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
-            cur_nuisance = self.opt_sampler.observed_score[i] - cur_score_cov * observed_stat / target_cov
+            cur_nuisance = self.opt_sampler.observed_raw_score[i] - cur_score_cov * observed_stat / target_cov
             nuisance.append(cur_nuisance)
             score_cov.append(cur_score_cov)
 
@@ -1509,24 +1508,25 @@ def pivot(self,
     def confidence_interval(self, linear_func, level=0.90, how_many_sd=20):
 
         sample_stat = self._normal_sample.dot(linear_func)
-        projected_observed = self.observed.dot(linear_func)
+        observed_stat = self.observed.dot(linear_func)
         
         _norm = np.linalg.norm(linear_func)
         grid_min, grid_max = -how_many_sd * np.std(sample_stat), how_many_sd * np.std(sample_stat)
 
         def _rootU(gamma):
             return self.pivot(linear_func,
-                              projected_observed + gamma,
+                              observed_stat + gamma,
                               alternative='less') - (1 - level) / 2.
         def _rootL(gamma):
             return self.pivot(linear_func,
-                              projected_observed + gamma,
+                              observed_stat + gamma,
                               alternative='less') - (1 + level) / 2.
 
         upper = bisect(_rootU, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
         lower = bisect(_rootL, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
 
-        return lower + projected_observed, upper + projected_observed
+        print(_rootU(upper), _rootL(lower), 'pivot')
+        return lower + observed_stat, upper + observed_stat
 
     # Private methods
 

From 259f8918c6824f050f4fc49ba4d90c045bfc9df7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 10:56:35 -0700
Subject: [PATCH 182/617] made a l2norm_glm class so sqrt lasso can be
 randomized

---
 selection/algorithms/sqrt_lasso.py            | 130 ++++++++++++++++++
 selection/randomized/tests/test_sqrt_lasso.py |   7 +-
 2 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/selection/algorithms/sqrt_lasso.py b/selection/algorithms/sqrt_lasso.py
index c979d3d42..e1f99face 100644
--- a/selection/algorithms/sqrt_lasso.py
+++ b/selection/algorithms/sqrt_lasso.py
@@ -109,6 +109,136 @@ def hessian(self, beta):
 
         return self._H / f - np.multiply.outer(g, g) / f**3
 
+class l2norm_saturated(rr.smooth_atom):
+
+    """
+    A little wrapper so that sqrt_lasso view can be bootstrapped
+    like a glm. 
+
+    Mainly needs the saturated_loss.hessian method.
+
+    """
+
+    def __init__(self, 
+                 shape,
+                 response, 
+                 coef=1., 
+                 offset=None,
+                 quadratic=None,
+                 initial=None):
+
+        rr.smooth_atom.__init__(self,
+                                shape,
+                                offset=offset,
+                                quadratic=quadratic,
+                                initial=initial,
+                                coef=coef)
+
+        if sparse.issparse(response):
+            self.response = response.toarray().flatten()
+        else:
+            self.response = np.asarray(response)
+
+    def smooth_objective(self, natural_param, mode='both', check_feasibility=False):
+        """
+
+        Evaluate the smooth objective, computing its value, gradient or both.
+
+        Parameters
+        ----------
+
+        natural_param : ndarray
+            The current parameter values.
+
+        mode : str
+            One of ['func', 'grad', 'both']. 
+
+        check_feasibility : bool
+            If True, return `np.inf` when
+            point is not feasible, i.e. when `natural_param` is not
+            in the domain.
+
+        Returns
+        -------
+
+        If `mode` is 'func' returns just the objective value 
+        at `natural_param`, else if `mode` is 'grad' returns the gradient
+        else returns both.
+        """
+        
+        natural_param = self.apply_offset(natural_param)
+        resid = natural_param - self.response 
+
+        if mode == 'both':
+            f, g = self.scale(np.sqrt(np.sum(resid**2))), self.scale(resid / np.sqrt(np.sum(resid**2)))
+            return f, g
+        elif mode == 'grad':
+            return self.scale(resid / np.sqrt(np.sum(resid**2))) 
+        elif mode == 'func':
+            return self.scale(np.sqrt(np.sum(resid**2)))
+        else:
+            raise ValueError("mode incorrectly specified")
+            
+    # Begin loss API
+
+    def hessian(self, natural_param):
+        """
+        Hessian of the loss.
+
+        Parameters
+        ----------
+
+        natural_param : ndarray
+            Parameters where Hessian will be evaluated.
+
+        Returns
+        -------
+
+        hess : ndarray
+            A 1D-array representing the diagonal of the Hessian
+            evaluated at `natural_param`.
+        """
+        natural_param = self.apply_offset(natural_param)
+        resid = natural_param - self.response 
+
+        norm_resid = np.sqrt(np.sum(resid**2))
+        return self.scale(np.ones_like(natural_param) / norm_resid - resid**2 / norm_resid**3) # diagonal of full Hessian
+                                                                                               # used for bootstrap for randomized and setting
+                                                                                               # up score for randomized
+
+    def get_data(self):
+        return self.response
+
+    def set_data(self, data):
+        self.response = data
+
+    data = property(get_data, set_data)
+
+    def __copy__(self):
+        return l2norm_saturated(self.shape,
+                                copy(self.response),
+                                coef=self.coef, 
+                                offset=copy(self.offset),
+                                quadratic=copy(self.quadratic),
+                                initial=copy(self.coefs))
+
+    # End loss API
+
+    def mean_function(self, eta):
+        return eta
+
+def l2norm_glm(X, 
+               Y, 
+               quadratic=None, 
+               initial=None,
+               offset=None):
+    return rr.glm(X, 
+                  Y,
+                  l2norm_saturated(Y.shape, Y),
+                  quadratic=quadratic,
+                  initial=initial,
+                  offset=offset)
+
 def solve_sqrt_lasso(X, Y, weights=None, initial=None, quadratic=None, solve_args={}):
     """
 
diff --git a/selection/randomized/tests/test_sqrt_lasso.py b/selection/randomized/tests/test_sqrt_lasso.py
index 99a859606..897f9819e 100644
--- a/selection/randomized/tests/test_sqrt_lasso.py
+++ b/selection/randomized/tests/test_sqrt_lasso.py
@@ -8,7 +8,8 @@
 from ...tests.instance import (gaussian_instance,
                                       logistic_instance)
 from ...algorithms.sqrt_lasso import (sqlasso_objective,
-                                      choose_lambda)
+                                      choose_lambda,
+                                      l2norm_glm)
 from ..query import naive_confidence_intervals, naive_pvalues
 
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
@@ -52,8 +53,8 @@ def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, signal=signal, sigma=1)
     lam_nonrandom = choose_lambda(X)
     lam_random = choose_lambda_with_randomization(X, randomizer)
-    loss = sqlasso_objective(X, y)
-
+    loss = l2norm_glm(X, y)
+    #sqloss = rr.glm.gaussian(X, y)
     epsilon = 1./n
 
     # non-randomized sqrt-Lasso, just looking how many vars it selects

From 820fc4a8e670fe42e85d0472a66894921426407d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 12:39:16 -0700
Subject: [PATCH 183/617] BF: deactivating seems to mess up later activation

---
 selection/algorithms/tests/test_compareR.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 504977837..72d7b1c7e 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -331,8 +331,6 @@ def test_solve_QP(): # check the R coordinate descent LASSO solver
 
     soln_R = np.asarray(rpy.r('soln_R'))
 
-    rpy2.robjects.numpy2ri.deactivate()
-
     yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
 
 

From 33998726cb796031ac4a0f05c4836c15ffd67cc0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 13:33:42 -0700
Subject: [PATCH 184/617] BF: making sure we are in randomization's original
 coordinates

---
 selection/randomized/query.py | 289 +++++++++++++++++-----------------
 1 file changed, 147 insertions(+), 142 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 2d06383f8..91ca7a42b 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -830,6 +830,113 @@ def log_density(self, state):
             value += log_dens(reconstructed[:,self.opt_slice[i]])
         return np.squeeze(value)
 
+class bootstrapped_target_sampler(targeted_sampler):
+
+    # make one of these for each hypothesis test
+
+    def __init__(self,
+                 multi_view,
+                 target_info,
+                 observed_target_state,
+                 target_alpha,
+                 target_set=None,
+                 reference=None,
+                 boot_size=None):
+
+        # sampler will draw bootstrapped weights for the target
+
+        if boot_size is None:
+            boot_size = target_alpha.shape[1]
+
+        targeted_sampler.__init__(self, multi_view,
+                                  target_info,
+                                  observed_target_state,
+                                  target_set,
+                                  reference)
+        # for bootstrap
+
+        self.boot_size = boot_size
+        self.target_alpha = target_alpha
+        self.boot_transform = []
+
+        for i in range(self.nqueries):
+            composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
+                                                                                                  self.target_cov,
+                                                                                                  self.observed_target_state)
+            boot_linear_part = np.dot(composition_linear_part, target_alpha)
+            boot_offset = composition_offset + np.dot(composition_linear_part, self.reference).flatten()
+            self.boot_transform.append((boot_linear_part, boot_offset))
+
+        # set the observed state for bootstrap
+
+        self.boot_slice = slice(multi_view.num_opt_var, multi_view.num_opt_var + self.boot_size)
+        self.observed_state = np.zeros(multi_view.num_opt_var + self.boot_size)
+        self.observed_state[self.boot_slice] = np.ones(self.boot_size)
+        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
+
+
+    def gradient(self, state):
+
+        boot_state, opt_state = state[self.boot_slice], state[self.overall_opt_slice]
+        boot_grad, opt_grad = np.zeros_like(boot_state), np.zeros_like(opt_state)
+        full_grad = np.zeros_like(state)
+
+        # randomization_gradient are gradients of a CONVEX function
+
+        for i in range(self.nqueries):
+
+            randomization_state = reconstruct_full(boot_state, 
+                                                   self.boot_transform[i], 
+                                                   self.objectives[i],
+                                                   opt_state[self.opt_slice[i]])
+
+            grad = self.objectives[i].construct_weights(randomization_state)
+            boot_linear, boot_offset = self.boot_transform[i]
+            opt_linear, opt_offset = self.objectives[i].opt_transform
+            if boot_linear is not None:
+                boot_grad += boot_linear.T.dot(grad)
+            if opt_linear is not None:
+                opt_grad[self.opt_slice[i]] = opt_offset.T.dot(grad)
+
+        boot_grad = -boot_grad
+        boot_grad -= boot_state
+
+        full_grad[self.boot_slice] = boot_grad
+        full_grad[self.overall_opt_slice] = -opt_grad
+
+        return full_grad
+
+    def sample(self, ndraw, burnin, stepsize = None, keep_opt=False):
+        if stepsize is None:
+            stepsize = 1. / self.observed_state.shape[0]
+
+        bootstrap_langevin = projected_langevin(self.observed_state.copy(),
+                                                self.gradient,
+                                                self.projection,
+                                                stepsize)
+        if keep_opt:
+            boot_slice = slice(None, None, None)
+        else:
+            boot_slice = self.boot_slice
+
+        samples = []
+        for i in range(ndraw + burnin):
+            bootstrap_langevin.next()
+            if (i >= burnin):
+                samples.append(bootstrap_langevin.state[boot_slice].copy())
+        samples = np.asarray(samples)
+
+        if keep_opt:
+            target_samples = samples[:,self.boot_slice].dot(self.target_alpha.T) + self.reference[None, :]
+            opt_sample0 = samples[0,self.overall_opt_slice]
+            result = np.zeros((samples.shape[0], opt_sample0.shape[0] + target_samples.shape[1]))
+            result[:,self.overall_opt_slice] = samples[:,self.overall_opt_slice]
+            result[:,self.target_slice] = target_samples
+            return result
+        else:
+            target_samples = samples.dot(self.target_alpha.T) + self.reference[None, :]
+            return target_samples
+
 class optimization_sampler(object):
 
     '''
@@ -899,10 +1006,12 @@ def __init__(self,
 
         self.observed_raw_score = [] # in the data coordinates, not the view's coordinates
                                      # will typically be \nabla \ell(\bar{\beta}_E) - \nabla^2 \ell(\bar{\beta}_E) \bar{\beta}_E
+        self.score_info = []
         for i in range(self.nqueries):
             obj = self.objectives[i]
             score_linear, score_offset = obj.score_transform
             self.observed_raw_score.append(score_linear.dot(obj.observed_score_state) + score_offset)
+            self.score_info.append(obj.score_transform)
 
     def projection(self, state):
         '''
@@ -1301,146 +1410,6 @@ def log_density(self, state):
             value += log_dens(reconstructed[:,self.opt_slice[i]])
         return np.squeeze(value)
 
-class bootstrapped_target_sampler(targeted_sampler):
-
-    # make one of these for each hypothesis test
-
-    def __init__(self,
-                 multi_view,
-                 target_info,
-                 observed_target_state,
-                 target_alpha,
-                 target_set=None,
-                 reference=None,
-                 boot_size=None):
-
-        # sampler will draw bootstrapped weights for the target
-
-        if boot_size is None:
-            boot_size = target_alpha.shape[1]
-
-        targeted_sampler.__init__(self, multi_view,
-                                  target_info,
-                                  observed_target_state,
-                                  target_set,
-                                  reference)
-        # for bootstrap
-
-        self.boot_size = boot_size
-        self.target_alpha = target_alpha
-        self.boot_transform = []
-
-        for i in range(self.nqueries):
-            composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
-                                                                                                  self.target_cov,
-                                                                                                  self.observed_target_state)
-            boot_linear_part = np.dot(composition_linear_part, target_alpha)
-            boot_offset = composition_offset + np.dot(composition_linear_part, self.reference).flatten()
-            self.boot_transform.append((boot_linear_part, boot_offset))
-
-        # set the observed state for bootstrap
-
-        self.boot_slice = slice(multi_view.num_opt_var, multi_view.num_opt_var + self.boot_size)
-        self.observed_state = np.zeros(multi_view.num_opt_var + self.boot_size)
-        self.observed_state[self.boot_slice] = np.ones(self.boot_size)
-        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
-
-
-    def gradient(self, state):
-
-        boot_state, opt_state = state[self.boot_slice], state[self.overall_opt_slice]
-        boot_grad, opt_grad = np.zeros_like(boot_state), np.zeros_like(opt_state)
-        full_grad = np.zeros_like(state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        for i in range(self.nqueries):
-
-            randomization_state = reconstruct_full(boot_state, 
-                                                   self.boot_transform[i], 
-                                                   self.objectives[i],
-                                                   opt_state[self.opt_slice[i]])
-
-            grad = self.objectives[i].construct_weights(randomization_state)
-            boot_linear, boot_offset = self.boot_transform[i]
-            opt_linear, opt_offset = self.objectives[i].opt_transform
-            if boot_linear is not None:
-                boot_grad += boot_linear.T.dot(grad)
-            if opt_linear is not None:
-                opt_grad[self.opt_slice[i]] = opt_offset.T.dot(grad)
-
-        boot_grad = -boot_grad
-        boot_grad -= boot_state
-
-        full_grad[self.boot_slice] = boot_grad
-        full_grad[self.overall_opt_slice] = -opt_grad
-
-        return full_grad
-
-    def sample(self, ndraw, burnin, stepsize = None, keep_opt=False):
-        if stepsize is None:
-            stepsize = 1. / self.observed_state.shape[0]
-
-        bootstrap_langevin = projected_langevin(self.observed_state.copy(),
-                                                self.gradient,
-                                                self.projection,
-                                                stepsize)
-        if keep_opt:
-            boot_slice = slice(None, None, None)
-        else:
-            boot_slice = self.boot_slice
-
-        samples = []
-        for i in range(ndraw + burnin):
-            bootstrap_langevin.next()
-            if (i >= burnin):
-                samples.append(bootstrap_langevin.state[boot_slice].copy())
-        samples = np.asarray(samples)
-
-        if keep_opt:
-            target_samples = samples[:,self.boot_slice].dot(self.target_alpha.T) + self.reference[None, :]
-            opt_sample0 = samples[0,self.overall_opt_slice]
-            result = np.zeros((samples.shape[0], opt_sample0.shape[0] + target_samples.shape[1]))
-            result[:,self.overall_opt_slice] = samples[:,self.overall_opt_slice]
-            result[:,self.target_slice] = target_samples
-            return result
-        else:
-            target_samples = samples.dot(self.target_alpha.T) + self.reference[None, :]
-            return target_samples
-
-def naive_confidence_intervals(target, observed, alpha=0.1):
-    """
-    Compute naive Gaussian based confidence
-    intervals for target.
-    Parameters
-    ----------
-
-    target : `targeted_sampler`
-    observed : np.float
-        A vector of observed data of shape `target.shape`
-    alpha : float (optional)
-        1 - confidence level.
-    Returns
-    -------
-    intervals : np.float
-        Gaussian based confidence intervals.
-    """
-    quantile = - ndist.ppf(alpha/float(2))
-    LU = np.zeros((2, target.shape[0]))
-    for j in range(target.shape[0]):
-        sigma = np.sqrt(target.target_cov[j, j])
-        LU[0,j] = observed[j] - sigma * quantile
-        LU[1,j] = observed[j] + sigma * quantile
-    return LU.T
-
-def naive_pvalues(target, observed, parameter):
-    pvalues = np.zeros(target.shape[0])
-    for j in range(target.shape[0]):
-        sigma = np.sqrt(target.target_cov[j, j])
-        pval = ndist.cdf((observed[j]-parameter[j])/sigma)
-        pvalues[j] = 2*min(pval, 1-pval)
-    return pvalues
-
 class optimization_intervals(object):
 
     def __init__(self,
@@ -1488,8 +1457,12 @@ def pivot(self,
         for i in range(len(self.opt_sampler.objectives)):
             cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
             cur_nuisance = self.opt_sampler.observed_raw_score[i] - cur_score_cov * observed_stat / target_cov
-            nuisance.append(cur_nuisance)
-            score_cov.append(cur_score_cov)
+            # cur_nuisance is in the view's internal coordinates
+            score_linear, score_offset = self.opt_sampler.score_info[i]
+            # final_nuisance is on the scale of the original randomization
+            final_nuisance = score_linear.dot(cur_nuisance) + score_offset
+            nuisance.append(final_nuisance)
+            score_cov.append(score_linear.dot(cur_score_cov))
 
         weights = self._weights(sample_stat + candidate,  # normal sample under candidate
                                 nuisance,                 # nuisance sufficient stats for each view
@@ -1561,3 +1534,35 @@ def _weights(self,
 
         return np.exp(_logratio)
 
+def naive_confidence_intervals(target, observed, alpha=0.1):
+    """
+    Compute naive Gaussian based confidence
+    intervals for target.
+    Parameters
+    ----------
+
+    target : `targeted_sampler`
+    observed : np.float
+        A vector of observed data of shape `target.shape`
+    alpha : float (optional)
+        1 - confidence level.
+    Returns
+    -------
+    intervals : np.float
+        Gaussian based confidence intervals.
+    """
+    quantile = - ndist.ppf(alpha/float(2))
+    LU = np.zeros((2, target.shape[0]))
+    for j in range(target.shape[0]):
+        sigma = np.sqrt(target.target_cov[j, j])
+        LU[0,j] = observed[j] - sigma * quantile
+        LU[1,j] = observed[j] + sigma * quantile
+    return LU.T
+
+def naive_pvalues(target, observed, parameter):
+    pvalues = np.zeros(target.shape[0])
+    for j in range(target.shape[0]):
+        sigma = np.sqrt(target.target_cov[j, j])
+        pval = ndist.cdf((observed[j]-parameter[j])/sigma)
+        pvalues[j] = 2*min(pval, 1-pval)
+    return pvalues

From c5a7d1fd12501376e65cccdf6751e76efb74b1fe Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 13:37:30 -0700
Subject: [PATCH 185/617] BF: fixing rpy2 activate

---
 selection/algorithms/cv_glmnet.py                   | 3 +++
 selection/algorithms/tests/test_compareR.py         | 1 +
 selection/constraints/tests/test_quadratic_tests.py | 3 +++
 selection/randomized/tests/test_cv_glmnet.py        | 2 +-
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/selection/algorithms/cv_glmnet.py b/selection/algorithms/cv_glmnet.py
index fa6803dba..052e79de6 100644
--- a/selection/algorithms/cv_glmnet.py
+++ b/selection/algorithms/cv_glmnet.py
@@ -15,6 +15,7 @@
     from rpy2 import robjects
     import rpy2.robjects.numpy2ri
     rpy2.robjects.numpy2ri.activate()
+    rpy2.robjects.numpy2ri.deactivate()
     importr('glmnet')
     have_glmnet = True
 except ImportError:
@@ -35,6 +36,7 @@ def __init__(self, loss, loss_label):
     def using_glmnet(self, loss=None):
         if not have_glmnet:
             raise ImportError("""glmnet failed to load with rpy2""")
+        rpy2.robjects.numpy2ri.activate()
         robjects.r('''
             glmnet_cv = function(X,y, family, lam_seq=NA){
             y = as.matrix(y)
@@ -87,6 +89,7 @@ def using_glmnet(self, loss=None):
             CV_err = CV_err_longer
         SD = np.array(result[4])
 
+        rpy2.robjects.numpy2ri.deactivate()
         return lam_minCV, lam_1SE, lam_seq, CV_err, SD
 
 
diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 72d7b1c7e..e5f600faf 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -330,6 +330,7 @@ def test_solve_QP(): # check the R coordinate descent LASSO solver
     rpy.r(R_code)
 
     soln_R = np.asarray(rpy.r('soln_R'))
+    rpy2.robjects.numpy2ri.deactivate()
 
     yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
 
diff --git a/selection/constraints/tests/test_quadratic_tests.py b/selection/constraints/tests/test_quadratic_tests.py
index 59229de27..cea1d987f 100644
--- a/selection/constraints/tests/test_quadratic_tests.py
+++ b/selection/constraints/tests/test_quadratic_tests.py
@@ -17,6 +17,7 @@
     from rpy2.robjects.numpy2ri import numpy2ri
     ro.conversion.py2ri = numpy2ri
     ro.numpy2ri.activate()
+    ro.numpy2ri.deactivate()
     R_available = True
 except ImportError:
     R_available = False
@@ -55,6 +56,7 @@ def test_chisq_noncentral(nsim=1000, burnin=2000, ndraw=8000):
     A, b = np.random.standard_normal((4,6)), np.zeros(4)
     con = AC.constraints(A,b, mean=mu)
 
+    ro.numpy2ri.activate()
     ro.r('fncp=%f' % ncp)
     ro.r('f = function(x) {pchisq(x,3,ncp=fncp)}')
     def F(x):
@@ -90,6 +92,7 @@ def F(x):
     P = np.array(P).reshape(-1)
     P = P[P > 0]
     P = P[P < 1]
+    ro.numpy2ri.deactivate()
 
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10)
diff --git a/selection/randomized/tests/test_cv_glmnet.py b/selection/randomized/tests/test_cv_glmnet.py
index cd0b05a7a..ad56c7cbc 100644
--- a/selection/randomized/tests/test_cv_glmnet.py
+++ b/selection/randomized/tests/test_cv_glmnet.py
@@ -1,7 +1,7 @@
 import numpy as np
 import regreg.api as rr
 
-from ..cv_glmnet import CV_glmnet
+from ...algorithms.cv_glmnet import CV_glmnet
 from ...tests.instance import gaussian_instance
 
 def test_cv_glmnet():

From 5b5530d41ca82d94509b227a18183f1c4b88c800 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 13:50:18 -0700
Subject: [PATCH 186/617] comments for tests

---
 selection/algorithms/tests/test_compareR.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index e5f600faf..0f210a051 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -16,6 +16,9 @@
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_fixed_lambda():
+    """
+    Check that Gaussian LASSO results agree with R
+    """
     tol = 1.e-5
     for s in [1,1.1]:
         lam = 7.8
@@ -80,6 +83,9 @@ def test_fixed_lambda():
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_forward_step():
+    """
+    Check that forward step results agree with R
+    """
     tol = 1.e-5
     R_code = """
     library(selectiveInference)
@@ -130,6 +136,9 @@ def test_forward_step():
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_forward_step_all():
+    """
+    Check that forward step results agree with R
+    """
     tol = 1.e-5
     R_code = """
     library(selectiveInference)
@@ -177,6 +186,9 @@ def test_forward_step_all():
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_coxph():
+    """
+    Check that Cox results agree with R
+    """
     tol = 1.e-5
     R_code = """
     library(selectiveInference)
@@ -234,6 +246,9 @@ def test_coxph():
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_logistic():
+    """
+    Check that logistic results agree with R
+    """
     tol = 1.e-4
     R_code = """
     library(selectiveInference)
@@ -290,7 +305,10 @@ def test_logistic():
 
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
-def test_solve_QP(): # check the R coordinate descent LASSO solver
+def test_solve_QP(): 
+    """
+    Check the R coordinate descent LASSO solver
+    """
 
     n, p = 100, 200
     lam = 10

From 5fdf730ff2b28fc11d395fa40ddeecd7df498086 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 14:16:09 -0700
Subject: [PATCH 187/617] selectiveInference install not working in travis

---
 .travis.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 177cf1293..d09c39347 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -47,7 +47,10 @@ install:
   - cd R-software
   - git submodule init
   - git submodule update
-  - make install
+  - rm -f selectiveInference/src/RcppExports.cpp
+  - rm -f selectiveInference/R/RcppExports.R
+  - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
+  - R CMD INSTALL selectiveInference
   - travis_install $INSTALL_TYPE
 
 # command to run tests, e.g. python setup.py test
@@ -75,7 +78,7 @@ script:
       if [ "$R_TESTS" ]; then
         nosetests ../selection/algorithms/tests/test_compareR.py
       else 
-        env USE_SMALL_SAMPLES=1 SET_SEED=1 nosetests $DOCTEST_ARGS --verbose $COVER_ARGS selection
+        env USE_SMALL_SAMPLES=1 USE_TEST_SEED=1 nosetests $DOCTEST_ARGS --verbose $COVER_ARGS selection
       fi
 after_success:
     - if [ "${COVERAGE}" == "1" ]; then coveralls; fi

From b93acddbaecb86cc323e974e68914a7ebb5a03ce Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 14:18:27 -0700
Subject: [PATCH 188/617] making R tests verbose

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index d09c39347..c80f49307 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -76,7 +76,7 @@ script:
       fi
     - |
       if [ "$R_TESTS" ]; then
-        nosetests ../selection/algorithms/tests/test_compareR.py
+        nosetests -v ../selection/algorithms/tests/test_compareR.py
       else 
         env USE_SMALL_SAMPLES=1 USE_TEST_SEED=1 nosetests $DOCTEST_ARGS --verbose $COVER_ARGS selection
       fi

From 9ca5f8f2fda8721609c8f8b20f9f6f058b29884b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 14:21:15 -0700
Subject: [PATCH 189/617] sudo for R install

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index c80f49307..ac5d03c97 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -50,7 +50,7 @@ install:
   - rm -f selectiveInference/src/RcppExports.cpp
   - rm -f selectiveInference/R/RcppExports.R
   - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
-  - R CMD INSTALL selectiveInference
+  - sudo R CMD INSTALL selectiveInference
   - travis_install $INSTALL_TYPE
 
 # command to run tests, e.g. python setup.py test

From 26ce459d69d71b024f93b5bb19fc57086d8081e7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 14:30:20 -0700
Subject: [PATCH 190/617] dependencies for selectiveInference

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index ac5d03c97..0494ac69f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -50,6 +50,7 @@ install:
   - rm -f selectiveInference/src/RcppExports.cpp
   - rm -f selectiveInference/R/RcppExports.R
   - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
+  - Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference
   - travis_install $INSTALL_TYPE
 

From 49fb1feb3b32de6cce050383f53f71e801c4adfe Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 14:36:18 -0700
Subject: [PATCH 191/617] sudo for install

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 0494ac69f..5cd32072b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -50,7 +50,7 @@ install:
   - rm -f selectiveInference/src/RcppExports.cpp
   - rm -f selectiveInference/R/RcppExports.R
   - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
-  - Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
+  - sudo Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference
   - travis_install $INSTALL_TYPE
 

From e0de1f562ab20c65d9740617ab57b085926f3fc0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 14:41:38 -0700
Subject: [PATCH 192/617] move up one directory

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 5cd32072b..53c45986a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,6 +52,7 @@ install:
   - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
   - sudo Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference
+  - cd ..
   - travis_install $INSTALL_TYPE
 
 # command to run tests, e.g. python setup.py test

From ed4d69007fea149ba1dc0bec5948aaffb678243e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 14:57:39 -0700
Subject: [PATCH 193/617] removing a main script function call

---
 selection/approx_ci/api.py                    | 0
 selection/approx_ci/tests/test_greedy_step.py | 1 -
 2 files changed, 1 deletion(-)
 delete mode 100644 selection/approx_ci/api.py

diff --git a/selection/approx_ci/api.py b/selection/approx_ci/api.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/selection/approx_ci/tests/test_greedy_step.py b/selection/approx_ci/tests/test_greedy_step.py
index 2b1b97ef8..7c0c88268 100644
--- a/selection/approx_ci/tests/test_greedy_step.py
+++ b/selection/approx_ci/tests/test_greedy_step.py
@@ -120,4 +120,3 @@ def test_greedy_step(n, p, s, signal):
         print("output of selection adjusted inference", greedy_step)
         return(greedy_step)
 
-test_greedy_step(n=200, p=30, s=0, signal=5.)
\ No newline at end of file

From 96935a68dcf970d99bfde6df1efc4dda99862175 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 14:59:40 -0700
Subject: [PATCH 194/617] comments for tests

---
 selection/randomized/tests/test_convenience.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index 5943437d7..bb2405d7c 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -12,7 +12,9 @@
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_lasso_constructors(ndraw=1000, burnin=200):
-
+    """
+    Smoke tests for lasso convenience constructors
+    """
     cls = lasso
     for const_info, rand in product(zip([gaussian_instance,
                                          logistic_instance,
@@ -62,6 +64,9 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_step_constructors(ndraw=1000, burnin=200):
+    """
+    Smoke tests for greedy_step convenience constructors
+    """
 
     cls = step
     for const_info, rand in product(zip([gaussian_instance,
@@ -104,6 +109,9 @@ def test_step_constructors(ndraw=1000, burnin=200):
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_threshold_constructors(ndraw=1000, burnin=200):
+    """
+    Smoke tests for marginal threshold convenience constructors
+    """
 
     cls = threshold
     for const_info, rand in product(zip([gaussian_instance,

From efeb0aebd7e2dd9ccb66286c1be71c507246b3b8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 15:01:59 -0700
Subject: [PATCH 195/617] we depend on rpy2 for cv_glmnet

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 54ee26eba..280ef2764 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,4 @@ pyinter
 statsmodels
 sklearn
 pyinter
-
+rpy2

From 67bf40805430709f4f747ea1d32015234b35bb60 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 9 Sep 2017 15:07:51 -0700
Subject: [PATCH 196/617] small fixes to approx_ci -- moved hiv to examples
 directory

---
 .../examples/hiv_approx_ci.py                 |  0
 selection/approx_ci/tests/test_glm.py         |  2 +-
 selection/approx_ci/tests/test_greedy_step.py | 32 +++++++++----------
 3 files changed, 17 insertions(+), 17 deletions(-)
 rename selection/approx_ci/tests/test_hiv_data.py => doc/examples/hiv_approx_ci.py (100%)

diff --git a/selection/approx_ci/tests/test_hiv_data.py b/doc/examples/hiv_approx_ci.py
similarity index 100%
rename from selection/approx_ci/tests/test_hiv_data.py
rename to doc/examples/hiv_approx_ci.py
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index a9a5355fb..d74931586 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -10,7 +10,7 @@
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
 
-from ..ci_via_approx_density import approximate_conditional_density
+from ..ci_approx_density import approximate_conditional_density
 from ..approx_ci.estimator_approx import M_estimator_approx
 
 from ...randomized.query import naive_confidence_intervals
diff --git a/selection/approx_ci/tests/test_greedy_step.py b/selection/approx_ci/tests/test_greedy_step.py
index 7c0c88268..5688dd2d2 100644
--- a/selection/approx_ci/tests/test_greedy_step.py
+++ b/selection/approx_ci/tests/test_greedy_step.py
@@ -9,14 +9,14 @@
 
 from selection.randomized.query import naive_confidence_intervals
 
-def test_approximate_inference(X,
-                               y,
-                               beta,
-                               sigma,
-                               seed_n = 0,
-                               lam_frac = 1.,
-                               loss='gaussian',
-                               randomization_scale = 1.):
+def approximate_inference(X,
+                          y,
+                          beta,
+                          sigma,
+                          seed_n = 0,
+                          lam_frac = 1.,
+                          loss='gaussian',
+                          randomization_scale = 1.):
 
     from selection.api import randomization
     n, p = X.shape
@@ -106,15 +106,15 @@ def __init__(self, target_cov):
                                            naive_risk)))
 
 
-def test_greedy_step(n, p, s, signal):
+def test_greedy_step(n=50, p=100, s=5, signal=5):
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
-    greedy_step = test_approximate_inference(X,
-                                             y,
-                                             beta,
-                                             sigma,
-                                             seed_n=0,
-                                             lam_frac=1.,
-                                             loss='gaussian')
+    greedy_step = approximate_inference(X,
+                                        y,
+                                        beta,
+                                        sigma,
+                                        seed_n=0,
+                                        lam_frac=1.,
+                                        loss='gaussian')
 
     if greedy_step is not None:
         print("output of selection adjusted inference", greedy_step)

From e2eee658c365e1041b974c13f052582016f92214 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 13:24:45 -0700
Subject: [PATCH 197/617] BF from python3 tests on travis

---
 selection/approx_ci/tests/test_glm.py            |  2 +-
 selection/bayesian/dual_lasso.py                 |  4 ++--
 selection/bayesian/estimator.py                  | 15 +++------------
 selection/bayesian/forward_stepwise_reduced.py   |  2 +-
 selection/bayesian/marginal_screening_reduced.py |  2 +-
 selection/bayesian/par_carved_reduced.py         |  4 ++--
 selection/bayesian/par_random_lasso_reduced.py   |  6 +++---
 selection/randomized/tests/test_condition.py     |  8 ++++----
 selection/randomized/tests/test_intervals.py     |  4 ++--
 9 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index d74931586..30aa93b58 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -11,7 +11,7 @@
 from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
 
 from ..ci_approx_density import approximate_conditional_density
-from ..approx_ci.estimator_approx import M_estimator_approx
+from ..estimator_approx import M_estimator_approx
 
 from ...randomized.query import naive_confidence_intervals
 from ...randomized.query import naive_pvalues
diff --git a/selection/bayesian/dual_lasso.py b/selection/bayesian/dual_lasso.py
index d0568976a..0fa82acbb 100644
--- a/selection/bayesian/dual_lasso.py
+++ b/selection/bayesian/dual_lasso.py
@@ -132,7 +132,7 @@ def minimize2(self, step=1, nstep=30, tol=1.e-8):
         objective = lambda u: self.total_loss.objective(u)
         grad = lambda u: self.total_loss.smooth_objective(u, 'grad') + self.dual_arg
 
-        for itercount in xrange(nstep):
+        for itercount in range(nstep):
             newton_step = grad(current) * self.noise_variance
 
             # make sure proposal is feasible
@@ -350,7 +350,7 @@ def posterior_samples(self, ndraw=1500, burnin=50):
 
         samples = []
 
-        for i in xrange(ndraw + burnin):
+        for i in range(ndraw + burnin):
             sampler.next()
             if i >= burnin:
                 samples.append(sampler.state.copy())
diff --git a/selection/bayesian/estimator.py b/selection/bayesian/estimator.py
index 44ac103d1..4d09dcbaf 100644
--- a/selection/bayesian/estimator.py
+++ b/selection/bayesian/estimator.py
@@ -579,10 +579,7 @@ def solve_approx(self):
 
         self.feasible_point = np.append(self.observed_score_state, np.abs(self.initial_soln[self._overall]))
 
-        lagrange = []
-        for key, value in self.penalty.weights.iteritems():
-            lagrange.append(value)
-        lagrange = np.asarray(lagrange)
+        lagrange = self.penalty._weight_array
 
         self.inactive_lagrange = lagrange[~self._overall]
 
@@ -658,10 +655,7 @@ def solve_approx(self):
 
         self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
 
-        lagrange = []
-        for key, value in self.penalty.weights.iteritems():
-            lagrange.append(value)
-        lagrange = np.asarray(lagrange)
+        lagrange = self.penalty._weight_array
 
         #print("True or false", np.all(lagrange[0]-np.fabs(self.feasible_point[p+self.nactive:]))>0)
         #print("True or false", np.all(self.feasible_point[p:][:self.nactive]) > 0)
@@ -710,10 +704,7 @@ def solve_approx(self):
 
         self.feasible_point = np.append(self.observed_score_state, np.abs(self.initial_soln[self._overall]))
 
-        lagrange = []
-        for key, value in self.penalty.weights.iteritems():
-            lagrange.append(value)
-        lagrange = np.asarray(lagrange)
+        lagrange = self.penalty._weight_array
 
         self.inactive_lagrange = lagrange[~self._overall]
 
diff --git a/selection/bayesian/forward_stepwise_reduced.py b/selection/bayesian/forward_stepwise_reduced.py
index 28944fd3e..af9be0e2f 100644
--- a/selection/bayesian/forward_stepwise_reduced.py
+++ b/selection/bayesian/forward_stepwise_reduced.py
@@ -401,7 +401,7 @@ def posterior_samples(self, ndraw=1000, burnin=100):
 
         samples = []
 
-        for i in xrange(ndraw + burnin):
+        for i in range(ndraw + burnin):
             sampler.next()
             if i >= burnin:
                 samples.append(sampler.state.copy())
diff --git a/selection/bayesian/marginal_screening_reduced.py b/selection/bayesian/marginal_screening_reduced.py
index d01280d33..0173b28be 100644
--- a/selection/bayesian/marginal_screening_reduced.py
+++ b/selection/bayesian/marginal_screening_reduced.py
@@ -349,7 +349,7 @@ def posterior_samples(self, langevin_steps=1500, burnin=50):
 
         samples = []
 
-        for i in xrange(langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             #print i, sampler.state.copy()
diff --git a/selection/bayesian/par_carved_reduced.py b/selection/bayesian/par_carved_reduced.py
index 6d8ddbed4..687ba48c5 100644
--- a/selection/bayesian/par_carved_reduced.py
+++ b/selection/bayesian/par_carved_reduced.py
@@ -123,7 +123,7 @@ def minimize2(self, step=1, nstep=100, tol=1.e-8):
         objective = lambda u: self.smooth_objective(u, 'func')
         grad = lambda u: self.smooth_objective(u, 'grad')
 
-        for itercount in xrange(nstep):
+        for itercount in range(nstep):
             newton_step = grad(current)
             count = 0
             while True:
@@ -286,7 +286,7 @@ def posterior_samples(self, ndraw=1500, burnin=100):
 
         samples = []
 
-        for i in xrange(ndraw + burnin):
+        for i in range(ndraw + burnin):
             sampler.next()
             if i >= burnin:
                 samples.append(sampler.state.copy())
diff --git a/selection/bayesian/par_random_lasso_reduced.py b/selection/bayesian/par_random_lasso_reduced.py
index d810e458a..e335bec68 100644
--- a/selection/bayesian/par_random_lasso_reduced.py
+++ b/selection/bayesian/par_random_lasso_reduced.py
@@ -157,7 +157,7 @@ def minimize2(self, step=1, nstep=100, tol=1.e-8):
         objective = lambda u: self.smooth_objective(u, 'func')
         grad = lambda u: self.smooth_objective(u, 'grad')
 
-        for itercount in xrange(nstep):
+        for itercount in range(nstep):
             newton_step = grad(current)
             #print("gradient", newton_step)
 
@@ -285,7 +285,7 @@ def map_solve(self, step=1, nstep=100, tol=1.e-5):
         objective = lambda u: self.smooth_objective_post(u, 'func')
         grad = lambda u: self.smooth_objective_post(u, 'grad')
 
-        for itercount in xrange(nstep):
+        for itercount in range(nstep):
 
             newton_step = grad(current)
 
@@ -326,7 +326,7 @@ def posterior_samples(self, langevin_steps=1500, burnin=100):
 
         samples = []
 
-        for i in xrange(langevin_steps):
+        for i in range(langevin_steps):
             sampler.next()
             samples.append(sampler.state.copy())
             sys.stderr.write("sample number: " + str(i) + "\n")
diff --git a/selection/randomized/tests/test_condition.py b/selection/randomized/tests/test_condition.py
index 5c5bfe496..de287d2c8 100644
--- a/selection/randomized/tests/test_condition.py
+++ b/selection/randomized/tests/test_condition.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+from __future__ import print_function, division
 import numpy as np
 
 import regreg.api as rr
@@ -78,11 +78,11 @@ def test_condition(s=0,
             return None
 
         if scalings: # try condition on some scalings
-            for i in range(int(nviews)/2):
+            for i in range(nviews//2):
                 conditioning_groups = np.zeros(p, bool)
-                conditioning_groups[:int(p/2)] = True
+                conditioning_groups[:p//2] = True
                 marginalizing_groups = np.ones(p, bool)
-                marginalizing_groups[:int(p/2)] = False
+                marginalizing_groups[:p//2] = False
                 views[i].decompose_subgradient(conditioning_groups=conditioning_groups,
                                                marginalizing_groups=marginalizing_groups)
                 views[i].condition_on_scalings()
diff --git a/selection/randomized/tests/test_intervals.py b/selection/randomized/tests/test_intervals.py
index 903794b67..411d17395 100644
--- a/selection/randomized/tests/test_intervals.py
+++ b/selection/randomized/tests/test_intervals.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+from __future__ import print_function, division
 import numpy as np
 
 import regreg.api as rr
@@ -54,7 +54,7 @@ def test_intervals(s=0,
 
     W = lam_frac*np.ones(p)*lam
     # W[0] = 0 # use at least some unpenalized
-    groups = np.concatenate([np.arange(10) for i in range(p/10)])
+    groups = np.concatenate([np.arange(10) for i in range(p//10)])
     #print(groups)
     #groups = np.arange(p)
     penalty = rr.group_lasso(groups,

From 4b29d13861d3bd961e40134b669704f68befe212 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 13:44:49 -0700
Subject: [PATCH 198/617] removing debug statement

---
 selection/randomized/tests/test_sqrt_lasso.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/selection/randomized/tests/test_sqrt_lasso.py b/selection/randomized/tests/test_sqrt_lasso.py
index 897f9819e..41b930911 100644
--- a/selection/randomized/tests/test_sqrt_lasso.py
+++ b/selection/randomized/tests/test_sqrt_lasso.py
@@ -80,9 +80,6 @@ def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
     if nactive==0:
         return None
 
-    import sys
-    sys.stderr.write(`(nonzero, active_union )` + '\n')
-
     nonzero = np.where(beta)[0]
     if set(nonzero).issubset(np.nonzero(active_union)[0]):
 

From 18b53de32e2dd416444ff56169c7cf3ca3897540 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 13:54:45 -0700
Subject: [PATCH 199/617] trying to constrain rpy2 for python2.7 support

---
 .travis.yml     | 6 ++++--
 constraints.txt | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 constraints.txt

diff --git a/.travis.yml b/.travis.yml
index 53c45986a..d7257bdfa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,7 +42,8 @@ before_install:
 
 install:
   # Install selection
-  - pip install -r requirements.txt
+  - if ["${TRAVIS_PYTHON_VERSION:0:1}" == "2"]; then pip install  -r requirements.txt -c py2constraints.txt; fi
+  - if ["${TRAVIS_PYTHON_VERSION:0:1}" == "3"]; then pip install  -r requirements.txt; fi
   - pip install -e .
   - cd R-software
   - git submodule init
@@ -60,7 +61,8 @@ script:
 
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
-    - pip install -r doc-requirements.txt # installs rpy2 among other things
+    - if ["${TRAVIS_PYTHON_VERSION:0:1}" == "2"]; then pip install  -r doc-requirements.txt -c py2constraints.txt; fi
+    - if ["${TRAVIS_PYTHON_VERSION:0:1}" == "3"]; then pip install  -r doc-requirements.txt; fi
 
     # Change into an innocuous directory and find tests from installation
     - mkdir for_testing
diff --git a/constraints.txt b/constraints.txt
new file mode 100644
index 000000000..11fdafd49
--- /dev/null
+++ b/constraints.txt
@@ -0,0 +1 @@
+rpy2<2.9

From 195f71f9fc521a7f9fd6b02cfaf794383681f653 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 14:01:07 -0700
Subject: [PATCH 200/617] removing quotes?

---
 .travis.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index d7257bdfa..5694366d7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,8 +42,8 @@ before_install:
 
 install:
   # Install selection
-  - if ["${TRAVIS_PYTHON_VERSION:0:1}" == "2"]; then pip install  -r requirements.txt -c py2constraints.txt; fi
-  - if ["${TRAVIS_PYTHON_VERSION:0:1}" == "3"]; then pip install  -r requirements.txt; fi
+  - if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then pip install  -r requirements.txt -c py2constraints.txt; fi
+  - if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then pip install  -r requirements.txt; fi
   - pip install -e .
   - cd R-software
   - git submodule init
@@ -61,8 +61,8 @@ script:
 
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
-    - if ["${TRAVIS_PYTHON_VERSION:0:1}" == "2"]; then pip install  -r doc-requirements.txt -c py2constraints.txt; fi
-    - if ["${TRAVIS_PYTHON_VERSION:0:1}" == "3"]; then pip install  -r doc-requirements.txt; fi
+    - if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then pip install  -r doc-requirements.txt -c py2constraints.txt; fi
+    - if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then pip install  -r doc-requirements.txt; fi
 
     # Change into an innocuous directory and find tests from installation
     - mkdir for_testing

From c563623b52f110c8bd39d0f7dccd51cc83ff8487 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 14:09:45 -0700
Subject: [PATCH 201/617] adding constraints

---
 .travis.yml | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 5694366d7..013e08660 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,6 +24,12 @@ matrix:
       env:
         - RUN_R_TESTS=1
 before_install:
+  if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
+      pip install  -r doc-requirements.txt -c constraints.txt; 
+  fi
+  if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
+      pip install  -r doc-requirements.txt; 
+  fi
   - source travis-tools/utils.sh
   - travis_before_install
   # Install regreg 
@@ -42,8 +48,12 @@ before_install:
 
 install:
   # Install selection
-  - if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then pip install  -r requirements.txt -c py2constraints.txt; fi
-  - if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then pip install  -r requirements.txt; fi
+  if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
+      pip install  -r doc-requirements.txt -c constraints.txt; 
+  fi
+  if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
+      pip install  -r doc-requirements.txt; 
+  fi
   - pip install -e .
   - cd R-software
   - git submodule init
@@ -61,9 +71,12 @@ script:
 
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
-    - if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then pip install  -r doc-requirements.txt -c py2constraints.txt; fi
-    - if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then pip install  -r doc-requirements.txt; fi
-
+    if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
+        pip install  -r doc-requirements.txt -c constraints.txt; 
+    fi
+    if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
+        pip install  -r doc-requirements.txt; 
+    fi
     # Change into an innocuous directory and find tests from installation
     - mkdir for_testing
     - cd for_testing

From b59ce916b65e25b8bd77613513b91c3f0735e493 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 14:13:09 -0700
Subject: [PATCH 202/617] syntax of travis file

---
 .travis.yml | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 013e08660..62d97740f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,12 +24,12 @@ matrix:
       env:
         - RUN_R_TESTS=1
 before_install:
-  if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
+  - if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
       pip install  -r doc-requirements.txt -c constraints.txt; 
-  fi
-  if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
+    fi
+  - if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
       pip install  -r doc-requirements.txt; 
-  fi
+    fi
   - source travis-tools/utils.sh
   - travis_before_install
   # Install regreg 
@@ -48,12 +48,12 @@ before_install:
 
 install:
   # Install selection
-  if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
-      pip install  -r doc-requirements.txt -c constraints.txt; 
-  fi
-  if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
-      pip install  -r doc-requirements.txt; 
-  fi
+  -  if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
+       pip install  -r doc-requirements.txt -c constraints.txt; 
+     fi
+  -  if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
+       pip install  -r doc-requirements.txt; 
+     fi
   - pip install -e .
   - cd R-software
   - git submodule init
@@ -71,12 +71,12 @@ script:
 
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
-    if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
-        pip install  -r doc-requirements.txt -c constraints.txt; 
-    fi
-    if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
-        pip install  -r doc-requirements.txt; 
-    fi
+    - if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
+         pip install  -r doc-requirements.txt -c constraints.txt; 
+      fi
+    - if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
+         pip install  -r doc-requirements.txt; 
+      fi
     # Change into an innocuous directory and find tests from installation
     - mkdir for_testing
     - cd for_testing

From 3eee18b01b229f787056591c695a86e7419398e4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 14:18:44 -0700
Subject: [PATCH 203/617] double brackets, exact versions

---
 .travis.yml | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 62d97740f..1e319ea1d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,11 +24,11 @@ matrix:
       env:
         - RUN_R_TESTS=1
 before_install:
-  - if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
-      pip install  -r doc-requirements.txt -c constraints.txt; 
+  - if [[$TRAVIS_PYTHON_VERSION == 2.7]]; then 
+      pip install -r requirements.txt -c constraints.txt; 
     fi
-  - if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
-      pip install  -r doc-requirements.txt; 
+  - if [[$TRAVIS_PYTHON_VERSION != 2.7]]; then 
+      pip install -r requirements.txt; 
     fi
   - source travis-tools/utils.sh
   - travis_before_install
@@ -48,12 +48,12 @@ before_install:
 
 install:
   # Install selection
-  -  if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
-       pip install  -r doc-requirements.txt -c constraints.txt; 
-     fi
-  -  if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
-       pip install  -r doc-requirements.txt; 
-     fi
+  - if [[$TRAVIS_PYTHON_VERSION == 2.7]]; then 
+      pip install -r requirements.txt -c constraints.txt; 
+    fi
+  - if [[$TRAVIS_PYTHON_VERSION != 2.7]]; then 
+      pip install -r requirements.txt; 
+    fi
   - pip install -e .
   - cd R-software
   - git submodule init
@@ -71,11 +71,11 @@ script:
 
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
-    - if [${TRAVIS_PYTHON_VERSION:0:1} == "2"]; then 
-         pip install  -r doc-requirements.txt -c constraints.txt; 
+    - if [[$TRAVIS_PYTHON_VERSION == 2.7]]; then 
+        pip install  -r doc-requirements.txt -c constraints.txt; 
       fi
-    - if [${TRAVIS_PYTHON_VERSION:0:1} == "3"]; then 
-         pip install  -r doc-requirements.txt; 
+    - if [[$TRAVIS_PYTHON_VERSION != 2.7]]; then 
+        pip install  -r doc-requirements.txt; 
       fi
     # Change into an innocuous directory and find tests from installation
     - mkdir for_testing

From 09ca9252d464f1b925a1220931586f31ac07ae33 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 14:44:40 -0700
Subject: [PATCH 204/617] spaces in brackets?

---
 .travis.yml | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 1e319ea1d..36d57d067 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,11 +24,10 @@ matrix:
       env:
         - RUN_R_TESTS=1
 before_install:
-  - if [[$TRAVIS_PYTHON_VERSION == 2.7]]; then 
-      pip install -r requirements.txt -c constraints.txt; 
-    fi
-  - if [[$TRAVIS_PYTHON_VERSION != 2.7]]; then 
-      pip install -r requirements.txt; 
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 
+      pip install  -r requirements.txt -c constraints.txt; 
+    else
+      pip install  -r requirements.txt; 
     fi
   - source travis-tools/utils.sh
   - travis_before_install
@@ -48,12 +47,11 @@ before_install:
 
 install:
   # Install selection
-  - if [[$TRAVIS_PYTHON_VERSION == 2.7]]; then 
-      pip install -r requirements.txt -c constraints.txt; 
-    fi
-  - if [[$TRAVIS_PYTHON_VERSION != 2.7]]; then 
-      pip install -r requirements.txt; 
-    fi
+    - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 
+        pip install  -r requirements.txt -c constraints.txt; 
+      else
+        pip install  -r requirements.txt; 
+      fi
   - pip install -e .
   - cd R-software
   - git submodule init
@@ -71,10 +69,9 @@ script:
 
     - pip install nose
     # No figure windows for mpl; quote to hide : from travis-ci yaml parsing
-    - if [[$TRAVIS_PYTHON_VERSION == 2.7]]; then 
+    - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 
         pip install  -r doc-requirements.txt -c constraints.txt; 
-      fi
-    - if [[$TRAVIS_PYTHON_VERSION != 2.7]]; then 
+      else
         pip install  -r doc-requirements.txt; 
       fi
     # Change into an innocuous directory and find tests from installation

From 1f9c90de748c195a1a84b55a805af3e6d72e9ec9 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 14:47:02 -0700
Subject: [PATCH 205/617] indents

---
 .travis.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 36d57d067..78a94966a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -47,11 +47,11 @@ before_install:
 
 install:
   # Install selection
-    - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 
-        pip install  -r requirements.txt -c constraints.txt; 
-      else
-        pip install  -r requirements.txt; 
-      fi
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 
+      pip install  -r requirements.txt -c constraints.txt; 
+    else
+      pip install  -r requirements.txt; 
+    fi
   - pip install -e .
   - cd R-software
   - git submodule init

From c167804432e192e93e633d6a356096b121afa304 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 14:48:44 -0700
Subject: [PATCH 206/617] remove early install

---
 .travis.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 78a94966a..4b2c86a03 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,11 +24,6 @@ matrix:
       env:
         - RUN_R_TESTS=1
 before_install:
-  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 
-      pip install  -r requirements.txt -c constraints.txt; 
-    else
-      pip install  -r requirements.txt; 
-    fi
   - source travis-tools/utils.sh
   - travis_before_install
   # Install regreg 

From d2c3943c71213ca49a027f73a0d3c36a6f603fb2 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 10 Sep 2017 15:24:05 -0700
Subject: [PATCH 207/617] flag for R tests

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4b2c86a03..0129b1f1e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -84,7 +84,7 @@ script:
       COVER_ARGS="--with-coverage --cover-package selection";
       fi
     - |
-      if [ "$R_TESTS" ]; then
+      if [ "$RUN_R_TESTS" ]; then
         nosetests -v ../selection/algorithms/tests/test_compareR.py
       else 
         env USE_SMALL_SAMPLES=1 USE_TEST_SEED=1 nosetests $DOCTEST_ARGS --verbose $COVER_ARGS selection

From 73d0f45e88bd65157b500bda424d85fa2657c789 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 11 Sep 2017 16:23:56 -0700
Subject: [PATCH 208/617] BF: missing some matrices as pointed out by Jelena

---
 selection/randomized/query.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 91ca7a42b..5266cd0ca 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1004,6 +1004,7 @@ def __init__(self,
         # We implicitly assume that we are sampling a target
         # independent of the data in each view
 
+        self.observed_score = [] # in the view's coordinates
         self.observed_raw_score = [] # in the data coordinates, not the view's coordinates
                                      # will typically be \nabla \ell(\bar{\beta}_E) - \nabla^2 \ell(\bar{\beta}_E) \bar{\beta}_E
         self.score_info = []
@@ -1011,6 +1012,7 @@ def __init__(self,
             obj = self.objectives[i]
             score_linear, score_offset = obj.score_transform
             self.observed_raw_score.append(score_linear.dot(obj.observed_score_state) + score_offset)
+            self.observed_score.append(obj.observed_score_state)
             self.score_info.append(obj.score_transform)
 
     def projection(self, state):
@@ -1456,13 +1458,13 @@ def pivot(self,
         score_cov = []
         for i in range(len(self.opt_sampler.objectives)):
             cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
-            cur_nuisance = self.opt_sampler.observed_raw_score[i] - cur_score_cov * observed_stat / target_cov
+            cur_nuisance = self.opt_sampler.observed_score[i] - cur_score_cov * observed_stat / target_cov
             # cur_nuisance is in the view's internal coordinates
             score_linear, score_offset = self.opt_sampler.score_info[i]
             # final_nuisance is on the scale of the original randomization
             final_nuisance = score_linear.dot(cur_nuisance) + score_offset
             nuisance.append(final_nuisance)
-            score_cov.append(score_linear.dot(cur_score_cov))
+            score_cov.append(score_linear.dot(cur_score_cov) / target_cov)
 
         weights = self._weights(sample_stat + candidate,  # normal sample under candidate
                                 nuisance,                 # nuisance sufficient stats for each view

From 09bccb091ff07b67e63151d4f2bc58f4d4459752 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Mon, 11 Sep 2017 16:24:08 -0700
Subject: [PATCH 209/617] few bugs in intervals

---
 selection/randomized/convenience.py           |  1 +
 selection/randomized/query.py                 | 37 +++++++++---------
 .../tests/test_opt_weighted_intervals.py      | 19 +++++----
 selection/randomized/tests/test_sampling.py   | 39 +++++++++++++++----
 4 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 641faaafd..9fe88eb53 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -87,6 +87,7 @@ def __init__(self,
 
         self.covariance_estimator = covariance_estimator
 
+        self.randomizer_scale = randomizer_scale
         if randomizer == 'laplace':
             self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
         elif randomizer == 'gaussian':
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 83b0efa7c..e5784a5ed 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1076,7 +1076,9 @@ def sample(self, ndraw, burnin, stepsize=None):
         '''
 
         if stepsize is None:
-            stepsize = 1. / self.crude_lipschitz()
+            print("here")
+            stepsize = 1./len(self.observed_state) #
+            #stepsize = 1. / self.crude_lipschitz()
 
         target_langevin = projected_langevin(self.observed_state.copy(),
                                              self.gradient,
@@ -1251,7 +1253,7 @@ def confidence_intervals(self,
         return np.array(limits)
 
     def coefficient_pvalues(self,
-                            observed,
+                            observed_target,
                             parameter=None,
                             ndraw=10000,
                             burnin=2000,
@@ -1298,22 +1300,19 @@ def coefficient_pvalues(self,
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
 
         if parameter is None:
-            parameter = np.zeros(self.shape)
+            parameter = np.zeros(observed_target.shape[0])
 
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
+        _intervals = optimization_intervals(self,
+                                            sample,
+                                            observed_target)
+        pvals = []
 
-        pval = intervals_instance.pivots_all(parameter)
+        for i in range(observed_target.shape[0]):
+            keep = np.zeros_like(observed_target)
+            keep[i] = 1.
+            pvals.append(_intervals.pivot(keep, candidate=parameter[i], alternative=alternative))
 
-        if alternative == 'greater':
-            return 1 - pval
-        elif alternative == 'less':
-            return pval
-        else:
-            return 2 * np.minimum(pval, 1 - pval)
+        return np.array(pvals)
 
     def crude_lipschitz(self):
         """
@@ -1456,13 +1455,13 @@ def pivot(self,
         score_cov = []
         for i in range(len(self.opt_sampler.objectives)):
             cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
-            cur_nuisance = self.opt_sampler.observed_raw_score[i] - cur_score_cov * observed_stat / target_cov
+            cur_nuisance = self.opt_sampler.objectives[i].observed_score_state - cur_score_cov * observed_stat / target_cov
             # cur_nuisance is in the view's internal coordinates
             score_linear, score_offset = self.opt_sampler.score_info[i]
             # final_nuisance is on the scale of the original randomization
             final_nuisance = score_linear.dot(cur_nuisance) + score_offset
             nuisance.append(final_nuisance)
-            score_cov.append(score_linear.dot(cur_score_cov))
+            score_cov.append(score_linear.dot(cur_score_cov)/target_cov)
 
         weights = self._weights(sample_stat + candidate,  # normal sample under candidate
                                 nuisance,                 # nuisance sufficient stats for each view
@@ -1498,7 +1497,7 @@ def _rootL(gamma):
         upper = bisect(_rootU, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
         lower = bisect(_rootL, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
 
-        print(_rootU(upper), _rootL(lower), 'pivot')
+        #print(_rootU(upper), _rootL(lower), 'pivot')
         return lower + observed_stat, upper + observed_stat
 
     # Private methods
@@ -1527,7 +1526,7 @@ def _weights(self,
 
         _lognum = 0
         for i in range(len(log_densities)):
-            density_arg = np.multiply.outer(score_cov[i], sample_stat) + nuisance[i][:,None]
+            density_arg = np.multiply.outer(score_cov[i], sample_stat) + nuisance[i][:, None]
             _lognum += log_densities[i](density_arg.T + self.reconstructed_sample)
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index d9e5a9048..210681e2d 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -2,17 +2,17 @@
 import numpy as np
 import nose.tools as nt
 
-from ..convenience import lasso, step, threshold
-from ..query import optimization_sampler
-from ...tests.instance import (gaussian_instance,
+from selection.randomized.convenience import lasso, step, threshold
+from selection.randomized.query import optimization_sampler
+from selection.tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
-from ...tests.flags import SMALL_SAMPLES
-from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
+from selection.tests.flags import SMALL_SAMPLES
+from selection.tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
 
 from scipy.stats import t as tdist
-from ..glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
-from ..M_estimator import restricted_Mest
+from selection.randomized.glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
+from selection.randomized.M_estimator import restricted_Mest
 
 @set_seed_iftrue(True, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100)
@@ -58,14 +58,17 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
         unpenalized_mle = restricted_Mest(conv.loglike, selected_features)
         form_covariances = glm_nonparametric_bootstrap(n, n)
-        conv._queries.setup_sampler(form_covariances)
+        #conv._queries.setup_sampler(form_covariances)
         boot_target, boot_target_observed = pairs_bootstrap_glm(conv.loglike, selected_features, inactive=None)
         opt_sampler.setup_target(boot_target,
                                  form_covariances)
 
+        selective_pvalues = opt_sampler.coefficient_pvalues(unpenalized_mle, sample=S)
+        print("pvalues ", selective_pvalues)
         selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
         print(selective_CI)
 
         return selective_CI
 
 
+test_opt_weighted_intervals()
\ No newline at end of file
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index a51e701e7..87c5abb2e 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -3,20 +3,39 @@
 
 import numpy as np
 from scipy.stats import t as tdist
+from scipy.stats import laplace, logistic, norm as ndist
 
-from ..convenience import lasso, step, threshold
-from ..query import optimization_sampler
-from ...tests.instance import (gaussian_instance,
+from selection.randomized.convenience import lasso, step, threshold
+from selection.randomized.query import optimization_sampler
+from selection.tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
-from ...tests.flags import SMALL_SAMPLES
-from ...tests.decorators import set_sampling_params_iftrue
+from selection.tests.flags import SMALL_SAMPLES
+from selection.tests.decorators import set_sampling_params_iftrue
+from selection.randomized.randomization import randomization
+
+
+class randomization_ppf(randomization):
+
+    def __init__(self, rand, ppf):
+
+        self._cdf = rand._cdf
+        self._ppf = ppf
+        self.shape = rand.shape
+
+    @staticmethod
+    def laplace(shape, scale):
+        ppf = lambda x: laplace.ppf(x, loc=0, scale=scale)
+        rand = randomization.laplace(shape, scale)
+        return randomization_ppf(rand, ppf)
+
 
 def inverse_truncated_cdf(x, lower, upper, randomization):
     #if (x<0 or x>1):
     #    raise ValueError("argument for cdf inverse should be in (0,1)")
     arg = randomization._cdf(lower) + np.multiply(x, randomization._cdf(upper) - randomization._cdf(lower))
     return randomization._ppf(arg)
+    #return randomization._ppf(arg)
 
 def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
     uniform_samples = np.random.uniform(0,1, size=(nsamples,randomization.shape[0]))
@@ -95,6 +114,10 @@ def test_optimization_sampler(ndraw=20000, burnin=2000):
 
         W = np.ones(X.shape[1]) * 1
         conv = const(X, Y, W, randomizer=rand)
+
+
+        randomizer = randomization_ppf.laplace((p,), scale=conv.randomizer_scale)
+
         signs = conv.fit()
         print("signs", signs)
 
@@ -106,13 +129,15 @@ def test_optimization_sampler(ndraw=20000, burnin=2000):
 
         S = target_sampler.sample(ndraw,
                                   burnin,
-                                  stepsize=1.e-3)
+                                  stepsize=None)
         print(S.shape)
         print([np.mean(S[:,i]) for i in range(p)])
 
         opt_samples = sample_opt_vars(X,Y, selected_features, signs, W[0], conv.ridge_term,
-                                      conv.randomizer, nsamples =1000)
+                                      randomizer, nsamples =1000)
 
         print([np.mean(opt_samples[:,i]) for i in range(p)])
 
 
+np.random.seed(1)
+test_optimization_sampler()
\ No newline at end of file

From a76713781d2b982348c5436fda7400c56ba3e768 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Mon, 11 Sep 2017 16:54:44 -0700
Subject: [PATCH 210/617] merged with JT master

---
 selection/randomized/convenience.py           |  1 +
 selection/randomized/query.py                 | 37 ++++++++++--------
 .../tests/test_opt_weighted_intervals.py      | 19 +++++----
 selection/randomized/tests/test_sampling.py   | 39 +++++++++++++++----
 4 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 641faaafd..9fe88eb53 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -87,6 +87,7 @@ def __init__(self,
 
         self.covariance_estimator = covariance_estimator
 
+        self.randomizer_scale = randomizer_scale
         if randomizer == 'laplace':
             self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
         elif randomizer == 'gaussian':
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 78b3ac76d..335256a13 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1078,7 +1078,9 @@ def sample(self, ndraw, burnin, stepsize=None):
         '''
 
         if stepsize is None:
-            stepsize = 1. / self.crude_lipschitz()
+            print("here")
+            stepsize = 1./len(self.observed_state) #
+            #stepsize = 1. / self.crude_lipschitz()
 
         target_langevin = projected_langevin(self.observed_state.copy(),
                                              self.gradient,
@@ -1253,7 +1255,7 @@ def confidence_intervals(self,
         return np.array(limits)
 
     def coefficient_pvalues(self,
-                            observed,
+                            observed_target,
                             parameter=None,
                             ndraw=10000,
                             burnin=2000,
@@ -1300,22 +1302,19 @@ def coefficient_pvalues(self,
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
 
         if parameter is None:
-            parameter = np.zeros(self.shape)
+            parameter = np.zeros(observed_target.shape[0])
 
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
+        _intervals = optimization_intervals(self,
+                                            sample,
+                                            observed_target)
+        pvals = []
 
-        pval = intervals_instance.pivots_all(parameter)
+        for i in range(observed_target.shape[0]):
+            keep = np.zeros_like(observed_target)
+            keep[i] = 1.
+            pvals.append(_intervals.pivot(keep, candidate=parameter[i], alternative=alternative))
 
-        if alternative == 'greater':
-            return 1 - pval
-        elif alternative == 'less':
-            return pval
-        else:
-            return 2 * np.minimum(pval, 1 - pval)
+        return np.array(pvals)
 
     def crude_lipschitz(self):
         """
@@ -1458,14 +1457,18 @@ def pivot(self,
         score_cov = []
         for i in range(len(self.opt_sampler.objectives)):
             cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
+
             cur_nuisance = self.opt_sampler.observed_score[i] - cur_score_cov * observed_stat / target_cov
+
             # cur_nuisance is in the view's internal coordinates
             score_linear, score_offset = self.opt_sampler.score_info[i]
             # final_nuisance is on the scale of the original randomization
             final_nuisance = score_linear.dot(cur_nuisance) + score_offset
             nuisance.append(final_nuisance)
+
             score_cov.append(score_linear.dot(cur_score_cov) / target_cov)
 
+
         weights = self._weights(sample_stat + candidate,  # normal sample under candidate
                                 nuisance,                 # nuisance sufficient stats for each view
                                 score_cov,                # points will be moved like sample * score_cov
@@ -1500,7 +1503,7 @@ def _rootL(gamma):
         upper = bisect(_rootU, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
         lower = bisect(_rootL, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
 
-        print(_rootU(upper), _rootL(lower), 'pivot')
+        #print(_rootU(upper), _rootL(lower), 'pivot')
         return lower + observed_stat, upper + observed_stat
 
     # Private methods
@@ -1529,7 +1532,7 @@ def _weights(self,
 
         _lognum = 0
         for i in range(len(log_densities)):
-            density_arg = np.multiply.outer(score_cov[i], sample_stat) + nuisance[i][:,None]
+            density_arg = np.multiply.outer(score_cov[i], sample_stat) + nuisance[i][:, None]
             _lognum += log_densities[i](density_arg.T + self.reconstructed_sample)
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index d9e5a9048..210681e2d 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -2,17 +2,17 @@
 import numpy as np
 import nose.tools as nt
 
-from ..convenience import lasso, step, threshold
-from ..query import optimization_sampler
-from ...tests.instance import (gaussian_instance,
+from selection.randomized.convenience import lasso, step, threshold
+from selection.randomized.query import optimization_sampler
+from selection.tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
-from ...tests.flags import SMALL_SAMPLES
-from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
+from selection.tests.flags import SMALL_SAMPLES
+from selection.tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
 
 from scipy.stats import t as tdist
-from ..glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
-from ..M_estimator import restricted_Mest
+from selection.randomized.glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
+from selection.randomized.M_estimator import restricted_Mest
 
 @set_seed_iftrue(True, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100)
@@ -58,14 +58,17 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
         unpenalized_mle = restricted_Mest(conv.loglike, selected_features)
         form_covariances = glm_nonparametric_bootstrap(n, n)
-        conv._queries.setup_sampler(form_covariances)
+        #conv._queries.setup_sampler(form_covariances)
         boot_target, boot_target_observed = pairs_bootstrap_glm(conv.loglike, selected_features, inactive=None)
         opt_sampler.setup_target(boot_target,
                                  form_covariances)
 
+        selective_pvalues = opt_sampler.coefficient_pvalues(unpenalized_mle, sample=S)
+        print("pvalues ", selective_pvalues)
         selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
         print(selective_CI)
 
         return selective_CI
 
 
+test_opt_weighted_intervals()
\ No newline at end of file
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index a51e701e7..87c5abb2e 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -3,20 +3,39 @@
 
 import numpy as np
 from scipy.stats import t as tdist
+from scipy.stats import laplace, logistic, norm as ndist
 
-from ..convenience import lasso, step, threshold
-from ..query import optimization_sampler
-from ...tests.instance import (gaussian_instance,
+from selection.randomized.convenience import lasso, step, threshold
+from selection.randomized.query import optimization_sampler
+from selection.tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
-from ...tests.flags import SMALL_SAMPLES
-from ...tests.decorators import set_sampling_params_iftrue
+from selection.tests.flags import SMALL_SAMPLES
+from selection.tests.decorators import set_sampling_params_iftrue
+from selection.randomized.randomization import randomization
+
+
+class randomization_ppf(randomization):
+
+    def __init__(self, rand, ppf):
+
+        self._cdf = rand._cdf
+        self._ppf = ppf
+        self.shape = rand.shape
+
+    @staticmethod
+    def laplace(shape, scale):
+        ppf = lambda x: laplace.ppf(x, loc=0, scale=scale)
+        rand = randomization.laplace(shape, scale)
+        return randomization_ppf(rand, ppf)
+
 
 def inverse_truncated_cdf(x, lower, upper, randomization):
     #if (x<0 or x>1):
     #    raise ValueError("argument for cdf inverse should be in (0,1)")
     arg = randomization._cdf(lower) + np.multiply(x, randomization._cdf(upper) - randomization._cdf(lower))
     return randomization._ppf(arg)
+    #return randomization._ppf(arg)
 
 def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
     uniform_samples = np.random.uniform(0,1, size=(nsamples,randomization.shape[0]))
@@ -95,6 +114,10 @@ def test_optimization_sampler(ndraw=20000, burnin=2000):
 
         W = np.ones(X.shape[1]) * 1
         conv = const(X, Y, W, randomizer=rand)
+
+
+        randomizer = randomization_ppf.laplace((p,), scale=conv.randomizer_scale)
+
         signs = conv.fit()
         print("signs", signs)
 
@@ -106,13 +129,15 @@ def test_optimization_sampler(ndraw=20000, burnin=2000):
 
         S = target_sampler.sample(ndraw,
                                   burnin,
-                                  stepsize=1.e-3)
+                                  stepsize=None)
         print(S.shape)
         print([np.mean(S[:,i]) for i in range(p)])
 
         opt_samples = sample_opt_vars(X,Y, selected_features, signs, W[0], conv.ridge_term,
-                                      conv.randomizer, nsamples =1000)
+                                      randomizer, nsamples =1000)
 
         print([np.mean(opt_samples[:,i]) for i in range(p)])
 
 
+np.random.seed(1)
+test_optimization_sampler()
\ No newline at end of file

From 633ec29fd78fe9d9b5f5bd4affa108f69e73742e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 11 Sep 2017 17:17:57 -0700
Subject: [PATCH 211/617] BF: setup.py broken, now building

---
 setup.py         | 11 ++++++-----
 setup_helpers.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index c306de52d..241b52f6b 100755
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,8 @@
 from distutils.extension import Extension
 
 from cythexts import cyproc_exts, get_pyx_sdist
-from setup_helpers import package_check
+from setup_helpers import package_check, read_vars_from
+info = read_vars_from(pjoin('selection', 'info.py'))
 
 # Define extensions
 EXTS = []
@@ -43,10 +44,10 @@
 
 class installer(install.install):
     def run(self):
-        package_check('numpy', NUMPY_MIN_VERSION)
-        package_check('scipy', SCIPY_MIN_VERSION)
-        package_check('sklearn', SKLEARN_MIN_VERSION)
-        package_check('mpmath', MPMATH_MIN_VERSION)
+        package_check('numpy', info.NUMPY_MIN_VERSION)
+        package_check('scipy', info.SCIPY_MIN_VERSION)
+        package_check('sklearn', info.SKLEARN_MIN_VERSION)
+        package_check('mpmath', info.MPMATH_MIN_VERSION)
         install.install.run(self)
 
 cmdclass = dict(
diff --git a/setup_helpers.py b/setup_helpers.py
index fce0bf5f1..5a69172ad 100644
--- a/setup_helpers.py
+++ b/setup_helpers.py
@@ -77,3 +77,31 @@ def version_getter(pkg_name):
             raise RuntimeError(msgs['version too old'] % (have_version,
                                                           pkg_name,
                                                           version))
+
+class Bunch(object):
+    def __init__(self, vars):
+        for key, name in vars.items():
+            if key.startswith('__'):
+                continue
+            self.__dict__[key] = name
+
+
+def read_vars_from(ver_file):
+    """ Read variables from Python text file
+
+    Parameters
+    ----------
+    ver_file : str
+        Filename of file to read
+
+    Returns
+    -------
+    info_vars : Bunch instance
+        Bunch object where variables read from `ver_file` appear as
+        attributes
+    """
+    # Use exec for compatibility with Python 3
+    ns = {}
+    with open(ver_file, 'rt') as fobj:
+        exec(fobj.read(), ns)
+    return Bunch(ns)

From 16c9e2875427d2325b55ab008b7a5e9a630213c3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 11 Sep 2017 17:23:22 -0700
Subject: [PATCH 212/617] Added more install types

---
 .travis.yml | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 0129b1f1e..41e9b7394 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -23,6 +23,32 @@ matrix:
     - python: 2.7
       env:
         - RUN_R_TESTS=1
+    # Testing without matplotlib
+    - python: 2.7
+      env:
+        - DEPENDS="cython numpy scipy"
+    # Documentation doctests
+    - python: 2.7
+      env:
+        - DOC_DOC_TEST=1
+    # Setup.py install
+    - python: 2.7
+      env:
+        - INSTALL_TYPE=setup
+    - python: 2.7
+      env:
+        # Sdist install should collect all dependencies
+        - INSTALL_TYPE=sdist
+        - DEPENDS=
+    - python: 2.7
+      env:
+        # Wheel install should collect all dependencies
+        - INSTALL_TYPE=wheel
+        - DEPENDS=
+    - python: 2.7
+      env:
+        - INSTALL_TYPE=requirements
+        - DEPENDS=
 before_install:
   - source travis-tools/utils.sh
   - travis_before_install

From 5b6c8e75a71c9ef7bb99f065feb24a4d44d10523 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Mon, 11 Sep 2017 17:49:00 -0700
Subject: [PATCH 213/617] imports

---
 .../tests/test_opt_weighted_intervals.py      | 17 ++++++--------
 selection/randomized/tests/test_sampling.py   | 22 +++++++++----------
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 210681e2d..cf2c72337 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -2,17 +2,17 @@
 import numpy as np
 import nose.tools as nt
 
-from selection.randomized.convenience import lasso, step, threshold
-from selection.randomized.query import optimization_sampler
-from selection.tests.instance import (gaussian_instance,
+from ..convenience import lasso, step, threshold
+from ..query import optimization_sampler
+from ...tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
-from selection.tests.flags import SMALL_SAMPLES
-from selection.tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
 
 from scipy.stats import t as tdist
-from selection.randomized.glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
-from selection.randomized.M_estimator import restricted_Mest
+from ..glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
+from ..M_estimator import restricted_Mest
 
 @set_seed_iftrue(True, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100)
@@ -69,6 +69,3 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         print(selective_CI)
 
         return selective_CI
-
-
-test_opt_weighted_intervals()
\ No newline at end of file
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 87c5abb2e..3d3251fa7 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -5,14 +5,16 @@
 from scipy.stats import t as tdist
 from scipy.stats import laplace, logistic, norm as ndist
 
-from selection.randomized.convenience import lasso, step, threshold
-from selection.randomized.query import optimization_sampler
-from selection.tests.instance import (gaussian_instance,
+from ..convenience import lasso, step, threshold
+from ..query import optimization_sampler
+from ...tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
-from selection.tests.flags import SMALL_SAMPLES
-from selection.tests.decorators import set_sampling_params_iftrue
-from selection.randomized.randomization import randomization
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
+
+from ...tests.decorators import set_sampling_params_iftrue
+from ..randomization import randomization
 
 
 class randomization_ppf(randomization):
@@ -100,9 +102,9 @@ def _noise(n, df=np.inf):
 
 
 
-
+@set_seed_iftrue(True, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_optimization_sampler(ndraw=20000, burnin=2000):
+def test_sampling(ndraw=20000, burnin=2000):
 
     cls = lasso
     for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace']):
@@ -138,6 +140,4 @@ def test_optimization_sampler(ndraw=20000, burnin=2000):
 
         print([np.mean(opt_samples[:,i]) for i in range(p)])
 
-
-np.random.seed(1)
-test_optimization_sampler()
\ No newline at end of file
+        return None
\ No newline at end of file

From 3b1d4b887400558b22a8aa01e3b0e4a146e6d7f4 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Mon, 11 Sep 2017 18:34:39 -0700
Subject: [PATCH 214/617] removed self.randomized_scale

---
 selection/randomized/convenience.py         |  1 -
 selection/randomized/tests/test_sampling.py | 23 ++++++++++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 9fe88eb53..641faaafd 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -87,7 +87,6 @@ def __init__(self,
 
         self.covariance_estimator = covariance_estimator
 
-        self.randomizer_scale = randomizer_scale
         if randomizer == 'laplace':
             self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
         elif randomizer == 'gaussian':
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 3d3251fa7..0cab896ba 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -31,6 +31,12 @@ def laplace(shape, scale):
         rand = randomization.laplace(shape, scale)
         return randomization_ppf(rand, ppf)
 
+    @staticmethod
+    def isotropic_gaussian(shape, scale):
+        ppf = lambda x: ndist.pdf(x, loc=0., scale=scale)
+        rand = randomization.isotropic_gaussian(shape, scale)
+        return randomization_ppf(rand, ppf)
+
 
 def inverse_truncated_cdf(x, lower, upper, randomization):
     #if (x<0 or x>1):
@@ -107,7 +113,7 @@ def _noise(n, df=np.inf):
 def test_sampling(ndraw=20000, burnin=2000):
 
     cls = lasso
-    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace']):
+    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']):
 
         inst, const = const_info
 
@@ -115,10 +121,14 @@ def test_sampling(ndraw=20000, burnin=2000):
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 1
-        conv = const(X, Y, W, randomizer=rand)
-
+        randomizer_scale =1.
+        conv = const(X, Y, W, randomizer=rand, randomizer_scale = randomizer_scale)
 
-        randomizer = randomization_ppf.laplace((p,), scale=conv.randomizer_scale)
+        print(rand)
+        if rand == "laplace":
+            randomizer = randomization_ppf.laplace((p,), scale=randomizer_scale)
+        elif rand=="gaussian":
+            randomizer = randomization_ppf.isotropic_gaussian((p,),scale=randomizer_scale)
 
         signs = conv.fit()
         print("signs", signs)
@@ -135,9 +145,8 @@ def test_sampling(ndraw=20000, burnin=2000):
         print(S.shape)
         print([np.mean(S[:,i]) for i in range(p)])
 
-        opt_samples = sample_opt_vars(X,Y, selected_features, signs, W[0], conv.ridge_term,
-                                      randomizer, nsamples =1000)
+        opt_samples = sample_opt_vars(X,Y, selected_features, signs, W[0], conv.ridge_term, randomizer, nsamples =1000)
 
         print([np.mean(opt_samples[:,i]) for i in range(p)])
 
-        return None
\ No newline at end of file
+    return None
\ No newline at end of file

From 5aeee960c5b24a349c6fea93d8c45615e52631d8 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Mon, 11 Sep 2017 18:36:21 -0700
Subject: [PATCH 215/617] ppf for gaussian

---
 selection/randomized/tests/test_sampling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 0cab896ba..d7e0bad3d 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -33,7 +33,7 @@ def laplace(shape, scale):
 
     @staticmethod
     def isotropic_gaussian(shape, scale):
-        ppf = lambda x: ndist.pdf(x, loc=0., scale=scale)
+        ppf = lambda x: ndist.ppf(x, loc=0., scale=scale)
         rand = randomization.isotropic_gaussian(shape, scale)
         return randomization_ppf(rand, ppf)
 

From e52083b118228da580e4221fa519d76527c9452b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 11 Sep 2017 20:12:10 -0700
Subject: [PATCH 216/617] a plot for checking the sampling -- made sampling a
 little more generic, but still orthogonal

---
 selection/randomized/query.py               |   4 +-
 selection/randomized/tests/test_sampling.py | 127 ++++++++++++++------
 2 files changed, 93 insertions(+), 38 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 335256a13..a2ce3b051 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1078,9 +1078,7 @@ def sample(self, ndraw, burnin, stepsize=None):
         '''
 
         if stepsize is None:
-            print("here")
-            stepsize = 1./len(self.observed_state) #
-            #stepsize = 1. / self.crude_lipschitz()
+            stepsize = 1./len(self.observed_state) 
 
         target_langevin = projected_langevin(self.observed_state.copy(),
                                              self.gradient,
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index d7e0bad3d..7b5c77fe0 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -10,7 +10,7 @@
 from ...tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
-from ...tests.flags import SMALL_SAMPLES
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
 
 from ...tests.decorators import set_sampling_params_iftrue
@@ -39,11 +39,10 @@ def isotropic_gaussian(shape, scale):
 
 
 def inverse_truncated_cdf(x, lower, upper, randomization):
-    #if (x<0 or x>1):
-    #    raise ValueError("argument for cdf inverse should be in (0,1)")
-    arg = randomization._cdf(lower) + np.multiply(x, randomization._cdf(upper) - randomization._cdf(lower))
+    arg = (randomization._cdf(lower) + 
+           np.multiply(x, randomization._cdf(upper) - 
+                       randomization._cdf(lower)))
     return randomization._ppf(arg)
-    #return randomization._ppf(arg)
 
 def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
     uniform_samples = np.random.uniform(0,1, size=(nsamples,randomization.shape[0]))
@@ -53,6 +52,8 @@ def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
     return samples
 
 def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =10000):
+
+    Xdiag = np.diag(X.T.dot(X))
     p = X.shape[1]
     nactive = active.sum()
     lower = np.zeros(p)
@@ -62,25 +63,33 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     for i in range(nactive):
         var = active_set[i]
         if signs[var]>0:
-            lower[i] = -np.dot(X[:, var].T,y) + lam*signs[var]
+            lower[i] = -(X[:, var].T.dot(y) + lam * signs[var]) / Xdiag[var]
             upper[i] = np.inf
         else:
             lower[i] = -np.inf
-            upper[i] = -np.dot(X[:,var].T,y) + lam*signs[var]
+            upper[i] = -X[:,var].T.dot(y) + lam * signs[var] / Xdiag[var]
 
-    lower[range(nactive,p)] = -lam-np.dot(X[:, ~active].T, y)
-    upper[range(nactive,p)]= lam-np.dot(X[:,~active].T, y)
+    lower[range(nactive,p)] = -lam - X[:, ~active].T.dot(y)
+    upper[range(nactive,p)]= lam - X[:, ~active].T.dot(y)
 
-    omega_samples = sampling_truncated_dist(lower, upper, randomization, nsamples=nsamples)
+    omega_samples = sampling_truncated_dist(lower, 
+                                            upper, 
+                                            randomization, 
+                                            nsamples=nsamples)
 
-    abs_beta_samples = np.true_divide(omega_samples[:,:nactive]+np.dot(X[:,active].T, y)-lam*signs[active], (epsilon+1)*signs[active])
-    u_samples = (omega_samples[:, nactive:]+np.dot(X[:,~active].T, y))
+    abs_beta_samples = np.true_divide( 
+                          omega_samples[:,:nactive] * Xdiag[active] + 
+                          X[:,active].T.dot(y)- 
+                          lam * signs[active], 
+                          (epsilon + Xdiag[active]) * signs[active])
+    u_samples = omega_samples[:, nactive:] + X[:, ~active].T.dot(y)
 
     return np.concatenate((abs_beta_samples, u_samples), axis=1)
 
-
-def orthogonal_design(n, p, s, signal, sigma, df=np.inf, random_signs=False):
+def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
+    scale = np.linspace(1, 1.2, p)
     X = np.identity(n)[:,:p]
+    X *= scale[None, :]
 
     beta = np.zeros(p)
     signal = np.atleast_1d(signal)
@@ -95,40 +104,46 @@ def orthogonal_design(n, p, s, signal, sigma, df=np.inf, random_signs=False):
     active = np.zeros(p, np.bool)
     active[beta != 0] = True
 
-    # noise model
-    def _noise(n, df=np.inf):
-        if df == np.inf:
-            return np.random.standard_normal(n)
-        else:
-            sd_t = np.std(tdist.rvs(df, size=50000))
-        return tdist.rvs(df, size=n) / sd_t
-
-    Y = (X.dot(beta) + _noise(n, df)) * sigma
+    Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma
     return X, Y, beta * sigma, np.nonzero(active)[0], sigma
 
 
-
-@set_seed_iftrue(True, 200)
+@set_seed_iftrue(SET_SEED, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_sampling(ndraw=20000, burnin=2000):
+def test_conditional_law(ndraw=20000, burnin=2000):
+    """
+    Checks the conditional law of opt variables given the data
+    """
 
-    cls = lasso
-    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']):
+    results = []
+    for const_info, rand in product(zip([gaussian_instance], 
+                                        [lasso.gaussian]), 
+                                    ['laplace', 'gaussian']):
 
         inst, const = const_info
 
-        X, Y = orthogonal_design(n=100, p=10, s=0, signal=2, sigma=1)[:2]
+        X, Y, beta = orthogonal_design(n=100, 
+                                       p=10, 
+                                       s=3, 
+                                       signal=(2,3), 
+                                       sigma=1.2)[:3]
         n, p = X.shape
 
-        W = np.ones(X.shape[1]) * 1
+        W = np.ones(X.shape[1]) * 1.2
         randomizer_scale =1.
-        conv = const(X, Y, W, randomizer=rand, randomizer_scale = randomizer_scale)
+        conv = const(X, 
+                     Y, 
+                     W, 
+                     randomizer=rand, 
+                     randomizer_scale=randomizer_scale)
 
         print(rand)
         if rand == "laplace":
-            randomizer = randomization_ppf.laplace((p,), scale=randomizer_scale)
+            randomizer = randomization_ppf.laplace((p,), \
+                             scale=randomizer_scale)
         elif rand=="gaussian":
-            randomizer = randomization_ppf.isotropic_gaussian((p,),scale=randomizer_scale)
+            randomizer = randomization_ppf.isotropic_gaussian((p,), \
+                             scale=randomizer_scale)
 
         signs = conv.fit()
         print("signs", signs)
@@ -145,8 +160,50 @@ def test_sampling(ndraw=20000, burnin=2000):
         print(S.shape)
         print([np.mean(S[:,i]) for i in range(p)])
 
-        opt_samples = sample_opt_vars(X,Y, selected_features, signs, W[0], conv.ridge_term, randomizer, nsamples =1000)
+        opt_samples = sample_opt_vars(X, 
+                                      Y, 
+                                      selected_features, 
+                                      signs, 
+                                      W[0], 
+                                      conv.ridge_term, 
+                                      randomizer, 
+                                      nsamples=ndraw)
 
         print([np.mean(opt_samples[:,i]) for i in range(p)])
 
-    return None
\ No newline at end of file
+        results.append((rand, S, opt_samples))
+
+    return results
+
+def plot_ecdf(ndraw=10000, burnin=1000):
+
+    np.random.seed(20)
+
+    import matplotlib.pyplot as plt
+    from statsmodels.distributions import ECDF
+
+    for (rand, 
+         mcmc, 
+         truncated) in test_conditional_law(ndraw=ndraw, burnin=burnin):
+
+        fig = plt.figure(num=1, figsize=(8,15))
+        plt.clf()
+        idx = 0
+        for i in range(mcmc.shape[1]):
+            plt.subplot(5,2,idx+1)
+            xval = np.linspace(min(mcmc[:,i].min(), truncated[:,i].min()), 
+                               max(mcmc[:,i].max(), truncated[:,i].max()), 
+                               200)
+            plt.plot(xval, ECDF(mcmc[:,i])(xval), label='MCMC')
+            plt.plot(xval, ECDF(truncated[:,i])(xval), label='truncated')
+            idx += 1
+            if idx == 1:
+                plt.legend(loc='lower right')
+        plt.savefig('fig%s.pdf' % rand)
+    plt.show()
+
+            
+            
+    
+
+    

From 1cadd363a4b4953cf18c8863ff5095b7433f1b57 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Mon, 11 Sep 2017 21:00:45 -0700
Subject: [PATCH 217/617] two bracket typos

---
 selection/randomized/tests/test_sampling.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 7b5c77fe0..6e3e38ed1 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -63,14 +63,14 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     for i in range(nactive):
         var = active_set[i]
         if signs[var]>0:
-            lower[i] = -(X[:, var].T.dot(y) + lam * signs[var]) / Xdiag[var]
+            lower[i] = (-X[:, var].T.dot(y) + lam * signs[var]) / Xdiag[var]
             upper[i] = np.inf
         else:
             lower[i] = -np.inf
-            upper[i] = -X[:,var].T.dot(y) + lam * signs[var] / Xdiag[var]
+            upper[i] = (-X[:,var].T.dot(y) + lam * signs[var]) / Xdiag[var]
 
     lower[range(nactive,p)] = -lam - X[:, ~active].T.dot(y)
-    upper[range(nactive,p)]= lam - X[:, ~active].T.dot(y)
+    upper[range(nactive,p)] = lam - X[:, ~active].T.dot(y)
 
     omega_samples = sampling_truncated_dist(lower, 
                                             upper, 
@@ -124,7 +124,7 @@ def test_conditional_law(ndraw=20000, burnin=2000):
 
         X, Y, beta = orthogonal_design(n=100, 
                                        p=10, 
-                                       s=3, 
+                                       s=3,
                                        signal=(2,3), 
                                        sigma=1.2)[:3]
         n, p = X.shape

From 65bdd8dff754a1b76dcddaa7d16f11471329da84 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 11 Sep 2017 21:41:14 -0700
Subject: [PATCH 218/617] BF: reconstruction

---
 selection/randomized/tests/test_sampling.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 6e3e38ed1..2658ca798 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -63,11 +63,11 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     for i in range(nactive):
         var = active_set[i]
         if signs[var]>0:
-            lower[i] = (-X[:, var].T.dot(y) + lam * signs[var]) / Xdiag[var]
+            lower[i] = (-X[:, var].T.dot(y) + lam * signs[var])
             upper[i] = np.inf
         else:
             lower[i] = -np.inf
-            upper[i] = (-X[:,var].T.dot(y) + lam * signs[var]) / Xdiag[var]
+            upper[i] = (-X[:,var].T.dot(y) + lam * signs[var]) 
 
     lower[range(nactive,p)] = -lam - X[:, ~active].T.dot(y)
     upper[range(nactive,p)] = lam - X[:, ~active].T.dot(y)
@@ -78,8 +78,8 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
                                             nsamples=nsamples)
 
     abs_beta_samples = np.true_divide( 
-                          omega_samples[:,:nactive] * Xdiag[active] + 
-                          X[:,active].T.dot(y)- 
+                          omega_samples[:,:nactive] + 
+                          X[:,active].T.dot(y) - 
                           lam * signs[active], 
                           (epsilon + Xdiag[active]) * signs[active])
     u_samples = omega_samples[:, nactive:] + X[:, ~active].T.dot(y)
@@ -110,7 +110,7 @@ def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
 
 @set_seed_iftrue(SET_SEED, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_conditional_law(ndraw=20000, burnin=2000):
+def test_conditional_law(ndraw=20000, burnin=2000, epsilon=0.1):
     """
     Checks the conditional law of opt variables given the data
     """
@@ -123,7 +123,7 @@ def test_conditional_law(ndraw=20000, burnin=2000):
         inst, const = const_info
 
         X, Y, beta = orthogonal_design(n=100, 
-                                       p=10, 
+                                       p=9, 
                                        s=3,
                                        signal=(2,3), 
                                        sigma=1.2)[:3]
@@ -135,7 +135,8 @@ def test_conditional_law(ndraw=20000, burnin=2000):
                      Y, 
                      W, 
                      randomizer=rand, 
-                     randomizer_scale=randomizer_scale)
+                     randomizer_scale=randomizer_scale,
+                     ridge_term=epsilon)
 
         print(rand)
         if rand == "laplace":
@@ -186,11 +187,11 @@ def plot_ecdf(ndraw=10000, burnin=1000):
          mcmc, 
          truncated) in test_conditional_law(ndraw=ndraw, burnin=burnin):
 
-        fig = plt.figure(num=1, figsize=(8,15))
+        fig = plt.figure(num=1, figsize=(8,8))
         plt.clf()
         idx = 0
         for i in range(mcmc.shape[1]):
-            plt.subplot(5,2,idx+1)
+            plt.subplot(3,3,idx+1)
             xval = np.linspace(min(mcmc[:,i].min(), truncated[:,i].min()), 
                                max(mcmc[:,i].max(), truncated[:,i].max()), 
                                200)

From bc6ed03002c623b8f17f33fca6787e7377f66c6f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 11 Sep 2017 22:00:22 -0700
Subject: [PATCH 219/617] faster sampling of the truncated laws

---
 selection/randomized/tests/test_sampling.py | 24 +++++++++++----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 2658ca798..ad34c8391 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -46,10 +46,7 @@ def inverse_truncated_cdf(x, lower, upper, randomization):
 
 def sampling_truncated_dist(lower, upper, randomization, nsamples=1000):
     uniform_samples = np.random.uniform(0,1, size=(nsamples,randomization.shape[0]))
-    samples = np.zeros((nsamples, randomization.shape[0]))
-    for i in range(nsamples):
-        samples[i,:] = inverse_truncated_cdf(uniform_samples[i,:], lower, upper, randomization)
-    return samples
+    return inverse_truncated_cdf(uniform_samples, lower, upper, randomization)
 
 def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =10000):
 
@@ -110,7 +107,7 @@ def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
 
 @set_seed_iftrue(SET_SEED, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_conditional_law(ndraw=20000, burnin=2000, epsilon=0.1):
+def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5):
     """
     Checks the conditional law of opt variables given the data
     """
@@ -136,7 +133,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, epsilon=0.1):
                      W, 
                      randomizer=rand, 
                      randomizer_scale=randomizer_scale,
-                     ridge_term=epsilon)
+                     ridge_term=ridge_term)
 
         print(rand)
         if rand == "laplace":
@@ -157,7 +154,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, epsilon=0.1):
 
         S = target_sampler.sample(ndraw,
                                   burnin,
-                                  stepsize=None)
+                                  stepsize=1.e-2)
         print(S.shape)
         print([np.mean(S[:,i]) for i in range(p)])
 
@@ -176,9 +173,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, epsilon=0.1):
 
     return results
 
-def plot_ecdf(ndraw=10000, burnin=1000):
-
-    np.random.seed(20)
+def plot_ecdf(ndraw=50000, burnin=5000, remove_atom=False):
 
     import matplotlib.pyplot as plt
     from statsmodels.distributions import ECDF
@@ -195,7 +190,14 @@ def plot_ecdf(ndraw=10000, burnin=1000):
             xval = np.linspace(min(mcmc[:,i].min(), truncated[:,i].min()), 
                                max(mcmc[:,i].max(), truncated[:,i].max()), 
                                200)
-            plt.plot(xval, ECDF(mcmc[:,i])(xval), label='MCMC')
+
+            if remove_atom:
+                mcmc_ = mcmc[:,i]
+                mcmc_ = mcmc_[mcmc_ < np.max(mcmc_)]
+                mcmc_ = mcmc_[mcmc_ > np.min(mcmc_)]
+            else:
+                mcmc_ = mcmc[:,i]
+            plt.plot(xval, ECDF(mcmc_)(xval), label='MCMC')
             plt.plot(xval, ECDF(truncated[:,i])(xval), label='truncated')
             idx += 1
             if idx == 1:

From 000863c71fc1b133ef54a2d7dd52a25f1aed39d7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 11 Sep 2017 22:30:28 -0700
Subject: [PATCH 220/617] put plot into example script, looks good

---
 doc/examples/conditional_sampling.py        | 74 ++++++++++++++++++++
 selection/randomized/tests/test_sampling.py | 77 +++++++--------------
 2 files changed, 100 insertions(+), 51 deletions(-)
 create mode 100644 doc/examples/conditional_sampling.py

diff --git a/doc/examples/conditional_sampling.py b/doc/examples/conditional_sampling.py
new file mode 100644
index 000000000..16bbf499c
--- /dev/null
+++ b/doc/examples/conditional_sampling.py
@@ -0,0 +1,74 @@
+"""
+We demonstrate that our optimization variables have
+the correct distribution given the data.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+from statsmodels.distributions import ECDF
+
+from selection.randomized.tests.test_sampling import test_conditional_law
+
+def main(ndraw=50000, burnin=5000, remove_atom=False, which='omega'):
+
+    fig_idx = 0
+    for (rand,
+         mcmc_opt, 
+         mcmc_omega,
+         truncated_opt,
+         truncated_omega) in test_conditional_law(ndraw=ndraw, burnin=burnin, stepsize=1.e-2):
+
+        fig_idx += 1
+        fig = plt.figure(num=fig_idx, figsize=(8,8))
+        plt.clf()
+        idx = 0
+        for i in range(mcmc_opt.shape[1]):
+            plt.subplot(3,3,idx+1)
+
+            mcmc_ = mcmc_opt[:, i]
+            truncated_ = truncated_opt[:, i]
+
+            xval = np.linspace(min(mcmc_.min(), truncated_.min()), 
+                               max(mcmc_.max(), truncated_.max()), 
+                               200)
+
+            if remove_atom:
+                mcmc_ = mcmc_[mcmc_ < np.max(mcmc_)]
+                mcmc_ = mcmc_[mcmc_ > np.min(mcmc_)]
+
+            plt.plot(xval, ECDF(mcmc_)(xval), label='MCMC')
+            plt.plot(xval, ECDF(truncated_)(xval), label='truncated')
+            idx += 1
+            if idx == 1:
+                plt.legend(loc='lower right')
+        
+        fig_idx += 1
+        fig = plt.figure(num=fig_idx, figsize=(8,8))
+
+        plt.clf()
+        idx = 0
+        for i in range(mcmc_opt.shape[1]):
+            plt.subplot(3,3,idx+1)
+
+            mcmc_ = mcmc_omega[:, i]
+            truncated_ = truncated_omega[:, i]
+
+            xval = np.linspace(min(mcmc_.min(), truncated_.min()), 
+                               max(mcmc_.max(), truncated_.max()), 
+                               200)
+
+            if remove_atom:
+                mcmc_ = mcmc_[mcmc_ < np.max(mcmc_)]
+                mcmc_ = mcmc_[mcmc_ > np.min(mcmc_)]
+            plt.plot(xval, ECDF(mcmc_)(xval), label='MCMC')
+            plt.plot(xval, ECDF(truncated_)(xval), label='truncated')
+            idx += 1
+            if idx == 1:
+                plt.legend(loc='lower right')
+        
+    plt.show()
+
+            
+            
+    
+
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index ad34c8391..a66eb2e70 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -56,6 +56,7 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     lower = np.zeros(p)
     upper = np.zeros(p)
     active_set = np.where(active)[0]
+    inactive_set = np.where(~active)[0]
 
     for i in range(nactive):
         var = active_set[i]
@@ -66,8 +67,8 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
             lower[i] = -np.inf
             upper[i] = (-X[:,var].T.dot(y) + lam * signs[var]) 
 
-    lower[range(nactive,p)] = -lam - X[:, ~active].T.dot(y)
-    upper[range(nactive,p)] = lam - X[:, ~active].T.dot(y)
+    lower[range(nactive, p)] = -lam - X[:, inactive_set].T.dot(y)
+    upper[range(nactive, p)] = lam - X[:, inactive_set].T.dot(y)
 
     omega_samples = sampling_truncated_dist(lower, 
                                             upper, 
@@ -75,13 +76,19 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
                                             nsamples=nsamples)
 
     abs_beta_samples = np.true_divide( 
-                          omega_samples[:,:nactive] + 
-                          X[:,active].T.dot(y) - 
-                          lam * signs[active], 
-                          (epsilon + Xdiag[active]) * signs[active])
-    u_samples = omega_samples[:, nactive:] + X[:, ~active].T.dot(y)
+                          omega_samples[:, :nactive] + 
+                          X[:, active_set].T.dot(y) - 
+                          lam * signs[active_set], 
+                          (epsilon + Xdiag[active_set]) * signs[active_set])
+    u_samples = omega_samples[:, nactive:] + X[:, inactive_set].T.dot(y)
 
-    return np.concatenate((abs_beta_samples, u_samples), axis=1)
+    # this ordering should be correct?
+
+    reordered_omega = np.zeros_like(omega_samples)
+    reordered_omega[:, active_set] = omega_samples[:, :nactive]
+    reordered_omega[:, inactive_set] = omega_samples[:, nactive:]
+
+    return np.concatenate((abs_beta_samples, u_samples), axis=1), reordered_omega
 
 def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
     scale = np.linspace(1, 1.2, p)
@@ -107,7 +114,7 @@ def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
 
 @set_seed_iftrue(SET_SEED, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5):
+def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None):
     """
     Checks the conditional law of opt variables given the data
     """
@@ -150,14 +157,18 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5):
 
         conv._queries.setup_sampler(form_covariances=None)
         conv._queries.setup_opt_state()
-        target_sampler = optimization_sampler(conv._queries)
+        opt_sampler = optimization_sampler(conv._queries)
 
-        S = target_sampler.sample(ndraw,
-                                  burnin,
-                                  stepsize=1.e-2)
+        S = opt_sampler.sample(ndraw,
+                               burnin,
+                               stepsize=stepsize)
         print(S.shape)
         print([np.mean(S[:,i]) for i in range(p)])
 
+        # let's also reconstruct the omegas to compare
+
+        S_omega = opt_sampler.reconstruct(S)
+
         opt_samples = sample_opt_vars(X, 
                                       Y, 
                                       selected_features, 
@@ -167,46 +178,10 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5):
                                       randomizer, 
                                       nsamples=ndraw)
 
-        print([np.mean(opt_samples[:,i]) for i in range(p)])
+        print([np.mean(opt_samples[0][:,i]) for i in range(p)])
 
-        results.append((rand, S, opt_samples))
+        results.append((rand, S, S_omega,) + opt_samples)
 
     return results
 
-def plot_ecdf(ndraw=50000, burnin=5000, remove_atom=False):
-
-    import matplotlib.pyplot as plt
-    from statsmodels.distributions import ECDF
-
-    for (rand, 
-         mcmc, 
-         truncated) in test_conditional_law(ndraw=ndraw, burnin=burnin):
-
-        fig = plt.figure(num=1, figsize=(8,8))
-        plt.clf()
-        idx = 0
-        for i in range(mcmc.shape[1]):
-            plt.subplot(3,3,idx+1)
-            xval = np.linspace(min(mcmc[:,i].min(), truncated[:,i].min()), 
-                               max(mcmc[:,i].max(), truncated[:,i].max()), 
-                               200)
-
-            if remove_atom:
-                mcmc_ = mcmc[:,i]
-                mcmc_ = mcmc_[mcmc_ < np.max(mcmc_)]
-                mcmc_ = mcmc_[mcmc_ > np.min(mcmc_)]
-            else:
-                mcmc_ = mcmc[:,i]
-            plt.plot(xval, ECDF(mcmc_)(xval), label='MCMC')
-            plt.plot(xval, ECDF(truncated[:,i])(xval), label='truncated')
-            idx += 1
-            if idx == 1:
-                plt.legend(loc='lower right')
-        plt.savefig('fig%s.pdf' % rand)
-    plt.show()
-
-            
-            
-    
-
     

From c2dff1dc5b98ac9f9b7e2c7ea408eeab2c3ddc48 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 12 Sep 2017 08:46:37 -0700
Subject: [PATCH 221/617] RF: changed name and signature of construct_weights,
 same signature as log_density

---
 selection/randomized/M_estimator.py           |  10 +-
 selection/randomized/query.py                 | 726 +-----------------
 selection/randomized/target.py                | 681 ++++++++++++++++
 selection/randomized/tests/test_Mest.py       |   8 +-
 .../randomized/tests/test_convenience.py      |  35 +-
 .../randomized/tests/test_greedy_step.py      |   6 +-
 selection/randomized/threshold_score.py       |   5 +-
 7 files changed, 756 insertions(+), 715 deletions(-)
 create mode 100644 selection/randomized/target.py

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 25769e763..149cab939 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -2,7 +2,8 @@
 import regreg.api as rr
 import regreg.affine as ra
 
-from .query import query
+from .query import query 
+from .target import reconstruct_full_internal
 from .randomization import split
 
 class M_estimator(query):
@@ -476,7 +477,7 @@ def condition_on_scalings(self):
         self.num_opt_var = new_linear.shape[1]
 
 
-    def construct_weights(self, full_state):
+    def grad_log_density(self, internal_state, opt_state):
         """
             marginalizing over the sub-gradient
 
@@ -487,6 +488,9 @@ def construct_weights(self, full_state):
             raise ValueError('setup_sampler should be called before using this function')
 
         if self._marginalize_subgradient:
+
+            full_state = reconstruct_full_internal(self, internal_state, opt_state)
+
             p = self.penalty.shape[0]
             weights = np.zeros(p)
 
@@ -505,7 +509,7 @@ def fraction(full_state_plus, full_state_minus, inactive_marginal_groups):
 
             return -weights
         else:
-            return query.construct_weights(self, full_state)
+            return query.grad_log_density(self, internal_state, opt_state)
 
 def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
     """
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index a2ce3b051..3b86ac445 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -8,6 +8,11 @@
 
 from ..distributions.api import discrete_family, intervals_from_sample
 from ..sampling.langevin import projected_langevin
+from .target import (targeted_sampler,
+                     bootstrapped_target_sampler,
+                     reconstruct_full,
+                     reconstruct_opt)
+
 
 class query(object):
 
@@ -26,9 +31,6 @@ def randomize(self):
             self.randomized_loss, self._initial_omega = self.randomization.randomize(self.loss, self.epsilon)
         self._randomized = True
 
-    def construct_weights(self, full_state):
-        return self.randomization.gradient(full_state)
-
     def linear_decomposition(self, target_score_cov, target_cov, observed_target_state):
         """
         Compute out the linear decomposition
@@ -60,23 +62,16 @@ def linear_decomposition(self, target_score_cov, target_cov, observed_target_sta
 
         return (composition_linear_part, composition_offset)
 
-    # Reconstruct different parts of 
-    # randomization: optimization, data and full
-
-    def reconstruct_opt(self, opt_state):
+    # the default log conditional density of state given data 
+    # with no conditioning or marginalizing
 
-        if not self._setup:
-            raise ValueError('setup_sampler should be called before using this function')
+    def log_density(self, internal_state, opt_state):
+        full_state = reconstruct_full_internal(internal_state, opt_state)
+        return self.randomization.log_density(full_state)
 
-        opt_linear, opt_offset = self.opt_transform
-        if opt_linear is not None:
-            opt_state = np.atleast_2d(opt_state)
-            return np.squeeze(opt_linear.dot(opt_state.T) + opt_offset[:, None]).T
-        else:
-            return opt_offset
-
-    def log_density(self, full_data):
-        return self.randomization.log_density(full_data)
+    def grad_log_density(self, internal_state, opt_state):
+        full_state = reconstruct_full_internal(internal_state, opt_state)
+        return self.randomization.gradient(full_state)
 
      # implemented by subclasses
 
@@ -120,25 +115,6 @@ def projection(self, opt_state):
 
         raise NotImplementedError('abstract method -- projection of optimization variables')
 
-def reconstruct_data(data_state, data_transform):
-
-    data_state = np.atleast_2d(data_state)
-    data_linear, data_offset = data_transform
-    if data_linear is not None:
-        return np.squeeze(data_linear.dot(data_state.T) + data_offset[:,None]).T
-    else:
-        return np.squeeze(data_offset)
-
-def reconstruct_full(data_state, data_transform, query, opt_state):
-
-    if not query._setup:
-        raise ValueError('setup_sampler should be called before using this function')
-
-    data_piece = reconstruct_data(data_state, data_transform)
-    opt_piece =  query.reconstruct_opt(opt_state)
-
-    return np.squeeze((data_piece + opt_piece))
-
 class multiple_queries(object):
 
     '''
@@ -303,639 +279,6 @@ def setup_bootstrapped_target(self,
                                            reference=reference,
                                            boot_size=boot_size)
 
-class targeted_sampler(object):
-
-    '''
-    Object to sample from target of a selective sampler.
-    '''
-
-    def __init__(self,
-                 multi_view,
-                 target_info,
-                 observed_target_state,
-                 form_covariances,
-                 reference=None,
-                 target_set=None,
-                 parametric=False):
-
-        '''
-        Parameters
-        ----------
-
-        multi_view : `multiple_queries`
-           Instance of `multiple_queries`. Attributes
-           `objectives`, `score_info` are key
-           attributed. (Should maybe change constructor
-           to reflect only what is needed.)
-
-        target_info : object
-           Passed as first argument to `self.form_covariances`.
-
-        observed_target_state : np.float
-           Observed value of the target estimator.
-
-        form_covariances : callable
-           Used in linear decomposition of each score
-           and the target.
-
-        reference : np.float (optional)
-           Reference parameter for Gaussian approximation
-           of target.
-
-        target_set : sequence (optional)
-           Which coordinates of target are really
-           of interest. If not None, then coordinates
-           not in target_set are assumed to have 0
-           mean in the sampler.
-
-        parametric : bool
-           Use parametric covariance estimate?
-
-        Notes
-        -----
-        The callable `form_covariances`
-        should accept `target_info` as first argument
-        and a keyword argument `cross_terms` which
-        correspond to the `score_info` of each
-        objective of `multi_view`. This used in
-        a linear decomposition of each score into
-        a piece correlated with `target` and
-        an independent piece.
-        The independent piece is treated as a
-        nuisance parameter and conditioned on
-        (i.e. is fixed within the sampler).
-        '''
-
-        # sampler will draw samples for bootstrap
-        # these are arguments to target_info and score_bootstrap
-        # nonparamteric bootstrap is np.random.choice(n, size=(n,), replace=True)
-        # residual bootstrap might be X_E.dot(\bar{\beta}_E)
-        # + np.random.choice(resid, size=(n,), replace=True)
-
-        # if target_set is not None, we assume that
-        # these coordinates (specified by a list of coordinates) of target
-        # is assumed to be independent of the rest
-        # the corresponding block of `target_cov` is zeroed out
-
-        # we need these attributes of multi_view
-
-        self.nqueries = len(multi_view.objectives)
-        self.opt_slice = multi_view.opt_slice
-        self.objectives = multi_view.objectives
-
-        self.observed_target_state = observed_target_state
-        self.shape = observed_target_state.shape
-
-        self.total_randomization_length = multi_view.total_randomization_length
-        self.randomization_slice = multi_view.randomization_slice
-
-        self.score_cov = []
-        target_cov_sum = 0
-        for i in range(self.nqueries):
-            if parametric == False:
-                target_cov, cross_cov = multi_view.form_covariances(target_info,  
-                                                                    cross_terms=[multi_view.score_info[i]],
-                                                                    nsample=multi_view.nboot[i])
-            else:
-                target_cov, cross_cov = multi_view.form_covariances(target_info, 
-                                                                    cross_terms=[multi_view.score_info[i]])
-
-            target_cov_sum += target_cov
-            self.score_cov.append(cross_cov)
-
-        self.target_cov = target_cov_sum / self.nqueries
-
-        # XXX we're not really using this target_set in our tests
-
-        # zero out some coordinates of target_cov
-        # to enforce independence of target and null statistics
-
-        if target_set is not None:
-            null_set = set(range(self.target_cov.shape[0])).difference(target_set)
-            for t, n in product(target_set, null_set):
-                self.target_cov[t, n] = 0.
-                self.target_cov[n, t] = 0.
-
-        self.target_transform = []
-
-        for i in range(self.nqueries):
-            self.target_transform.append(
-                self.objectives[i].linear_decomposition(self.score_cov[i],
-                                                        self.target_cov,
-                                                        self.observed_target_state))
-
-        self.target_cov = np.atleast_2d(self.target_cov)
-        self.target_inv_cov = np.linalg.inv(self.target_cov)
-
-        # size of reference? should it only be target_set?
-
-        if reference is None:
-            reference = np.zeros(self.target_inv_cov.shape[0])
-        self.reference = reference
-
-        # need to vectorize the state for Langevin
-
-        self.overall_opt_slice = slice(0, multi_view.num_opt_var)
-        self.target_slice = slice(multi_view.num_opt_var,
-                                  multi_view.num_opt_var + self._reference_inv.shape[0])
-        self.keep_slice = self.target_slice
-
-        # set the observed state
-
-        self.observed_state = np.zeros(multi_view.num_opt_var + self._reference_inv.shape[0])
-        self.observed_state[self.target_slice] = self.observed_target_state
-        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
-
-        # added for the reconstruction map in case we marginalize over optimization variables
-
-        randomization_length_total = 0
-        self.randomization_slice = []
-        for i in range(self.nqueries):
-            self.randomization_slice.append(
-                slice(randomization_length_total, randomization_length_total + self.objectives[i].ndim))
-            randomization_length_total += self.objectives[i].ndim
-
-        self.randomization_length_total = randomization_length_total
-
-    def set_reference(self, reference):
-        self._reference = np.atleast_1d(reference)
-        self._reference_inv = self.target_inv_cov.dot(self.reference).flatten()
-
-    def get_reference(self):
-        return self._reference
-
-    reference = property(get_reference, set_reference)
-
-    def projection(self, state):
-        '''
-        Projection map of projected Langevin sampler.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Typically, the projection will only act on
-           `opt_vars`.
-        Returns
-        -------
-        projected_state : np.float
-        '''
-
-        opt_state = state[self.overall_opt_slice]
-        new_opt_state = np.zeros_like(opt_state)
-        for i in range(self.nqueries):
-            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
-        state[self.overall_opt_slice] = new_opt_state
-        return state
-
-    def gradient(self, state):
-        '''
-        Gradient of log-density at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        target_state, opt_state = state[self.target_slice], state[self.overall_opt_slice]
-        target_grad, opt_grad = np.zeros_like(target_state), np.zeros_like(opt_state)
-        full_grad = np.zeros_like(state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        for i in range(self.nqueries):
-
-            randomization_state = reconstruct_full(target_state, 
-                                                   self.target_transform[i], 
-                                                   self.objectives[i],
-                                                   opt_state[self.opt_slice[i]])
-
-            grad = self.objectives[i].construct_weights(randomization_state)
-            target_linear, target_offset = self.target_transform[i]
-            opt_linear, opt_offset = self.objectives[i].opt_transform
-            if target_linear is not None:
-                target_grad += target_linear.T.dot(grad)
-            if opt_linear is not None:
-                opt_grad[self.opt_slice[i]] = opt_offset.T.dot(grad)
-
-        target_grad = -target_grad
-        target_grad += self._reference_inv - self.target_inv_cov.dot(target_state)
-        full_grad[self.target_slice] = target_grad
-        full_grad[self.overall_opt_slice] = -opt_grad
-
-        return full_grad
-
-
-    def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
-        '''
-        Sample `target` from selective density
-        using projected Langevin sampler with
-        gradient map `self.gradient` and
-        projection map `self.projection`.
-
-        Parameters
-        ----------
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        keep_opt : bool
-           Should we return optimization variables
-           as well as the target?
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if stepsize is None:
-            stepsize = 1. / self.crude_lipschitz()
-
-        if keep_opt:
-            keep_slice = slice(None, None, None)
-        else:
-            keep_slice = self.keep_slice
-
-        target_langevin = projected_langevin(self.observed_state.copy(),
-                                             self.gradient,
-                                             self.projection,
-                                             stepsize)
-
-        samples = []
-
-        for i in range(ndraw + burnin):
-            target_langevin.next()
-            if (i >= burnin):
-                samples.append(target_langevin.state[keep_slice].copy())
-        return np.asarray(samples)
-
-    def hypothesis_test(self,
-                        test_stat,
-                        observed_value,
-                        ndraw=10000,
-                        burnin=2000,
-                        stepsize=None,
-                        sample=None,
-                        parameter=None,
-                        alternative='twosided'):
-
-        '''
-        Sample `target` from selective density
-        using projected Langevin sampler with
-        gradient map `self.gradient` and
-        projection map `self.projection`.
-        Parameters
-        ----------
-        test_stat : callable
-           Test statistic to evaluate on sample from
-           selective distribution.
-        observed_value : float
-           Observed value of test statistic.
-           Used in p-value calculation.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc. If not None,
-           `ndraw, burnin, stepsize` are ignored.
-        parameter : np.float (optional)
-           If not None, defaults to `self.reference`.
-           Otherwise, sample is reweighted using Gaussian tilting.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        if parameter is None:
-            parameter = self.reference
-
-        sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
-
-
-        delta = self.target_inv_cov.dot(parameter - self.reference)
-        W = np.exp(sample.dot(delta))
-
-        family = discrete_family(sample_test_stat, W)
-        pval = family.cdf(0, observed_value)
-
-        if alternative == 'greater':
-            return 1 - pval
-        elif alternative == 'less':
-            return pval
-        else:
-            return 2 * min(pval, 1 - pval)
-
-    def confidence_intervals(self,
-                             observed,
-                             ndraw=10000,
-                             burnin=2000,
-                             stepsize=None,
-                             sample=None,
-                             level=0.9):
-        '''
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        level : float (optional)
-            Specify the
-            confidence level.
-        Notes
-        -----
-        Construct selective confidence intervals
-        for each parameter of the target.
-        Returns
-        -------
-        intervals : [(float, float)]
-            List of confidence intervals.
-        '''
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
-
-        return intervals_instance.confidence_intervals_all(level=level)
-
-    def coefficient_pvalues(self,
-                            observed,
-                            parameter=None,
-                            ndraw=10000,
-                            burnin=2000,
-                            stepsize=None,
-                            sample=None,
-                            alternative='twosided'):
-        '''
-        Construct selective p-values
-        for each parameter of the target.
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        parameter : np.float (optional)
-            A vector of parameters with shape `self.shape`
-            at which to evaluate p-values. Defaults
-            to `np.zeros(self.shape)`.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        pvalues : np.float
-
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        if parameter is None:
-            parameter = np.zeros(self.shape)
-
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
-
-        pval = intervals_instance.pivots_all(parameter)
-
-        if alternative == 'greater':
-            return 1 - pval
-        elif alternative == 'less':
-            return pval
-        else:
-            return 2 * np.minimum(pval, 1 - pval)
-
-    def crude_lipschitz(self):
-        """
-        A crude Lipschitz constant for the
-        gradient of the log-density.
-        Returns
-        -------
-        lipschitz : float
-
-        """
-        lipschitz = power_L(self.target_inv_cov)
-        for transform, objective in zip(self.target_transform, self.objectives):
-            lipschitz += power_L(transform[0])**2 * objective.randomization.lipschitz
-            lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
-        return lipschitz
-
-
-    def reconstruct(self, state):
-        '''
-        Reconstruction of randomization at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be array with each row a state.
-        Returns
-        -------
-        reconstructed : np.float
-           Has shape of `opt_vars` with same number of rows
-           as `state`.
-
-        '''
-
-        state = np.atleast_2d(state)
-        if len(state.shape) > 2:
-            raise ValueError('expecting at most 2-dimensional array')
-
-        target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
-        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
-
-        for i in range(self.nqueries):
-            reconstructed[:, self.randomization_slice[i]] = reconstruct_full(target_state,
-                                                                             self.target_transform[i],
-                                                                             self.objectives[i],
-                                                                             opt_state[:, self.opt_slice[i]])
-
-        return np.squeeze(reconstructed)
-
-    def log_density(self, state):
-        '''
-        Log of randomization density at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be two-dimensional with each row a state.
-        Returns
-        -------
-        density : np.float
-            Has number of rows as `state` if 2-dimensional.
-        '''
-
-        reconstructed = self.reconstruct(state)
-        value = np.zeros(reconstructed.shape[0])
-
-        for i in range(self.nqueries):
-            log_dens = self.objectives[i].randomization.log_density
-            value += log_dens(reconstructed[:,self.opt_slice[i]])
-        return np.squeeze(value)
-
-class bootstrapped_target_sampler(targeted_sampler):
-
-    # make one of these for each hypothesis test
-
-    def __init__(self,
-                 multi_view,
-                 target_info,
-                 observed_target_state,
-                 target_alpha,
-                 target_set=None,
-                 reference=None,
-                 boot_size=None):
-
-        # sampler will draw bootstrapped weights for the target
-
-        if boot_size is None:
-            boot_size = target_alpha.shape[1]
-
-        targeted_sampler.__init__(self, multi_view,
-                                  target_info,
-                                  observed_target_state,
-                                  target_set,
-                                  reference)
-        # for bootstrap
-
-        self.boot_size = boot_size
-        self.target_alpha = target_alpha
-        self.boot_transform = []
-
-        for i in range(self.nqueries):
-            composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
-                                                                                                  self.target_cov,
-                                                                                                  self.observed_target_state)
-            boot_linear_part = np.dot(composition_linear_part, target_alpha)
-            boot_offset = composition_offset + np.dot(composition_linear_part, self.reference).flatten()
-            self.boot_transform.append((boot_linear_part, boot_offset))
-
-        # set the observed state for bootstrap
-
-        self.boot_slice = slice(multi_view.num_opt_var, multi_view.num_opt_var + self.boot_size)
-        self.observed_state = np.zeros(multi_view.num_opt_var + self.boot_size)
-        self.observed_state[self.boot_slice] = np.ones(self.boot_size)
-        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
-
-
-    def gradient(self, state):
-
-        boot_state, opt_state = state[self.boot_slice], state[self.overall_opt_slice]
-        boot_grad, opt_grad = np.zeros_like(boot_state), np.zeros_like(opt_state)
-        full_grad = np.zeros_like(state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        for i in range(self.nqueries):
-
-            randomization_state = reconstruct_full(boot_state, 
-                                                   self.boot_transform[i], 
-                                                   self.objectives[i],
-                                                   opt_state[self.opt_slice[i]])
-
-            grad = self.objectives[i].construct_weights(randomization_state)
-            boot_linear, boot_offset = self.boot_transform[i]
-            opt_linear, opt_offset = self.objectives[i].opt_transform
-            if boot_linear is not None:
-                boot_grad += boot_linear.T.dot(grad)
-            if opt_linear is not None:
-                opt_grad[self.opt_slice[i]] = opt_offset.T.dot(grad)
-
-        boot_grad = -boot_grad
-        boot_grad -= boot_state
-
-        full_grad[self.boot_slice] = boot_grad
-        full_grad[self.overall_opt_slice] = -opt_grad
-
-        return full_grad
-
-    def sample(self, ndraw, burnin, stepsize = None, keep_opt=False):
-        if stepsize is None:
-            stepsize = 1. / self.observed_state.shape[0]
-
-        bootstrap_langevin = projected_langevin(self.observed_state.copy(),
-                                                self.gradient,
-                                                self.projection,
-                                                stepsize)
-        if keep_opt:
-            boot_slice = slice(None, None, None)
-        else:
-            boot_slice = self.boot_slice
-
-        samples = []
-        for i in range(ndraw + burnin):
-            bootstrap_langevin.next()
-            if (i >= burnin):
-                samples.append(bootstrap_langevin.state[boot_slice].copy())
-        samples = np.asarray(samples)
-
-        if keep_opt:
-            target_samples = samples[:,self.boot_slice].dot(self.target_alpha.T) + self.reference[None, :]
-            opt_sample0 = samples[0,self.overall_opt_slice]
-            result = np.zeros((samples.shape[0], opt_sample0.shape[0] + target_samples.shape[1]))
-            result[:,self.overall_opt_slice] = samples[:,self.overall_opt_slice]
-            result[:,self.target_slice] = target_samples
-            return result
-        else:
-            target_samples = samples.dot(self.target_alpha.T) + self.reference[None, :]
-            return target_samples
 
 class optimization_sampler(object):
 
@@ -1046,10 +389,10 @@ def gradient(self, state):
         # randomization_gradient are gradients of a CONVEX function
 
         for i in range(self.nqueries):
-            reconstructed_opt_state = self.objectives[i].reconstruct_opt(opt_state[self.opt_slice[i]])
+            reconstructed_opt_state = reconstruct_opt(self.objectives[i], opt_state[self.opt_slice[i]])
             opt_linear, opt_offset = self.objectives[i].opt_transform
             opt_grad[self.opt_slice[i]] = \
-                opt_linear.T.dot(self.objectives[i].construct_weights(reconstructed_opt_state + self.observed_raw_score[i]))
+                opt_linear.T.dot(self.objectives[i].grad_log_density(self.observed_score[i], opt_state[self.opt_slice[i]]))
         return -opt_grad
 
     def sample(self, ndraw, burnin, stepsize=None):
@@ -1353,8 +696,9 @@ def reconstruct(self, state):
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
 
         for i in range(self.nqueries):
-            reconstructed[:,self.randomization_slice[i]] = self.objectives[i].reconstruct_opt(  
-                state[:,self.opt_slice[i]]) + self.observed_raw_score[i]
+            reconstructed[:,self.randomization_slice[i]] = (reconstruct_opt(self.objectives[i],  
+                                                                           state[:,self.opt_slice[i]]) + 
+                                                            self.observed_raw_score[i])
 
         return np.squeeze(reconstructed)
 
@@ -1382,31 +726,31 @@ def reconstruct_opt(self, state):
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
 
         for i in range(self.nqueries):
-            reconstructed[:,self.randomization_slice[i]] = self.objectives[i].reconstruct_opt(  
-                state[:,self.opt_slice[i]])
+            reconstructed[:,self.randomization_slice[i]] = reconstruct_opt(self.objectives[i],
+                                                                           state[:,self.opt_slice[i]])
 
         return np.squeeze(reconstructed)
 
-    def log_density(self, state):
+    def log_density(self, internal_state, opt_state):
         '''
         Log of randomization density at current state.
         Parameters
         ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be two-dimensional with each row a state.
+        internal_state : sequence
+           Sequence of internal scores for each view (i.e.
+           in their own coordinate systems).
+
         Returns
         -------
         density : np.float
-            Has number of rows as `state` if 2-dimensional.
+            Has number of rows as `opt_state` if 2-dimensional.
         '''
 
-        reconstructed = self.reconstruct(state)
-        value = np.zeros(reconstructed.shape[0])
+        value = np.zeros(opt_state.shape[0])
 
         for i in range(self.nqueries):
-            log_dens = self.objectives[i].randomization.log_density
-            value += log_dens(reconstructed[:,self.opt_slice[i]])
+            log_dens = self.objectives[i].log_density
+            value += log_dens(internal_state[i], opt_state[:, self.opt_slice[i]]) # may have to broadcast shape here
         return np.squeeze(value)
 
 class optimization_intervals(object):
@@ -1416,8 +760,7 @@ def __init__(self,
                  opt_sample,
                  observed):
 
-        full_sample = opt_sampler.reconstruct(opt_sample) # observed_score + affine(opt_sample)
-        self._logden = opt_sampler.log_density(full_sample)
+        self._logden = opt_sampler.log_density(opt_sampler.observed_score, opt_sample)
 
         # we now remove the observed_score from full_sample
         self.reconstructed_sample = opt_sampler.reconstruct_opt(opt_sample) # affine(opt_sample)
@@ -1431,6 +774,7 @@ def __init__(self,
         self._normal_sample = np.random.multivariate_normal(mean=np.zeros(self.target_cov.shape[0]), 
                                                             cov=self.target_cov, 
                                                             size=(opt_sample.shape[0],))
+
     def pivot(self,
               linear_func,
               candidate,
@@ -1462,9 +806,9 @@ def pivot(self,
             score_linear, score_offset = self.opt_sampler.score_info[i]
             # final_nuisance is on the scale of the original randomization
             final_nuisance = score_linear.dot(cur_nuisance) + score_offset
-            nuisance.append(final_nuisance)
+            nuisance.append(cur_nuisance)
 
-            score_cov.append(score_linear.dot(cur_score_cov) / target_cov)
+            score_cov.append(cur_score_cov / target_cov)
 
 
         weights = self._weights(sample_stat + candidate,  # normal sample under candidate
@@ -1530,8 +874,8 @@ def _weights(self,
 
         _lognum = 0
         for i in range(len(log_densities)):
-            density_arg = np.multiply.outer(score_cov[i], sample_stat) + nuisance[i][:, None]
-            _lognum += log_densities[i](density_arg.T + self.reconstructed_sample)
+            density_arg = np.multiply.outer(sample_stat, score_cov[i]) + nuisance[i][None, :] # these are now internal coordinates
+            _lognum += log_densities[i](density_arg, self.reconstructed_sample)
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
 
diff --git a/selection/randomized/target.py b/selection/randomized/target.py
new file mode 100644
index 000000000..66946a961
--- /dev/null
+++ b/selection/randomized/target.py
@@ -0,0 +1,681 @@
+from itertools import product
+import numpy as np
+
+from regreg.affine import power_L
+
+from ..distributions.api import discrete_family, intervals_from_sample
+from ..sampling.langevin import projected_langevin
+
+def reconstruct_internal(data_state, data_transform):
+    """Apply the affine map `data_transform` = (linear, offset) to `data_state`; offset alone if linear is None."""
+    data_state = np.atleast_2d(data_state)
+    data_linear, data_offset = data_transform
+    if data_linear is not None:
+        return np.squeeze(data_linear.dot(data_state.T) + data_offset[:,None]).T
+    else:
+        return np.squeeze(data_offset)
+
+def reconstruct_full(query, data_state, data_transform, opt_state):
+    """Map `data_state` to internal coordinates, then return the squeezed full state for `query`."""
+    if not query._setup:
+        raise ValueError('setup_sampler should be called before using this function')
+
+    internal_state = reconstruct_internal(data_state, data_transform)
+    return np.squeeze(reconstruct_full_internal(query, internal_state, opt_state))
+
+def reconstruct_opt(query, opt_state):
+    """
+    Apply `query.opt_transform` to `opt_state`; makes sense for queries that have not marginalized or conditioned.
+    """
+    if not query._setup:
+        raise ValueError('setup_sampler should be called on query before using this function')
+
+    opt_linear, opt_offset = query.opt_transform
+    if opt_linear is not None:
+        opt_state = np.atleast_2d(opt_state)
+        return np.squeeze(opt_linear.dot(opt_state.T) + opt_offset[:, None]).T
+    else:
+        return opt_offset
+
+def reconstruct_full_internal(query, internal_state, opt_state):
+    score_linear, score_offset = query.score_transform  # affine map applied to the internal state
+    randomization_internal = score_linear.dot(internal_state.T).T + score_offset  # score contribution
+    randomization_opt = reconstruct_opt(query, opt_state)  # contribution of the optimization variables
+    full_state = randomization_internal + randomization_opt  # full state is the sum of the two pieces
+    return full_state
+
+class targeted_sampler(object):
+
+    '''
+    Object to sample from target of a selective sampler.
+    '''
+
+    def __init__(self,
+                 multi_view,
+                 target_info,
+                 observed_target_state,
+                 form_covariances,
+                 reference=None,
+                 target_set=None,
+                 parametric=False):
+        '''
+        Set up covariances, affine target maps and the observed sampler state.
+        Parameters
+        ----------
+
+        multi_view : `multiple_queries`
+           Instance of `multiple_queries`. Attributes
+           `objectives`, `score_info` are key
+           attributes. (Should maybe change constructor
+           to reflect only what is needed.)
+
+        target_info : object
+           Passed as first argument to `multi_view.form_covariances`.
+
+        observed_target_state : np.float
+           Observed value of the target estimator.
+
+        form_covariances : callable
+           NOTE(review): not referenced in this constructor, which
+           calls `multi_view.form_covariances` instead -- confirm intent.
+
+        reference : np.float (optional)
+           Reference parameter for Gaussian approximation
+           of target.
+
+        target_set : sequence (optional)
+           Which coordinates of target are really
+           of interest. If not None, then coordinates
+           not in target_set are assumed to have 0
+           mean in the sampler.
+
+        parametric : bool
+           Use parametric covariance estimate?
+
+        Notes
+        -----
+        The callable `form_covariances`
+        should accept `target_info` as first argument
+        and a keyword argument `cross_terms` which
+        corresponds to the `score_info` of each
+        objective of `multi_view`. This is used in
+        a linear decomposition of each score into
+        a piece correlated with `target` and
+        an independent piece.
+        The independent piece is treated as a
+        nuisance parameter and conditioned on
+        (i.e. is fixed within the sampler).
+        '''
+
+        # sampler will draw samples for bootstrap
+        # these are arguments to target_info and score_bootstrap
+        # nonparametric bootstrap is np.random.choice(n, size=(n,), replace=True)
+        # residual bootstrap might be X_E.dot(\bar{\beta}_E)
+        # + np.random.choice(resid, size=(n,), replace=True)
+
+        # if target_set is not None, we assume that
+        # these coordinates (specified by a list of coordinates) of target
+        # are independent of the rest:
+        # the corresponding block of `target_cov` is zeroed out
+
+        # we need these attributes of multi_view
+
+        self.nqueries = len(multi_view.objectives)
+        self.opt_slice = multi_view.opt_slice
+        self.objectives = multi_view.objectives
+
+        self.observed_target_state = observed_target_state
+        self.shape = observed_target_state.shape
+
+        self.total_randomization_length = multi_view.total_randomization_length
+        self.randomization_slice = multi_view.randomization_slice
+
+        self.score_cov = []
+        target_cov_sum = 0
+        for i in range(self.nqueries):
+            if parametric == False:
+                target_cov, cross_cov = multi_view.form_covariances(target_info,  
+                                                                    cross_terms=[multi_view.score_info[i]],
+                                                                    nsample=multi_view.nboot[i])
+            else:
+                target_cov, cross_cov = multi_view.form_covariances(target_info, 
+                                                                    cross_terms=[multi_view.score_info[i]])
+
+            target_cov_sum += target_cov
+            self.score_cov.append(cross_cov)
+
+        self.target_cov = target_cov_sum / self.nqueries  # average of the per-query target covariances
+
+        # XXX we're not really using this target_set in our tests
+
+        # zero out some coordinates of target_cov
+        # to enforce independence of target and null statistics
+
+        if target_set is not None:
+            null_set = set(range(self.target_cov.shape[0])).difference(target_set)
+            for t, n in product(target_set, null_set):
+                self.target_cov[t, n] = 0.
+                self.target_cov[n, t] = 0.
+
+        self.target_transform = []
+
+        for i in range(self.nqueries):
+            self.target_transform.append(
+                self.objectives[i].linear_decomposition(self.score_cov[i],
+                                                        self.target_cov,
+                                                        self.observed_target_state))
+
+        self.target_cov = np.atleast_2d(self.target_cov)
+        self.target_inv_cov = np.linalg.inv(self.target_cov)
+
+        # size of reference? should it only be target_set?
+
+        if reference is None:
+            reference = np.zeros(self.target_inv_cov.shape[0])
+        self.reference = reference  # property setter; also computes self._reference_inv used below
+
+        # need to vectorize the state for Langevin
+
+        self.overall_opt_slice = slice(0, multi_view.num_opt_var)
+        self.target_slice = slice(multi_view.num_opt_var,
+                                  multi_view.num_opt_var + self._reference_inv.shape[0])
+        self.keep_slice = self.target_slice
+
+        # set the observed state
+
+        self.observed_state = np.zeros(multi_view.num_opt_var + self._reference_inv.shape[0])
+        self.observed_state[self.target_slice] = self.observed_target_state
+        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
+
+        # added for the reconstruction map in case we marginalize over optimization variables
+
+        randomization_length_total = 0
+        self.randomization_slice = []  # replaces the slices copied from multi_view above
+        for i in range(self.nqueries):
+            self.randomization_slice.append(
+                slice(randomization_length_total, randomization_length_total + self.objectives[i].ndim))
+            randomization_length_total += self.objectives[i].ndim
+
+        self.randomization_length_total = randomization_length_total
+
+    def set_reference(self, reference):
+        self._reference = np.atleast_1d(reference)  # stored as an array with at least one dimension
+        self._reference_inv = self.target_inv_cov.dot(self.reference).flatten()  # cached target_inv_cov . reference
+
+    def get_reference(self):
+        return self._reference
+
+    reference = property(get_reference, set_reference)  # assigning `reference` also refreshes `_reference_inv`
+
+    def projection(self, state):
+        '''
+        Projection map of projected Langevin sampler.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler; only the entries in
+           `self.overall_opt_slice` are projected, each
+           query's block by its own `projection`.
+        Returns
+        -------
+        projected_state : np.float (`state`, modified in place)
+        '''
+
+        opt_state = state[self.overall_opt_slice]
+        new_opt_state = np.zeros_like(opt_state)
+        for i in range(self.nqueries):
+            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
+        state[self.overall_opt_slice] = new_opt_state
+        return state
+
+    def gradient(self, state):
+        '''
+        Gradient of log-density at current state.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler: `opt_vars` block followed by the `target` block.
+        Returns
+        -------
+        gradient : np.float
+        '''
+
+        target_state, opt_state = state[self.target_slice], state[self.overall_opt_slice]
+        target_grad, opt_grad = np.zeros_like(target_state), np.zeros_like(opt_state)
+        full_grad = np.zeros_like(state)
+
+        # randomization_gradient are gradients of a CONVEX function
+
+        for i in range(self.nqueries):
+            # value is unused, but the call raises ValueError when `query._setup` is falsy
+            randomization_state = reconstruct_full(self.objectives[i],
+                                                   target_state,
+                                                   self.target_transform[i],
+                                                   opt_state[self.opt_slice[i]])
+
+            internal_state = reconstruct_internal(target_state, self.target_transform[i])
+            grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]])
+            target_linear, target_offset = self.target_transform[i]
+            opt_linear, opt_offset = self.objectives[i].opt_transform
+            if target_linear is not None:
+                target_grad += target_linear.T.dot(grad)
+            if opt_linear is not None:
+                opt_grad[self.opt_slice[i]] = opt_linear.T.dot(grad)  # chain rule: pull back through the linear part, not the offset
+
+        target_grad = -target_grad
+        target_grad += self._reference_inv - self.target_inv_cov.dot(target_state)  # Gaussian term: target_inv_cov . (reference - target_state)
+        full_grad[self.target_slice] = target_grad
+        full_grad[self.overall_opt_slice] = -opt_grad
+
+        return full_grad
+
+
+    def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
+        '''
+        Sample `target` from selective density
+        using projected Langevin sampler with
+        gradient map `self.gradient` and
+        projection map `self.projection`.
+
+        Parameters
+        ----------
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler.
+           Defaults to
+           `1. / self.crude_lipschitz()`.
+        keep_opt : bool
+           Should we return optimization variables
+           as well as the target?
+        Returns
+        -------
+        samples : np.float (one row per kept draw)
+        '''
+
+        if stepsize is None:
+            stepsize = 1. / self.crude_lipschitz()
+
+        if keep_opt:
+            keep_slice = slice(None, None, None)
+        else:
+            keep_slice = self.keep_slice
+
+        target_langevin = projected_langevin(self.observed_state.copy(),
+                                             self.gradient,
+                                             self.projection,
+                                             stepsize)
+
+        samples = []
+
+        for i in range(ndraw + burnin):
+            target_langevin.next()
+            if (i >= burnin):
+                samples.append(target_langevin.state[keep_slice].copy())
+        return np.asarray(samples)
+
+    def hypothesis_test(self,
+                        test_stat,
+                        observed_value,
+                        ndraw=10000,
+                        burnin=2000,
+                        stepsize=None,
+                        sample=None,
+                        parameter=None,
+                        alternative='twosided'):
+
+        '''
+        Selective p-value for `test_stat`, computed
+        from a sample of `target` (drawn with
+        `self.sample` unless `sample` is supplied)
+        reweighted by Gaussian tilting.
+        Parameters
+        ----------
+        test_stat : callable
+           Test statistic to evaluate on sample from
+           selective distribution.
+        observed_value : float
+           Observed value of test statistic.
+           Used in p-value calculation.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc. If not None,
+           `ndraw, burnin, stepsize` are ignored.
+        parameter : np.float (optional)
+           If None, defaults to `self.reference`.
+           Otherwise, sample is reweighted using Gaussian tilting.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalue : float
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        if parameter is None:
+            parameter = self.reference
+
+        sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
+
+        # tilting weights that move the sample from `self.reference` to `parameter`
+        delta = self.target_inv_cov.dot(parameter - self.reference)
+        W = np.exp(sample.dot(delta))
+
+        family = discrete_family(sample_test_stat, W)
+        pval = family.cdf(0, observed_value)
+
+        if alternative == 'greater':
+            return 1 - pval
+        elif alternative == 'less':
+            return pval
+        else:
+            return 2 * min(pval, 1 - pval)
+
+    def confidence_intervals(self,
+                             observed,
+                             ndraw=10000,
+                             burnin=2000,
+                             stepsize=None,
+                             sample=None,
+                             level=0.9):
+        '''Selective confidence intervals built from a sample of the target.
+        Parameters
+        ----------
+        observed : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        level : float (optional)
+            Specify the
+            confidence level.
+        Notes
+        -----
+        Construct selective confidence intervals
+        for each parameter of the target.
+        Returns
+        -------
+        intervals : [(float, float)]
+            List of confidence intervals.
+        '''
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        nactive = observed.shape[0]  # NOTE(review): not used below
+        intervals_instance = intervals_from_sample(self.reference,
+                                                   sample,
+                                                   observed,
+                                                   self.target_cov)
+
+        return intervals_instance.confidence_intervals_all(level=level)
+
+    def coefficient_pvalues(self,
+                            observed,
+                            parameter=None,
+                            ndraw=10000,
+                            burnin=2000,
+                            stepsize=None,
+                            sample=None,
+                            alternative='twosided'):
+        '''
+        Construct selective p-values
+        for each parameter of the target.
+        Parameters
+        ----------
+        observed : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        parameter : np.float (optional)
+            A vector of parameters with shape `self.shape`
+            at which to evaluate p-values. Defaults
+            to `np.zeros(self.shape)`.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalues : np.float
+            Derived from `pivots_all(parameter)` according to `alternative`.
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        if parameter is None:
+            parameter = np.zeros(self.shape)
+
+        nactive = observed.shape[0]  # NOTE(review): not used below
+        intervals_instance = intervals_from_sample(self.reference,
+                                                   sample,
+                                                   observed,
+                                                   self.target_cov)
+
+        pval = intervals_instance.pivots_all(parameter)
+
+        if alternative == 'greater':
+            return 1 - pval
+        elif alternative == 'less':
+            return pval
+        else:
+            return 2 * np.minimum(pval, 1 - pval)
+
+    def crude_lipschitz(self):
+        """
+        A crude Lipschitz constant for the
+        gradient of the log-density.
+        Returns
+        -------
+        lipschitz : float
+
+        """
+        lipschitz = power_L(self.target_inv_cov)
+        for transform, objective in zip(self.target_transform, self.objectives):
+            lipschitz += power_L(transform[0])**2 * objective.randomization.lipschitz
+            lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
+        return lipschitz
+
+
+    def reconstruct(self, state):
+        '''
+        Reconstruction of randomization at current state.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler made up of `(target, opt_vars)`.
+           Can be array with each row a state.
+        Returns
+        -------
+        reconstructed : np.float
+           Has shape of `opt_vars` with same number of rows
+           as `state`.
+
+        '''
+
+        state = np.atleast_2d(state)
+        if len(state.shape) > 2:
+            raise ValueError('expecting at most 2-dimensional array')
+
+        target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
+        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
+
+        for i in range(self.nqueries):
+            reconstructed[:, self.randomization_slice[i]] = reconstruct_full(self.objectives[i],
+                                                                             target_state,
+                                                                             self.target_transform[i],
+                                                                             opt_state[:, self.opt_slice[i]])
+
+        return np.squeeze(reconstructed)
+
+    def log_density(self, state):
+        '''
+        Log of randomization density at current state.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler made up of `(target, opt_vars)`.
+           Can be two-dimensional with each row a state.
+        Returns
+        -------
+        density : np.float
+            Has number of rows as `state` if 2-dimensional.
+        '''
+        # `reconstruct` squeezes its result; restore one row per state so a single state also works
+        reconstructed = self.reconstruct(state).reshape((-1, self.total_randomization_length))
+        value = np.zeros(reconstructed.shape[0])
+        # query i's randomization occupies `randomization_slice[i]` of each row, as filled in `reconstruct`
+        for i in range(self.nqueries):
+            log_dens = self.objectives[i].randomization.log_density
+            value += log_dens(reconstructed[:,self.randomization_slice[i]])
+        return np.squeeze(value)
+
+class bootstrapped_target_sampler(targeted_sampler):
+
+    # make one of these for each hypothesis test
+
+    def __init__(self,
+                 multi_view,
+                 target_info,
+                 observed_target_state,
+                 target_alpha,
+                 target_set=None,
+                 reference=None,
+                 boot_size=None):
+
+        # sampler will draw bootstrapped weights for the target
+
+        if boot_size is None:
+            boot_size = target_alpha.shape[1]
+
+        targeted_sampler.__init__(self, multi_view,
+                                  target_info,
+                                  observed_target_state,
+                                  target_set,
+                                  reference)
+        # for bootstrap
+
+        self.boot_size = boot_size
+        self.target_alpha = target_alpha
+        self.boot_transform = []
+
+        for i in range(self.nqueries):
+            composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
+                                                                                                  self.target_cov,
+                                                                                                  self.observed_target_state)
+            boot_linear_part = np.dot(composition_linear_part, target_alpha)
+            boot_offset = composition_offset + np.dot(composition_linear_part, self.reference).flatten()
+            self.boot_transform.append((boot_linear_part, boot_offset))
+
+        # set the observed state for bootstrap
+
+        self.boot_slice = slice(multi_view.num_opt_var, multi_view.num_opt_var + self.boot_size)
+        self.observed_state = np.zeros(multi_view.num_opt_var + self.boot_size)
+        self.observed_state[self.boot_slice] = np.ones(self.boot_size)
+        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
+
+
+    def gradient(self, state):
+        """Gradient handed to the Langevin sampler in `sample`; same shape as `state` (boot and opt slices)."""
+        boot_state, opt_state = state[self.boot_slice], state[self.overall_opt_slice]
+        boot_grad, opt_grad = np.zeros_like(boot_state), np.zeros_like(opt_state)
+        full_grad = np.zeros_like(state)
+
+        # randomization_gradient are gradients of a CONVEX function
+
+        for i in range(self.nqueries):
+
+            randomization_state = reconstruct_full(self.objectives[i],
+                                                   boot_state, 
+                                                   self.boot_transform[i], 
+                                                   opt_state[self.opt_slice[i]])
+
+            internal_state = reconstruct_internal(boot_state, self.boot_transform[i])
+            grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]])
+            boot_linear, boot_offset = self.boot_transform[i]
+            opt_linear, opt_offset = self.objectives[i].opt_transform
+            if boot_linear is not None:
+                boot_grad += boot_linear.T.dot(grad)
+            if opt_linear is not None:
+                opt_grad[self.opt_slice[i]] = opt_linear.T.dot(grad)  # chain rule needs the linear part, not the offset
+
+        boot_grad = -boot_grad
+        boot_grad -= boot_state
+
+        full_grad[self.boot_slice] = boot_grad
+        full_grad[self.overall_opt_slice] = -opt_grad
+
+        return full_grad
+
+    def sample(self, ndraw, burnin, stepsize = None, keep_opt=False):
+        if stepsize is None:
+            stepsize = 1. / self.observed_state.shape[0]
+
+        bootstrap_langevin = projected_langevin(self.observed_state.copy(),
+                                                self.gradient,
+                                                self.projection,
+                                                stepsize)
+        if keep_opt:
+            boot_slice = slice(None, None, None)
+        else:
+            boot_slice = self.boot_slice
+
+        samples = []
+        for i in range(ndraw + burnin):
+            bootstrap_langevin.next()
+            if (i >= burnin):
+                samples.append(bootstrap_langevin.state[boot_slice].copy())
+        samples = np.asarray(samples)
+
+        if keep_opt:
+            target_samples = samples[:,self.boot_slice].dot(self.target_alpha.T) + self.reference[None, :]
+            opt_sample0 = samples[0,self.overall_opt_slice]
+            result = np.zeros((samples.shape[0], opt_sample0.shape[0] + target_samples.shape[1]))
+            result[:,self.overall_opt_slice] = samples[:,self.overall_opt_slice]
+            result[:,self.target_slice] = target_samples
+            return result
+        else:
+            target_samples = samples.dot(self.target_alpha.T) + self.reference[None, :]
+            return target_samples
diff --git a/selection/randomized/tests/test_Mest.py b/selection/randomized/tests/test_Mest.py
index 8ba805543..8e79da624 100644
--- a/selection/randomized/tests/test_Mest.py
+++ b/selection/randomized/tests/test_Mest.py
@@ -16,7 +16,7 @@
 from ..glm import bootstrap_cov
 from ...distributions.discrete_family import discrete_family
 from ...sampling.langevin import projected_langevin
-from ..query import reconstruct_full
+from ..target import reconstruct_internal
 
 @register_report(['pvalue', 'active'])
 @wait_for_return_value()
@@ -93,10 +93,10 @@ def target_gradient(state):
             opt_state1 = state[opt_slice1]
             opt_state2 = state[opt_slice2]
             opt_linear1 = M_est1.opt_transform[0]
-            arg1 = reconstruct_full(target, (A1, b1), M_est1, opt_state1); grad1 = M_est1.construct_weights(arg1)
+            arg1 = reconstruct_internal(target, (A1, b1)); grad1 = M_est1.grad_log_density(arg1, opt_state1)
 
             opt_linear2 = M_est2.opt_transform[0]
-            arg2 = reconstruct_full(target, (A2, b2), M_est2, opt_state2); grad2 = M_est2.construct_weights(arg2)
+            arg2 = reconstruct_internal(target, (A2, b2)); grad2 = M_est2.grad_log_density(arg2, opt_state2)
 
             full_grad = np.zeros_like(state)
             full_grad[opt_slice1] = -opt_linear1.T.dot(grad1)
@@ -207,7 +207,7 @@ def target_gradient(state):
 
 
             opt_linear1 = M_est1.opt_transform[0]
-            arg1 = reconstruct_full(target, (A1, b1), M_est1, opt_state1); grad1 = M_est1.construct_weights(arg1)
+            arg1 = reconstruct_internal(target, (A1, b1)); grad1 = M_est1.grad_log_density(arg1, opt_state1)
 
             full_grad = np.zeros_like(state)
             full_grad[opt_slice1] = -opt_linear1.T.dot(grad1)
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index bb2405d7c..e288896ef 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -10,33 +10,42 @@
 from ...tests.flags import SMALL_SAMPLES
 from ...tests.decorators import set_sampling_params_iftrue 
 
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=2, burnin=2)
 def test_lasso_constructors(ndraw=1000, burnin=200):
     """
     Smoke tests for lasso convenience constructors
     """
     cls = lasso
-    for const_info, rand in product(zip([gaussian_instance,
-                                         logistic_instance,
-                                         poisson_instance],
-                                        [cls.gaussian,
-                                         cls.logistic,
-                                         cls.poisson]),
-                              ['gaussian', 'logistic', 'laplace']):
+    for const_info, rand, marginalize, condition in product(zip([gaussian_instance,
+                                                                 logistic_instance,
+                                                                 poisson_instance],
+                                                                [cls.gaussian,
+                                                                 cls.logistic,
+                                                                 cls.poisson]),
+                                                            ['gaussian', 'logistic', 'laplace'],
+                                                            [False, True],
+                                                            [False, True]):
 
         inst, const = const_info
-        X, Y = inst()[:2]
+        X, Y = inst(n=10, p=20, signal=1, s=3)[:2]
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 20
         conv = const(X, Y, W, randomizer=rand)
         signs = conv.fit()
 
-        marginalizing_groups = np.zeros(p, np.bool)
-        marginalizing_groups[:int(p/2)] = True
+        marginalizing_groups = None
+        if marginalize:
+            marginalizing_groups = np.zeros(p, np.bool)
+            marginalizing_groups[:int(p/2)] = True
         
-        conditioning_groups = ~marginalizing_groups
-        conditioning_groups[-int(p/4):] = False
+        conditioning_groups = None
+        if condition:
+            if marginalize:
+                conditioning_groups = ~marginalizing_groups
+            else:
+                conditioning_groups = np.ones(p, np.bool)
+            conditioning_groups[-int(p/4):] = False
 
         selected_features = np.zeros(p, np.bool)
         selected_features[:3] = True
diff --git a/selection/randomized/tests/test_greedy_step.py b/selection/randomized/tests/test_greedy_step.py
index d193702e0..67c840860 100644
--- a/selection/randomized/tests/test_greedy_step.py
+++ b/selection/randomized/tests/test_greedy_step.py
@@ -24,7 +24,7 @@
 from ..glm import bootstrap_cov
 from ...distributions.discrete_family import discrete_family
 from ...sampling.langevin import projected_langevin
-from ..query import reconstruct_full
+from ..target import reconstruct_internal
 
 @register_report(['pvalue', 'active'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
@@ -113,10 +113,10 @@ def target_gradient(state):
             opt_state2 = state[opt_slice2]
 
             opt_linear1 = M_est1.opt_transform[0]
-            arg1 = reconstruct_full(target, (A1, b1), M_est1, opt_state1); grad1 = M_est1.construct_weights(arg1)
+            arg1 = reconstruct_internal(target, (A1, b1)); grad1 = M_est1.grad_log_density(arg1, opt_state1)
 
             opt_linear2 = step.opt_transform[0]
-            arg2 = reconstruct_full(target, (A2, b2), step, opt_state2); grad2 = step.construct_weights(arg2)
+            arg2 = reconstruct_internal(target, (A2, b2)); grad2 = step.grad_log_density(arg2, opt_state2)
 
             full_grad = np.zeros_like(state)
             full_grad[opt_slice1] = -opt_linear1.T.dot(grad1)
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index ce43f86ca..d82f450ab 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -2,6 +2,7 @@
 import regreg.api as rr
 
 from .query import query
+from .target import reconstruct_full_internal
 from .M_estimator import restricted_Mest
 
 class threshold_score(query):
@@ -124,7 +125,7 @@ def solve(self, nboot=2000):
         self.nboot = nboot
         self.ndim = self.loss.shape[0]
 
-    def construct_weights(self, full_state):
+    def grad_log_density(self, internal_state, opt_state):
         """
         marginalizing over the sub-gradient
         """
@@ -132,6 +133,8 @@ def construct_weights(self, full_state):
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
+        full_state = reconstruct_full_internal(self, internal_state, opt_state)
+
         threshold = self.threshold
         weights = np.zeros_like(self.boundary, np.float)
 

From d8de38acb9fc1b0e4dbab0cc1e7aade0ab1bf0a2 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 12 Sep 2017 09:05:28 -0700
Subject: [PATCH 222/617] added reconstruction module, forcing query to use the
 reconstruction maps

---
 selection/randomized/M_estimator.py     |  2 +-
 selection/randomized/query.py           | 10 ++--
 selection/randomized/reconstruction.py  | 75 +++++++++++++++++++++++++
 selection/randomized/target.py          | 63 +++++----------------
 selection/randomized/threshold_score.py |  2 +-
 5 files changed, 95 insertions(+), 57 deletions(-)
 create mode 100644 selection/randomized/reconstruction.py

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 149cab939..c01717002 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -3,7 +3,7 @@
 import regreg.affine as ra
 
 from .query import query 
-from .target import reconstruct_full_internal
+from .reconstruction import reconstruct_full_from_internal
 from .randomization import split
 
 class M_estimator(query):
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 3b86ac445..1eebd38e5 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -9,9 +9,9 @@
 from ..distributions.api import discrete_family, intervals_from_sample
 from ..sampling.langevin import projected_langevin
 from .target import (targeted_sampler,
-                     bootstrapped_target_sampler,
-                     reconstruct_full,
-                     reconstruct_opt)
+                     bootstrapped_target_sampler)
+from .reconstruction import (reconstruct_opt,
+                             reconstruct_full_from_internal)
 
 
 class query(object):
@@ -66,11 +66,11 @@ def linear_decomposition(self, target_score_cov, target_cov, observed_target_sta
     # with no conditioning or marginalizing
 
     def log_density(self, internal_state, opt_state):
-        full_state = reconstruct_full_internal(internal_state, opt_state)
+        full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
         return self.randomization.log_density(full_state)
 
     def grad_log_density(self, internal_state, opt_state):
-        full_state = reconstruct_full_internal(internal_state, opt_state)
+        full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
         return self.randomization.gradient(full_state)
 
      # implemented by subclasses
diff --git a/selection/randomized/reconstruction.py b/selection/randomized/reconstruction.py
new file mode 100644
index 000000000..dc827aa73
--- /dev/null
+++ b/selection/randomized/reconstruction.py
@@ -0,0 +1,75 @@
+"""
+As part of forming the selective likelihood ratio, various reconstructions
+of parts of the original randomization are necessary.
+
+In this module, generally speaking: 
+
+- `internal` refers to coordinates internal to a given query
+as each query can represent its data in its own coordinates;
+
+- `full` refers to the coordinate system of the original randomization
+and is the sum of a `score` as well as an `opt` term
+
+ 
+"""
+import numpy as np
+
+def reconstruct_internal(data_state, data_transform):
+    """
+    Reconstruct some internal state data
+    based on an affine mapping from `data_state` to the
+    internal coordinates of the query.
+    """
+
+    data_state = np.atleast_2d(data_state)
+    data_linear, data_offset = data_transform
+    if data_linear is not None:
+        return np.squeeze(data_linear.dot(data_state.T) + data_offset[:,None]).T
+    else:
+        return np.squeeze(data_offset)
+
+def reconstruct_full_from_data(query, data_state, data_transform, opt_state):
+    """
+    Reconstruct original randomization state from state data
+    and optimization state.
+    """
+
+    if not query._setup:
+        raise ValueError('setup_sampler should be called before using this function')
+
+    internal_state = reconstruct_internal(data_state, data_transform)
+    return np.squeeze(reconstruct_full_from_internal(query, internal_state, opt_state))
+
+def reconstruct_opt(query, opt_state):
+    """
+    Reconstruct part of the original randomization state 
+    in terms of optimization state.
+    """
+    if not query._setup:
+        raise ValueError('setup_sampler should be called on query before using this function')
+
+    opt_linear, opt_offset = query.opt_transform
+    if opt_linear is not None:
+        opt_state = np.atleast_2d(opt_state)
+        return np.squeeze(opt_linear.dot(opt_state.T) + opt_offset[:, None]).T
+    else:
+        return opt_offset
+
+def reconstruct_score(query, internal_state):
+    """
+    Reconstruct part of the original randomization state 
+    determined by the score of the loss from 
+    a query's internal coordinates.
+    """
+    score_linear, score_offset = query.score_transform
+    return score_linear.dot(internal_state.T).T + score_offset
+
+def reconstruct_full_from_internal(query, internal_state, opt_state):
+    """
+    Reconstruct original randomization state from internal state data
+    and optimization state.
+    """
+    randomization_internal = reconstruct_score(query, internal_state)
+    randomization_opt = reconstruct_opt(query, opt_state)
+    return randomization_internal + randomization_opt
+
diff --git a/selection/randomized/target.py b/selection/randomized/target.py
index 66946a961..6513ff435 100644
--- a/selection/randomized/target.py
+++ b/selection/randomized/target.py
@@ -5,44 +5,7 @@
 
 from ..distributions.api import discrete_family, intervals_from_sample
 from ..sampling.langevin import projected_langevin
-
-def reconstruct_internal(data_state, data_transform):
-
-    data_state = np.atleast_2d(data_state)
-    data_linear, data_offset = data_transform
-    if data_linear is not None:
-        return np.squeeze(data_linear.dot(data_state.T) + data_offset[:,None]).T
-    else:
-        return np.squeeze(data_offset)
-
-def reconstruct_full(query, data_state, data_transform, opt_state):
-
-    if not query._setup:
-        raise ValueError('setup_sampler should be called before using this function')
-
-    internal_state = reconstruct_internal(data_state, data_transform)
-    return np.squeeze(reconstruct_full_internal(query, internal_state, opt_state))
-
-def reconstruct_opt(query, opt_state):
-    """
-    makes sense for queries that have not marginalized or conditioned
-    """
-    if not query._setup:
-        raise ValueError('setup_sampler should be called on query before using this function')
-
-    opt_linear, opt_offset = query.opt_transform
-    if opt_linear is not None:
-        opt_state = np.atleast_2d(opt_state)
-        return np.squeeze(opt_linear.dot(opt_state.T) + opt_offset[:, None]).T
-    else:
-        return opt_offset
-
-def reconstruct_full_internal(query, internal_state, opt_state):
-    score_linear, score_offset = query.score_transform
-    randomization_internal = score_linear.dot(internal_state.T).T + score_offset
-    randomization_opt = reconstruct_opt(query, opt_state)
-    full_state = randomization_internal + randomization_opt
-    return full_state
+from .reconstruction import reconstruct_full_from_data, reconstruct_internal
 
 class targeted_sampler(object):
 
@@ -248,10 +211,10 @@ def gradient(self, state):
 
         for i in range(self.nqueries):
 
-            randomization_state = reconstruct_full(self.objectives[i],
-                                                   target_state, 
-                                                   self.target_transform[i], 
-                                                   opt_state[self.opt_slice[i]])
+            randomization_state = reconstruct_full_from_data(self.objectives[i],
+                                                             target_state, 
+                                                             self.target_transform[i], 
+                                                             opt_state[self.opt_slice[i]])
 
             internal_state = reconstruct_internal(target_state, self.target_transform[i])
             grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]]) 
@@ -543,10 +506,10 @@ def reconstruct(self, state):
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
 
         for i in range(self.nqueries):
-            reconstructed[:, self.randomization_slice[i]] = reconstruct_full(self.objectives[i],
-                                                                             target_state,
-                                                                             self.target_transform[i],
-                                                                             opt_state[:, self.opt_slice[i]])
+            reconstructed[:, self.randomization_slice[i]] = reconstruct_full_from_data(self.objectives[i],
+                                                                                       target_state,
+                                                                                       self.target_transform[i],
+                                                                                       opt_state[:, self.opt_slice[i]])
 
         return np.squeeze(reconstructed)
 
@@ -627,10 +590,10 @@ def gradient(self, state):
 
         for i in range(self.nqueries):
 
-            randomization_state = reconstruct_full(self.objectives[i],
-                                                   boot_state, 
-                                                   self.boot_transform[i], 
-                                                   opt_state[self.opt_slice[i]])
+            randomization_state = reconstruct_full_from_data(self.objectives[i],
+                                                             boot_state, 
+                                                             self.boot_transform[i], 
+                                                             opt_state[self.opt_slice[i]])
 
             internal_state = reconstruct_internal(boot_state, self.boot_transform[i])
             grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]])
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index d82f450ab..145c471bb 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -2,7 +2,7 @@
 import regreg.api as rr
 
 from .query import query
-from .target import reconstruct_full_internal
+from .reconstruction import reconstruct_full_from_internal
 from .M_estimator import restricted_Mest
 
 class threshold_score(query):

From 57d87d837569178bba756dc72e86a72c2f168303 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 12 Sep 2017 09:30:31 -0700
Subject: [PATCH 223/617] BF: wrong argument to log density but plots still
 look good

---
 selection/randomized/M_estimator.py            | 2 +-
 selection/randomized/convenience.py            | 4 ++--
 selection/randomized/query.py                  | 8 +++-----
 selection/randomized/tests/test_convenience.py | 5 ++++-
 selection/randomized/threshold_score.py        | 2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index c01717002..8e40ec4ff 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -489,7 +489,7 @@ def grad_log_density(self, internal_state, opt_state):
 
         if self._marginalize_subgradient:
 
-            full_state = reconstruct_full_internal(self, internal_state, opt_state)
+            full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
 
             p = self.penalty.shape[0]
             weights = np.zeros(p)
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 641faaafd..21b5b40e7 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -100,7 +100,7 @@ def __init__(self,
                                       weights=dict(zip(np.arange(p), self.feature_weights)), lagrange=1.)
 
     def fit(self, solve_args={'tol':1.e-12, 'min_its':50}, 
-            views=[]):
+            views=[], nboot=1000):
         """
         Fit the randomized lasso using `regreg`.
 
@@ -123,7 +123,7 @@ def fit(self, solve_args={'tol':1.e-12, 'min_its':50},
 
         p = self.nfeature
         self._view = glm_group_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
-        self._view.solve()
+        self._view.solve(nboot=nboot)
 
         views = copy(views); views.append(self._view)
         self._queries = multiple_queries(views)
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 1eebd38e5..47aae75c8 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -762,8 +762,6 @@ def __init__(self,
 
         self._logden = opt_sampler.log_density(opt_sampler.observed_score, opt_sample)
 
-        # we now remove the observed_score from full_sample
-        self.reconstructed_sample = opt_sampler.reconstruct_opt(opt_sample) # affine(opt_sample)
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
         # setup_target has been called on opt_sampler
@@ -872,10 +870,10 @@ def _weights(self,
 
         # In this function, \hat{\theta}_i will change with the Monte Carlo sample
 
-        _lognum = 0
+        internal_sample = []
         for i in range(len(log_densities)):
-            density_arg = np.multiply.outer(sample_stat, score_cov[i]) + nuisance[i][None, :] # these are now internal coordinates
-            _lognum += log_densities[i](density_arg, self.reconstructed_sample)
+            internal_sample.append(np.multiply.outer(sample_stat, score_cov[i]) + nuisance[i][None, :]) # these are now internal coordinates
+        _lognum = self.opt_sampler.log_density(internal_sample, self.opt_sample)
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
 
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index e288896ef..cd917c25b 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -32,7 +32,10 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
 
         W = np.ones(X.shape[1]) * 20
         conv = const(X, Y, W, randomizer=rand)
-        signs = conv.fit()
+        nboot = 1000
+        if SMALL_SAMPLES:
+            nboot = 20
+        signs = conv.fit(nboot=nboot)
 
         marginalizing_groups = None
         if marginalize:
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index 145c471bb..e6f088613 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -133,7 +133,7 @@ def grad_log_density(self, internal_state, opt_state):
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-        full_state = reconstruct_full_internal(self, internal_state, opt_state)
+        full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
 
         threshold = self.threshold
         weights = np.zeros_like(self.boundary, np.float)

From f6652e051bc1cf8cdaf8046fa44b4b8afad10fed Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 12 Sep 2017 09:32:55 -0700
Subject: [PATCH 224/617] removing the raw score

---
 selection/randomized/query.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 47aae75c8..56674d600 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -348,13 +348,10 @@ def __init__(self,
         # independent of the data in each view
 
         self.observed_score = [] # in the view's coordinates
-        self.observed_raw_score = [] # in the data coordinates, not the view's coordinates
-                                     # will typically be \nabla \ell(\bar{\beta}_E) - \nabla^2 \ell(\bar{\beta}_E) \bar{\beta}_E
         self.score_info = []
         for i in range(self.nqueries):
             obj = self.objectives[i]
             score_linear, score_offset = obj.score_transform
-            self.observed_raw_score.append(score_linear.dot(obj.observed_score_state) + score_offset)
             self.observed_score.append(obj.observed_score_state)
             self.score_info.append(obj.score_transform)
 
@@ -696,9 +693,10 @@ def reconstruct(self, state):
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
 
         for i in range(self.nqueries):
-            reconstructed[:,self.randomization_slice[i]] = (reconstruct_opt(self.objectives[i],  
-                                                                           state[:,self.opt_slice[i]]) + 
-                                                            self.observed_raw_score[i])
+            reconstructed[:,self.randomization_slice[i]] = reconstruct_full_from_internal(self.objectives[i],  
+                                                                                          self.observed_score[i],
+                                                                                          state[:,self.opt_slice[i]])
+
 
         return np.squeeze(reconstructed)
 

From f7416efa1dd6dd5a979ed2cf31527a0030fcecc1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 12 Sep 2017 09:39:44 -0700
Subject: [PATCH 225/617] removing all reconstruct methods in query -- still in
 target

---
 selection/randomized/query.py                 | 61 -------------------
 .../tests/test_optimization_sampler.py        |  2 +-
 selection/randomized/tests/test_sampling.py   | 33 +++++++++-
 3 files changed, 33 insertions(+), 63 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 56674d600..dca12eac6 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -386,7 +386,6 @@ def gradient(self, state):
         # randomization_gradient are gradients of a CONVEX function
 
         for i in range(self.nqueries):
-            reconstructed_opt_state = reconstruct_opt(self.objectives[i], opt_state[self.opt_slice[i]])
             opt_linear, opt_offset = self.objectives[i].opt_transform
             opt_grad[self.opt_slice[i]] = \
                 opt_linear.T.dot(self.objectives[i].grad_log_density(self.observed_score[i], opt_state[self.opt_slice[i]]))
@@ -669,66 +668,6 @@ def crude_lipschitz(self):
             lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
         return lipschitz
 
-    def reconstruct(self, state):
-        '''
-        Reconstruction of randomization at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be array with each row a state.
-
-        Returns
-        -------
-        reconstructed : np.float
-           Has shape of `opt_vars` with same number of rows
-           as `state`.
-
-        '''
-
-        state = np.atleast_2d(state)
-        if state.ndim > 2:
-            raise ValueError('expecting at most 2-dimensional array')
-
-        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
-
-        for i in range(self.nqueries):
-            reconstructed[:,self.randomization_slice[i]] = reconstruct_full_from_internal(self.objectives[i],  
-                                                                                          self.observed_score[i],
-                                                                                          state[:,self.opt_slice[i]])
-
-
-        return np.squeeze(reconstructed)
-
-    def reconstruct_opt(self, state):
-        '''
-        Reconstruction of randomization at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be array with each row a state.
-
-        Returns
-        -------
-        reconstructed : np.float
-           Has shape of `opt_vars` with same number of rows
-           as `state`.
-
-        '''
-
-        state = np.atleast_2d(state)
-        if state.ndim > 2:
-            raise ValueError('expecting at most 2-dimensional array')
-
-        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
-
-        for i in range(self.nqueries):
-            reconstructed[:,self.randomization_slice[i]] = reconstruct_opt(self.objectives[i],
-                                                                           state[:,self.opt_slice[i]])
-
-        return np.squeeze(reconstructed)
-
     def log_density(self, internal_state, opt_state):
         '''
         Log of randomization density at current state.
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
index 46a28c100..27afbfcc4 100644
--- a/selection/randomized/tests/test_optimization_sampler.py
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -59,5 +59,5 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
                                burnin,
                                stepsize=1.e-10)
 
-        opt_sampler.reconstruct(S)
+        
         
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index a66eb2e70..f1f74f717 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -15,6 +15,7 @@
 
 from ...tests.decorators import set_sampling_params_iftrue
 from ..randomization import randomization
+from ..reconstruction import reconstruct_full_from_internal
 
 
 class randomization_ppf(randomization):
@@ -167,7 +168,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
 
         # let's also reconstruct the omegas to compare
 
-        S_omega = opt_sampler.reconstruct(S)
+        S_omega = reconstruct_opt(opt_sampler, S)
 
         opt_samples = sample_opt_vars(X, 
                                       Y, 
@@ -185,3 +186,33 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
     return results
 
     
+def reconstruct_opt(opt_sampler, state):
+    '''
+    Reconstruction of randomization at current state.
+    Parameters
+    ----------
+    state : np.float
+       State of sampler made up of `(target, opt_vars)`.
+       Can be array with each row a state.
+
+    Returns
+    -------
+    reconstructed : np.float
+       Has shape of `opt_vars` with same number of rows
+       as `state`.
+
+    '''
+
+    state = np.atleast_2d(state)
+    if state.ndim > 2:
+        raise ValueError('expecting at most 2-dimensional array')
+
+    reconstructed = np.zeros((state.shape[0], opt_sampler.total_randomization_length))
+
+    for i in range(opt_sampler.nqueries):
+        reconstructed[:,opt_sampler.randomization_slice[i]] = reconstruct_full_from_internal(opt_sampler.objectives[i],  
+                                                                                             opt_sampler.observed_score[i],
+                                                                                             state[:,opt_sampler.opt_slice[i]])
+
+
+    return np.squeeze(reconstructed)

From 6853b610e3fe210d443d1c51aece67963754a234 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Tue, 12 Sep 2017 10:29:02 -0700
Subject: [PATCH 226/617] plotting pivots

---
 .../tests/test_opt_weighted_intervals.py      | 42 +++++++++++++++----
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index cf2c72337..14c186b9b 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -9,6 +9,7 @@
                                poisson_instance)
 from ...tests.flags import SMALL_SAMPLES
 from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
+import matplotlib.pyplot as plt
 
 from scipy.stats import t as tdist
 from ..glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
@@ -18,12 +19,13 @@
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100)
 def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
+    results=[]
     cls = lasso
-    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace']):
+    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']):
 
         inst, const = const_info
 
-        X, Y = inst(n=100, p=10, s=0)[:2]
+        X, Y, beta = inst(n=100, p=10, s=3)[:3]
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 1
@@ -63,9 +65,35 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         opt_sampler.setup_target(boot_target,
                                  form_covariances)
 
-        selective_pvalues = opt_sampler.coefficient_pvalues(unpenalized_mle, sample=S)
-        print("pvalues ", selective_pvalues)
-        selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
-        print(selective_CI)
+        sel_pivots = opt_sampler.coefficient_pvalues(unpenalized_mle, parameter = beta[selected_features], sample=S)
+        print("pivots ", sel_pivots)
+        results.append((sel_pivots,))
+        #selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
+        #print(selective_CI)
+
+    return results
+
+from statsmodels.distributions import ECDF
+
+
+def main(ndraw=10000, burnin=2000, nsim=2):
+
+    sel_pivots_all = [[],[]]
+    for i in range(nsim):
+        for idx, (sel_pivots,) in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
+            sel_pivots_all[idx].append(sel_pivots)
+
+    for idx in range(2):
+
+        fig = plt.figure(num=idx, figsize=(1,1))
+        plt.clf()
+        xval = np.linspace(0,1,50)
+        flat_list = [item for sublist in sel_pivots_all[idx] for item in sublist]
+        plt.plot(xval, ECDF(flat_list)(xval), label='selective')
+        plt.plot(xval, xval, 'k-', lw=1)
+        plt.legend(loc='lower right')
+
+
+    plt.show()
+
 
-        return selective_CI

From 063ce54a65bf5628d2d63d9077e7ed784b7972e9 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 12 Sep 2017 10:47:21 -0700
Subject: [PATCH 227/617] seem to have broken conditional sampling by changing
 parameters

---
 selection/randomized/tests/test_sampling.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index f1f74f717..70cff56a6 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -92,7 +92,7 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     return np.concatenate((abs_beta_samples, u_samples), axis=1), reordered_omega
 
 def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
-    scale = np.linspace(1, 1.2, p)
+    scale = np.linspace(2, 3, p)
     X = np.identity(n)[:,:p]
     X *= scale[None, :]
 
@@ -130,12 +130,13 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
         X, Y, beta = orthogonal_design(n=100, 
                                        p=9, 
                                        s=3,
-                                       signal=(2,3), 
+                                       signal=(1,2), 
                                        sigma=1.2)[:3]
         n, p = X.shape
 
-        W = np.ones(X.shape[1]) * 1.2
-        randomizer_scale =1.
+        W = np.ones(X.shape[1]) * 3.1
+        W[0] = 0.
+        randomizer_scale = 1.
         conv = const(X, 
                      Y, 
                      W, 

From 8e67eae707751f4ec08d3975fe08ed010e3ef602 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 12 Sep 2017 11:37:53 -0700
Subject: [PATCH 228/617] renaming score to internal where appropriate

---
 selection/randomized/M_estimator.py           |  6 ++--
 selection/randomized/cv_view.py               |  2 +-
 selection/randomized/greedy_step.py           |  8 ++++--
 selection/randomized/query.py                 | 28 +++++++++----------
 .../randomized/tests/test_nonrandomized.py    |  8 +++---
 selection/randomized/tests/test_sampling.py   | 18 ++++++------
 selection/randomized/threshold_score.py       |  2 +-
 7 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 8e40ec4ff..90e1d6dd6 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -169,10 +169,10 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         _hessian = loss.hessian(beta_full)
         self._beta_full = beta_full
 
-        # observed state for score
+        # observed state for score in internal coordinates
 
-        self.observed_score_state = np.hstack([_beta_unpenalized * _sqrt_scaling,
-                                               -loss.smooth_objective(beta_full, 'grad')[inactive] / _sqrt_scaling])
+        self.observed_internal_state = np.hstack([_beta_unpenalized * _sqrt_scaling,
+                                                  -loss.smooth_objective(beta_full, 'grad')[inactive] / _sqrt_scaling])
 
         # form linear part
         self.num_opt_var = self.observed_opt_state.shape[0]
diff --git a/selection/randomized/cv_view.py b/selection/randomized/cv_view.py
index 3baca0928..35c62a8f6 100644
--- a/selection/randomized/cv_view.py
+++ b/selection/randomized/cv_view.py
@@ -52,7 +52,7 @@ def solve(self, glmnet=False, K=5):
 
         if (self.scale1 is not None) and (self.scale2 is not None):
             self.SD = self.SD+self.scale1**2+self.scale2**2
-        (self.observed_opt_state, self.observed_score_state) = (CVR_val, CV1_val)
+        (self.observed_opt_state, self.observed_internal_state) = (CVR_val, CV1_val)
         self.num_opt_var = self.lam_seq.shape[0]
         self.lam_idx = list(self.lam_seq).index(self.lam_CVR)  # index of the minimizer
 
diff --git a/selection/randomized/greedy_step.py b/selection/randomized/greedy_step.py
index e134f3b6c..896616a91 100644
--- a/selection/randomized/greedy_step.py
+++ b/selection/randomized/greedy_step.py
@@ -74,20 +74,22 @@ def solve(self, nboot=2000):
             
         # score at unpenalized M-estimator
 
-        self.observed_score_state = - self.loss.smooth_objective(beta_full, 'grad')[candidate]
+        self.observed_internal_state = - self.loss.smooth_objective(beta_full, 'grad')[candidate]
         self._randomZ = self.randomization.sample()
 
         self.num_opt_var = self._randomZ.shape[0]
 
         # find the randomized maximizer
 
-        randomized_score = self.observed_score_state - self._randomZ
+        # score transform is identity here so internal is the same as score coords
+
+        randomized_score = self.observed_internal_state - self._randomZ
         terms = self.group_lasso_dual.terms(randomized_score)
 
         # assuming a.s. unique maximizing group here
 
         maximizing_group = np.unique(self.group_lasso_dual.groups)[np.argmax(terms)]
-        maximizing_subgrad = self.observed_score_state[self.group_lasso_dual.groups == maximizing_group]
+        maximizing_subgrad = self.observed_internal_state[self.group_lasso_dual.groups == maximizing_group]
         maximizing_subgrad /= np.linalg.norm(maximizing_subgrad) # this is now a unit vector
         maximizing_subgrad *= self.group_lasso_dual.weights[maximizing_group] # now a vector of length given by weight of maximizing group
         self.maximizing_subgrad = np.zeros(candidate.sum())
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index dca12eac6..47d3e94f6 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -50,7 +50,7 @@ def linear_decomposition(self, target_score_cov, target_cov, observed_target_sta
 
         linear_part = target_score_cov.T.dot(np.linalg.pinv(target_cov))
 
-        offset = self.observed_score_state - linear_part.dot(observed_target_state)
+        offset = self.observed_internal_state - linear_part.dot(observed_target_state)
 
         # now compute the composition of this map with
         # self.score_transform
@@ -102,7 +102,7 @@ def setup_sampler(self):
         Setup query to prepare for sampling.
         Should set a few key attributes:
 
-            - observed_score_state
+            - observed_internal_state
             - num_opt_var
             - observed_opt_state
             - opt_transform
@@ -347,12 +347,12 @@ def __init__(self,
         # We implicitly assume that we are sampling a target
         # independent of the data in each view
 
-        self.observed_score = [] # in the view's coordinates
+        self.observed_internal = [] # in the view's coordinates
         self.score_info = []
         for i in range(self.nqueries):
             obj = self.objectives[i]
             score_linear, score_offset = obj.score_transform
-            self.observed_score.append(obj.observed_score_state)
+            self.observed_internal.append(obj.observed_internal_state)
             self.score_info.append(obj.score_transform)
 
     def projection(self, state):
@@ -388,7 +388,7 @@ def gradient(self, state):
         for i in range(self.nqueries):
             opt_linear, opt_offset = self.objectives[i].opt_transform
             opt_grad[self.opt_slice[i]] = \
-                opt_linear.T.dot(self.objectives[i].grad_log_density(self.observed_score[i], opt_state[self.opt_slice[i]]))
+                opt_linear.T.dot(self.objectives[i].grad_log_density(self.observed_internal[i], opt_state[self.opt_slice[i]]))
         return -opt_grad
 
     def sample(self, ndraw, burnin, stepsize=None):
@@ -687,6 +687,7 @@ def log_density(self, internal_state, opt_state):
 
         for i in range(self.nqueries):
             log_dens = self.objectives[i].log_density
+            print(internal_state[i].shape, 'internal')
             value += log_dens(internal_state[i], opt_state[:, self.opt_slice[i]]) # may have to broadcast shape here
         return np.squeeze(value)
 
@@ -697,7 +698,7 @@ def __init__(self,
                  opt_sample,
                  observed):
 
-        self._logden = opt_sampler.log_density(opt_sampler.observed_score, opt_sample)
+        self._logden = opt_sampler.log_density(opt_sampler.observed_internal, opt_sample)
 
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
@@ -735,12 +736,11 @@ def pivot(self,
         for i in range(len(self.opt_sampler.objectives)):
             cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
 
-            cur_nuisance = self.opt_sampler.observed_score[i] - cur_score_cov * observed_stat / target_cov
-
             # cur_nuisance is in the view's internal coordinates
+            cur_nuisance = self.opt_sampler.observed_internal[i] - cur_score_cov * observed_stat / target_cov
+
             score_linear, score_offset = self.opt_sampler.score_info[i]
-            # final_nuisance is on the scale of the original randomization
-            final_nuisance = score_linear.dot(cur_nuisance) + score_offset
+
             nuisance.append(cur_nuisance)
 
             score_cov.append(cur_score_cov / target_cov)
@@ -748,8 +748,7 @@ def pivot(self,
 
         weights = self._weights(sample_stat + candidate,  # normal sample under candidate
                                 nuisance,                 # nuisance sufficient stats for each view
-                                score_cov,                # points will be moved like sample * score_cov
-                                self.opt_sampler.log_densities)
+                                score_cov)                # points will be moved like sample * score_cov
         
         pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
 
@@ -788,15 +787,14 @@ def _rootL(gamma):
     def _weights(self, 
                  sample_stat,
                  nuisance,
-                 score_cov,
-                 log_densities):
+                 score_cov):
 
         # Here we should loop through the views
         # and move the score of each view 
         # for each projected (through linear_func) normal sample
         # using the linear decomposition
 
-        # We need access to the map that takes observed_score for each view
+        # We need access to the map that takes observed_internal for each view
         # and constructs the full randomization -- this is the reconstruction map
         # for each view
 
diff --git a/selection/randomized/tests/test_nonrandomized.py b/selection/randomized/tests/test_nonrandomized.py
index a1da8b4ae..a009ee409 100644
--- a/selection/randomized/tests/test_nonrandomized.py
+++ b/selection/randomized/tests/test_nonrandomized.py
@@ -42,7 +42,7 @@ def test_nonrandomized(s=0,
     if nactive == 0:
         return None
 
-    #score_mean = M_est.observed_score_state.copy()
+    #score_mean = M_est.observed_internal_state.copy()
     #score_mean[nactive:] = 0
     M_est.setup_sampler(score_mean = np.zeros(p))
     #M_est.setup_sampler(score_mean=score_mean)
@@ -51,10 +51,10 @@ def test_nonrandomized(s=0,
     if set(nonzero).issubset(np.nonzero(active)[0]):
         check_screen=True
         #test_stat = lambda x: np.linalg.norm(x)
-        #return M_est.hypothesis_test(test_stat, test_stat(M_est.observed_score_state), stepsize=1./p)
+        #return M_est.hypothesis_test(test_stat, test_stat(M_est.observed_internal_state), stepsize=1./p)
 
-        ci = M_est.confidence_intervals(M_est.observed_score_state)
-        pivots = M_est.coefficient_pvalues(M_est.observed_score_state)
+        ci = M_est.confidence_intervals(M_est.observed_internal_state)
+        pivots = M_est.coefficient_pvalues(M_est.observed_internal_state)
         def coverage(LU):
             L, U = LU[:, 0], LU[:, 1]
             covered = np.zeros(nactive)
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 70cff56a6..6ad4f79af 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -62,14 +62,14 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     for i in range(nactive):
         var = active_set[i]
         if signs[var]>0:
-            lower[i] = (-X[:, var].T.dot(y) + lam * signs[var])
+            lower[i] = (-X[:, var].T.dot(y) + lam[var] * signs[var])
             upper[i] = np.inf
         else:
             lower[i] = -np.inf
-            upper[i] = (-X[:,var].T.dot(y) + lam * signs[var]) 
+            upper[i] = (-X[:,var].T.dot(y) + lam[var] * signs[var]) 
 
-    lower[range(nactive, p)] = -lam - X[:, inactive_set].T.dot(y)
-    upper[range(nactive, p)] = lam - X[:, inactive_set].T.dot(y)
+    lower[range(nactive, p)] = -lam[inactive_set] - X[:, inactive_set].T.dot(y)
+    upper[range(nactive, p)] = lam[inactive_set] - X[:, inactive_set].T.dot(y)
 
     omega_samples = sampling_truncated_dist(lower, 
                                             upper, 
@@ -79,7 +79,7 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     abs_beta_samples = np.true_divide( 
                           omega_samples[:, :nactive] + 
                           X[:, active_set].T.dot(y) - 
-                          lam * signs[active_set], 
+                          lam[active_set] * signs[active_set], 
                           (epsilon + Xdiag[active_set]) * signs[active_set])
     u_samples = omega_samples[:, nactive:] + X[:, inactive_set].T.dot(y)
 
@@ -134,8 +134,8 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
                                        sigma=1.2)[:3]
         n, p = X.shape
 
-        W = np.ones(X.shape[1]) * 3.1
-        W[0] = 0.
+        W = np.linspace(2, 3, X.shape[1])
+        #W[0] = 0
         randomizer_scale = 1.
         conv = const(X, 
                      Y, 
@@ -175,7 +175,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
                                       Y, 
                                       selected_features, 
                                       signs, 
-                                      W[0], 
+                                      W, 
                                       conv.ridge_term, 
                                       randomizer, 
                                       nsamples=ndraw)
@@ -212,7 +212,7 @@ def reconstruct_opt(opt_sampler, state):
 
     for i in range(opt_sampler.nqueries):
         reconstructed[:,opt_sampler.randomization_slice[i]] = reconstruct_full_from_internal(opt_sampler.objectives[i],  
-                                                                                             opt_sampler.observed_score[i],
+                                                                                             opt_sampler.observed_internal[i],
                                                                                              state[:,opt_sampler.opt_slice[i]])
 
 
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index e6f088613..8e58b39f3 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -116,7 +116,7 @@ def solve(self, nboot=2000):
 
         self.interior = ~self.boundary
 
-        self.observed_score_state = candidate_score
+        self.observed_internal_state = candidate_score
 
         self.selection_variable = {'boundary_set': self.boundary}
 

From 4130221d7271e80599b43dbf0ee036ac69d1192e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 12 Sep 2017 12:13:53 -0700
Subject: [PATCH 229/617] looking at exact zero penalty and close to zero
 penalty

---
 doc/examples/conditional_sampling.py        |  4 +--
 selection/randomized/tests/test_sampling.py | 28 ++++++++++++---------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/doc/examples/conditional_sampling.py b/doc/examples/conditional_sampling.py
index 16bbf499c..efd7d6779 100644
--- a/doc/examples/conditional_sampling.py
+++ b/doc/examples/conditional_sampling.py
@@ -9,14 +9,14 @@
 
 from selection.randomized.tests.test_sampling import test_conditional_law
 
-def main(ndraw=50000, burnin=5000, remove_atom=False, which='omega'):
+def main(ndraw=50000, burnin=5000, remove_atom=False, unpenalized=True):
 
     fig_idx = 0
     for (rand,
          mcmc_opt, 
          mcmc_omega,
          truncated_opt,
-         truncated_omega) in test_conditional_law(ndraw=ndraw, burnin=burnin, stepsize=1.e-2):
+         truncated_omega) in test_conditional_law(ndraw=ndraw, burnin=burnin, stepsize=1.e-2, unpenalized=unpenalized):
 
         fig_idx += 1
         fig = plt.figure(num=fig_idx, figsize=(8,8))
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 6ad4f79af..cc4338b51 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -54,23 +54,26 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     Xdiag = np.diag(X.T.dot(X))
     p = X.shape[1]
     nactive = active.sum()
-    lower = np.zeros(p)
-    upper = np.zeros(p)
+    lower = -np.ones(p) * np.inf
+    upper = -lower
     active_set = np.where(active)[0]
     inactive_set = np.where(~active)[0]
 
     for i in range(nactive):
         var = active_set[i]
-        if signs[var]>0:
-            lower[i] = (-X[:, var].T.dot(y) + lam[var] * signs[var])
-            upper[i] = np.inf
-        else:
-            lower[i] = -np.inf
-            upper[i] = (-X[:,var].T.dot(y) + lam[var] * signs[var]) 
+        if lam[var] != 0:
+            if signs[var]>0:
+                    lower[i] = (-X[:, var].T.dot(y) + lam[var] * signs[var])
+                    upper[i] = np.inf
+            else:
+                lower[i] = -np.inf
+                upper[i] = (-X[:,var].T.dot(y) + lam[var] * signs[var]) 
 
     lower[range(nactive, p)] = -lam[inactive_set] - X[:, inactive_set].T.dot(y)
     upper[range(nactive, p)] = lam[inactive_set] - X[:, inactive_set].T.dot(y)
 
+    print(lower, 'lower')
+    print(upper, 'upper')
     omega_samples = sampling_truncated_dist(lower, 
                                             upper, 
                                             randomization, 
@@ -115,7 +118,7 @@ def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
 
 @set_seed_iftrue(SET_SEED, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None):
+def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None, unpenalized=False):
     """
     Checks the conditional law of opt variables given the data
     """
@@ -135,7 +138,10 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
         n, p = X.shape
 
         W = np.linspace(2, 3, X.shape[1])
-        #W[0] = 0
+        if unpenalized:
+            W[4] = 0
+        else:
+            W[4] = 1.e-5
         randomizer_scale = 1.
         conv = const(X, 
                      Y, 
@@ -157,8 +163,6 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
 
         selected_features = conv._view.selection_variable['variables']
 
-        conv._queries.setup_sampler(form_covariances=None)
-        conv._queries.setup_opt_state()
         opt_sampler = optimization_sampler(conv._queries)
 
         S = opt_sampler.sample(ndraw,

From 3a0c9559799293d5406068c7854a31e125c419ad Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Tue, 12 Sep 2017 16:33:30 -0700
Subject: [PATCH 230/617] pivot bug

---
 selection/randomized/query.py                 |  4 +-
 .../tests/test_opt_weighted_intervals.py      | 46 +++++++++++++++----
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 47d3e94f6..951ba93e7 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -750,7 +750,7 @@ def pivot(self,
                                 nuisance,                 # nuisance sufficient stats for each view
                                 score_cov)                # points will be moved like sample * score_cov
         
-        pivot = np.mean((sample_stat <= observed_stat) * weights) / np.mean(weights)
+        pivot = np.mean((sample_stat + candidate <= observed_stat) * weights) / np.mean(weights)
 
         if alternative == 'twosided':
             return 2 * min(pivot, 1 - pivot)
@@ -806,7 +806,7 @@ def _weights(self,
         # In this function, \hat{\theta}_i will change with the Monte Carlo sample
 
         internal_sample = []
-        for i in range(len(log_densities)):
+        for i in range(len(self.opt_sampler.log_densities)):
             internal_sample.append(np.multiply.outer(sample_stat, score_cov[i]) + nuisance[i][None, :]) # these are now internal coordinates
         _lognum = self.opt_sampler.log_density(internal_sample, self.opt_sample)
         _logratio = _lognum - self._logden
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index cf2c72337..348b3f018 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -9,21 +9,23 @@
                                poisson_instance)
 from ...tests.flags import SMALL_SAMPLES
 from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue
+import matplotlib.pyplot as plt
 
 from scipy.stats import t as tdist
 from ..glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
 from ..M_estimator import restricted_Mest
 
-@set_seed_iftrue(True, 200)
+@set_seed_iftrue(False, 200)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100)
 def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
+    results=[]
     cls = lasso
-    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace']):
+    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']):
 
         inst, const = const_info
 
-        X, Y = inst(n=100, p=10, s=0)[:2]
+        X, Y, beta = inst(n=100, p=10, s=3, signal=5.)[:3]
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 1
@@ -63,9 +65,37 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         opt_sampler.setup_target(boot_target,
                                  form_covariances)
 
-        selective_pvalues = opt_sampler.coefficient_pvalues(unpenalized_mle, sample=S)
-        print("pvalues ", selective_pvalues)
-        selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
-        print(selective_CI)
+        sel_pivots = opt_sampler.coefficient_pvalues(unpenalized_mle, parameter = beta[selected_features], sample=S)
+        print("pivots ", sel_pivots)
+        results.append((rand, sel_pivots,))
+        #selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
+        #print(selective_CI)
+
+    return results
+
+from statsmodels.distributions import ECDF
+
+
+def main(ndraw=10000, burnin=2000, nsim=10):
+
+    sel_pivots_all = [[],[]]
+    rand_all = []
+    for i in range(nsim):
+        for idx, (rand, sel_pivots,) in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
+            sel_pivots_all[idx].append(sel_pivots)
+            if i==0:
+               rand_all.append(rand)
+    xval = np.linspace(0, 1, 200)
+    print(rand_all)
+
+    for idx in range(2):
+        fig = plt.figure(num=idx, figsize=(8,8))
+        plt.clf()
+        flat_list = [item for sublist in sel_pivots_all[idx] for item in sublist]
+        print(len(flat_list))
+        plt.plot(xval, ECDF(flat_list)(xval), label='selective')
+        plt.plot(xval, xval, 'k-', lw=1)
+        plt.legend(loc='lower right')
+        plt.savefig(''.join(["fig", rand_all[idx], '.pdf']))
+
 
-        return selective_CI

From d865a47e570bd1372f39c7a2a0f95c024239426c Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Tue, 12 Sep 2017 16:48:26 -0700
Subject: [PATCH 231/617] better plot fn

---
 selection/randomized/tests/test_opt_weighted_intervals.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index a9fa2cd25..6640b01f0 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -80,17 +80,20 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
 def main(ndraw=10000, burnin=2000, nsim=10):
 
-    sel_pivots_all = [[],[]]
+    sel_pivots_all = list()
     rand_all = []
     for i in range(nsim):
         for idx, (rand, sel_pivots,) in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
+            print(idx)
+            if i==0:
+                sel_pivots_all.append([])
             sel_pivots_all[idx].append(sel_pivots)
             if i==0:
                rand_all.append(rand)
     xval = np.linspace(0, 1, 200)
     print(rand_all)
 
-    for idx in range(2):
+    for idx in range(len(rand_all)):
         fig = plt.figure(num=idx, figsize=(8,8))
         plt.clf()
         flat_list = [item for sublist in sel_pivots_all[idx] for item in sublist]

From 21e2deebb218dcfdedb6b4139d89812e8f532a67 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Tue, 12 Sep 2017 17:18:26 -0700
Subject: [PATCH 232/617] computing ci coverage

---
 .../tests/test_opt_weighted_intervals.py      | 41 ++++++++++++-------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 6640b01f0..47fad799b 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -28,7 +28,7 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         X, Y, beta = inst(n=100, p=10, s=3, signal=5.)[:3]
         n, p = X.shape
 
-        W = np.ones(X.shape[1]) * 1
+        W = np.ones(X.shape[1]) * 8
         conv = const(X, Y, W, randomizer=rand)
         signs = conv.fit()
         print("signs", signs)
@@ -40,6 +40,8 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
         selected_features = conv._view.selection_variable['variables']
 
+        #if not set(np.where(beta)[0]).issubset(set(np.where(selected_features)[0])):
+        #    return None
         #conv.summary(selected_features,
         #             ndraw=ndraw,
         #             burnin=burnin,
@@ -67,38 +69,49 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
         sel_pivots = opt_sampler.coefficient_pvalues(unpenalized_mle, parameter = beta[selected_features], sample=S)
         print("pivots ", sel_pivots)
-        results.append((rand, sel_pivots,))
-
-        #selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
-        #print(selective_CI)
+        selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
+        print(selective_CI)
+        results.append((rand, sel_pivots,selective_CI, beta[selected_features]))
 
     return results
 
+
 from statsmodels.distributions import ECDF
 
+def compute_coverage(sel_ci, true_vec):
+    nactive = true_vec.shape[0]
+    coverage = np.zeros(nactive)
+    for i in range(nactive):
+        if true_vec[i]>=sel_ci[i,0] and true_vec[i]<=sel_ci[i,1]:
+            coverage[i]=1
+    return coverage
 
 
-def main(ndraw=10000, burnin=2000, nsim=10):
+def main(ndraw=20000, burnin=5000, nsim=10):
 
     sel_pivots_all = list()
+    sel_ci_all = list()
     rand_all = []
     for i in range(nsim):
-        for idx, (rand, sel_pivots,) in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
-            print(idx)
+        for idx, (rand, sel_pivots, sel_ci, true_vec) in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
             if i==0:
                 sel_pivots_all.append([])
+                rand_all.append(rand)
+                sel_ci_all.append([])
             sel_pivots_all[idx].append(sel_pivots)
-            if i==0:
-               rand_all.append(rand)
+            sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
+
     xval = np.linspace(0, 1, 200)
-    print(rand_all)
 
     for idx in range(len(rand_all)):
         fig = plt.figure(num=idx, figsize=(8,8))
         plt.clf()
-        flat_list = [item for sublist in sel_pivots_all[idx] for item in sublist]
-        print(len(flat_list))
-        plt.plot(xval, ECDF(flat_list)(xval), label='selective')
+        sel_pivots_all[idx] = [item for sublist in sel_pivots_all[idx] for item in sublist]
+        plt.plot(xval, ECDF(sel_pivots_all[idx])(xval), label='selective')
         plt.plot(xval, xval, 'k-', lw=1)
         plt.legend(loc='lower right')
+
+        sel_ci_all[idx] = [item for sublist in sel_ci_all[idx] for item in sublist]
+        plt.title(''.join(["coverage ", str(np.mean(sel_ci_all[idx]))]))
         plt.savefig(''.join(["fig", rand_all[idx], '.pdf']))
+

From 8d90fed4a562894b77d78f1d0bddacd6db543c62 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Wed, 13 Sep 2017 10:43:22 -0700
Subject: [PATCH 233/617] summary in conv

---
 selection/randomized/convenience.py           | 42 ++++++++++-------
 .../tests/test_opt_weighted_intervals.py      | 45 +++++--------------
 2 files changed, 37 insertions(+), 50 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 21b5b40e7..d692be287 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -10,9 +10,12 @@
 from .glm import (target as glm_target, 
                   glm_group_lasso,
                   glm_greedy_step,
-                  glm_threshold_score)
+                  glm_threshold_score,
+                  glm_nonparametric_bootstrap,
+                  pairs_bootstrap_glm)
 from .randomization import randomization
-from .query import multiple_queries
+from .query import multiple_queries, optimization_sampler
+from .M_estimator import restricted_Mest
 
 class lasso(object):
 
@@ -201,27 +204,32 @@ def summary(self, selected_features,
         if not hasattr(self, "_queries"):
             raise ValueError('run `fit` method before producing summary.')
 
-        target_sampler, target_observed = glm_target(self.loglike,
-                                                     selected_features,
-                                                     self._queries,
-                                                     bootstrap=bootstrap)
-
         if null_value is None:
             null_value = np.zeros(self.loglike.shape[0])
 
+        self._queries.setup_sampler(form_covariances=None)
+        self._queries.setup_opt_state()
+        opt_sampler = optimization_sampler(self._queries)
+
+        S = opt_sampler.sample(ndraw,
+                               burnin,
+                               stepsize=1.e-3)
+        # print(S.shape)
+        # print([np.mean(S[:,i]) for i in range(p)])
+
+        unpenalized_mle = restricted_Mest(self.loglike, selected_features)
+        n = self.loglike.data[0].shape[0]
+        form_covariances = glm_nonparametric_bootstrap(n, n)
+        # conv._queries.setup_sampler(form_covariances)
+        boot_target, boot_target_observed = pairs_bootstrap_glm(self.loglike, selected_features, inactive=None)
+        opt_sampler.setup_target(boot_target, form_covariances)
+
+        pvalues = opt_sampler.coefficient_pvalues(unpenalized_mle, parameter=null_value, sample=S)
         intervals = None
-        full_sample = target_sampler.sample(ndraw=ndraw,
-                                            burnin=burnin,
-                                            keep_opt=False)
-        pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                     parameter=null_value,
-                                                     sample=full_sample)
         if compute_intervals:
-            intervals = target_sampler.confidence_intervals(target_observed,
-                                                            sample=full_sample,
-                                                            level=level)
+            intervals = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
 
-        return intervals, pvalues
+        return pvalues, intervals
 
     @staticmethod
     def gaussian(X, 
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 47fad799b..7b97161c9 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -19,7 +19,7 @@
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=1000, burnin=100)
 def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
-    results=[]
+    results = []
     cls = lasso
     for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']):
 
@@ -37,41 +37,18 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         #marginalizing_groups[:int(p/2)] = True
         #conditioning_groups = ~marginalizing_groups
         #conditioning_groups[-int(p/4):] = False
+        #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+        #                           conditioning_groups=conditioning_groups)
 
         selected_features = conv._view.selection_variable['variables']
 
-        #if not set(np.where(beta)[0]).issubset(set(np.where(selected_features)[0])):
-        #    return None
-        #conv.summary(selected_features,
-        #             ndraw=ndraw,
-        #             burnin=burnin,
-        #             compute_intervals=True)
-
-        #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
-        #                           conditioning_groups=conditioning_groups)
+        sel_pivots, sel_ci = conv.summary(selected_features,
+                                          null_value=beta[selected_features],
+                                          ndraw=ndraw,
+                                          burnin=burnin,
+                                          compute_intervals=True)
 
-        conv._queries.setup_sampler(form_covariances=None)
-        conv._queries.setup_opt_state()
-        opt_sampler = optimization_sampler(conv._queries)
-
-        S = opt_sampler.sample(ndraw,
-                               burnin,
-                               stepsize=1.e-3)
-        #print(S.shape)
-        #print([np.mean(S[:,i]) for i in range(p)])
-
-        unpenalized_mle = restricted_Mest(conv.loglike, selected_features)
-        form_covariances = glm_nonparametric_bootstrap(n, n)
-        #conv._queries.setup_sampler(form_covariances)
-        boot_target, boot_target_observed = pairs_bootstrap_glm(conv.loglike, selected_features, inactive=None)
-        opt_sampler.setup_target(boot_target,
-                                 form_covariances)
-
-        sel_pivots = opt_sampler.coefficient_pvalues(unpenalized_mle, parameter = beta[selected_features], sample=S)
-        print("pivots ", sel_pivots)
-        selective_CI = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
-        print(selective_CI)
-        results.append((rand, sel_pivots,selective_CI, beta[selected_features]))
+        results.append((rand, sel_pivots, sel_ci, beta[selected_features]))
 
     return results
 
@@ -87,7 +64,7 @@ def compute_coverage(sel_ci, true_vec):
     return coverage
 
 
-def main(ndraw=20000, burnin=5000, nsim=10):
+def main(ndraw=20000, burnin=5000, nsim=2):
 
     sel_pivots_all = list()
     sel_ci_all = list()
@@ -99,6 +76,7 @@ def main(ndraw=20000, burnin=5000, nsim=10):
                 rand_all.append(rand)
                 sel_ci_all.append([])
             sel_pivots_all[idx].append(sel_pivots)
+            print(sel_ci)
             sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
 
     xval = np.linspace(0, 1, 200)
@@ -112,6 +90,7 @@ def main(ndraw=20000, burnin=5000, nsim=10):
         plt.legend(loc='lower right')
 
         sel_ci_all[idx] = [item for sublist in sel_ci_all[idx] for item in sublist]
+        print(sel_ci_all)
         plt.title(''.join(["coverage ", str(np.mean(sel_ci_all[idx]))]))
         plt.savefig(''.join(["fig", rand_all[idx], '.pdf']))
 

From 39959cc3e4eeea4f569fbdb56ba0c442b88a6f31 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Wed, 13 Sep 2017 11:55:03 -0700
Subject: [PATCH 234/617] parametric cov runs but bad coverage

---
 selection/randomized/convenience.py           | 62 +++++++++++--------
 selection/randomized/glm.py                   |  1 +
 selection/randomized/query.py                 |  3 +-
 .../tests/test_opt_weighted_intervals.py      |  2 +-
 selection/randomized/tests/test_sampling.py   |  1 +
 5 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index d692be287..2b15ed39b 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -9,9 +9,11 @@
 
 from .glm import (target as glm_target, 
                   glm_group_lasso,
+                  glm_group_lasso_parametric,
                   glm_greedy_step,
                   glm_threshold_score,
                   glm_nonparametric_bootstrap,
+                  glm_parametric_covariance,
                   pairs_bootstrap_glm)
 from .randomization import randomization
 from .query import multiple_queries, optimization_sampler
@@ -40,7 +42,7 @@ def __init__(self,
                  ridge_term,
                  randomizer_scale,
                  randomizer='gaussian',
-                 covariance_estimator=None):
+                 parametric_cov_estimator=False):
         r"""
 
         Create a new post-selection object for the LASSO problem
@@ -88,7 +90,7 @@ def __init__(self,
             feature_weights = np.ones(loglike.shape) * feature_weights
         self.feature_weights = np.asarray(feature_weights)
 
-        self.covariance_estimator = covariance_estimator
+        self.parametric_cov_estimator = parametric_cov_estimator
 
         if randomizer == 'laplace':
             self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
@@ -125,7 +127,10 @@ def fit(self, solve_args={'tol':1.e-12, 'min_its':50},
         """
 
         p = self.nfeature
-        self._view = glm_group_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
+        if self.parametric_cov_estimator==True:
+            self._view = glm_group_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
+        else:
+            self._view = glm_group_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
         self._view.solve(nboot=nboot)
 
         views = copy(views); views.append(self._view)
@@ -167,13 +172,14 @@ def decompose_subgradient(self,
 
         self._queries.setup_opt_state()
 
-    def summary(self, selected_features, 
+    def summary(self,
+                selected_features,
                 null_value=None,
                 level=0.9,
                 ndraw=10000, 
                 burnin=2000,
                 compute_intervals=False,
-                bootstrap=False):
+                bootstrap_sampler=False):
         """
         Produce p-values and confidence intervals for targets
         of model including selected features
@@ -214,15 +220,18 @@ def summary(self, selected_features,
         S = opt_sampler.sample(ndraw,
                                burnin,
                                stepsize=1.e-3)
-        # print(S.shape)
-        # print([np.mean(S[:,i]) for i in range(p)])
 
         unpenalized_mle = restricted_Mest(self.loglike, selected_features)
-        n = self.loglike.data[0].shape[0]
-        form_covariances = glm_nonparametric_bootstrap(n, n)
-        # conv._queries.setup_sampler(form_covariances)
-        boot_target, boot_target_observed = pairs_bootstrap_glm(self.loglike, selected_features, inactive=None)
-        opt_sampler.setup_target(boot_target, form_covariances)
+        if self.parametric_cov_estimator == False:
+            n = self.loglike.data[0].shape[0]
+            form_covariances = glm_nonparametric_bootstrap(n, n)
+            boot_target, boot_target_observed = pairs_bootstrap_glm(self.loglike, selected_features, inactive=None)
+            target_info = boot_target
+        else:
+            target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
+            form_covariances = glm_parametric_covariance(self.loglike)
+
+        opt_sampler.setup_target(target_info, form_covariances, parametric=self.parametric_cov_estimator)
 
         pvalues = opt_sampler.coefficient_pvalues(unpenalized_mle, parameter=null_value, sample=S)
         intervals = None
@@ -235,8 +244,8 @@ def summary(self, selected_features,
     def gaussian(X, 
                  Y, 
                  feature_weights, 
-                 sigma=1., 
-                 covariance_estimator=None,
+                 sigma=1.,
+                 parametric_cov_estimator=False,
                  quadratic=None,
                  ridge_term=None,
                  randomizer_scale=None,
@@ -308,8 +317,8 @@ def gaussian(X,
         the unpenalized estimator.
 
         """
-        if covariance_estimator is not None:
-            sigma = 1.
+
+        sigma = 1.
         loglike = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
         n, p = X.shape
 
@@ -322,14 +331,14 @@ def gaussian(X,
 
         return lasso(loglike, np.asarray(feature_weights) / sigma**2,
                      ridge_term, randomizer_scale, randomizer=randomizer,
-                     covariance_estimator=covariance_estimator) # XXX: do we use the covariance_estimator?
+                     parametric_cov_estimator=parametric_cov_estimator) # XXX: do we use the covariance_estimator?
 
     @staticmethod
     def logistic(X, 
                  successes, 
                  feature_weights, 
-                 trials=None, 
-                 covariance_estimator=None,
+                 trials=None,
+                 parametric_cov_estimator=False,
                  quadratic=None,
                  ridge_term=None,
                  randomizer='gaussian',
@@ -417,15 +426,15 @@ def logistic(X,
         return lasso(loglike, feature_weights, 
                      ridge_term, 
                      randomizer_scale,
-                     covariance_estimator=covariance_estimator,
+                     parametric_cov_estimator=parametric_cov_estimator,
                      randomizer=randomizer)
 
     @staticmethod
     def coxph(X, 
               times, 
               status, 
-              feature_weights, 
-              covariance_estimator=None,
+              feature_weights,
+              parametric_cov_estimator=False,
               quadratic=None,
               ridge_term=None,
               randomizer='gaussian',
@@ -514,13 +523,13 @@ def coxph(X,
                      ridge_term,
                      randomizer_scale, 
                      randomizer=randomizer,
-                     covariance_estimator=covariance_estimator)
+                     parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def poisson(X, 
                 counts, 
-                feature_weights, 
-                covariance_estimator=None,
+                feature_weights,
+                parametric_cov_estimator=False,
                 quadratic=None,
                 ridge_term=None,
                 randomizer_scale=None,
@@ -605,7 +614,7 @@ def poisson(X,
                      ridge_term,
                      randomizer_scale, 
                      randomizer=randomizer,
-                     covariance_estimator=covariance_estimator)
+                     parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def sqrt_lasso(X, 
@@ -799,6 +808,7 @@ def sqrt_lasso(X,
 
         return L
 
+
 class step(lasso):
 
     r"""
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index baa0a73d6..f151867c4 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -712,6 +712,7 @@ def _WQ(active):
 
     return covariances
 
+
 def glm_parametric_covariance(glm_loss, solve_args={'min_its':50, 'tol':1.e-10}):
     """
     A constructor for parametric covariance
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 951ba93e7..e9d976069 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -450,12 +450,13 @@ def setup_target(self,
         for i in range(self.nqueries):
             view = self.objectives[i]
             self.log_densities.append(view.log_density)
-            score_info = view.setup_sampler(form_covariances)
             if parametric == False:
+                score_info = view.setup_sampler(form_covariances)
                 target_cov, cross_cov = form_covariances(target_info,  
                                                          cross_terms=[score_info],
                                                          nsample=self.nboot[i])
             else:
+                score_info = view.setup_sampler()
                 target_cov, cross_cov = form_covariances(target_info, 
                                                          cross_terms=[score_info])
 
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 7b97161c9..2f1ccc8cd 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -29,7 +29,7 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 8
-        conv = const(X, Y, W, randomizer=rand)
+        conv = const(X, Y, W, randomizer=rand, parametric_cov_estimator=True)
         signs = conv.fit()
         print("signs", signs)
 
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index cc4338b51..1f5fbfd11 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -94,6 +94,7 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
 
     return np.concatenate((abs_beta_samples, u_samples), axis=1), reordered_omega
 
+
 def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
     scale = np.linspace(2, 3, p)
     X = np.identity(n)[:,:p]

From 47535f8112f045f707d46629fc6bd9dae420dd56 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Wed, 13 Sep 2017 16:39:34 -0700
Subject: [PATCH 235/617] parametric cov added sigmas est

---
 selection/randomized/convenience.py                   |  2 +-
 selection/randomized/glm.py                           | 11 +++++++++--
 selection/randomized/query.py                         |  5 +++--
 .../randomized/tests/test_opt_weighted_intervals.py   |  7 ++++---
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 2b15ed39b..bc1683781 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -317,8 +317,8 @@ def gaussian(X,
         the unpenalized estimator.
 
         """
-
         sigma = 1.
+
         loglike = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
         n, p = X.shape
 
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index f151867c4..9c49ef631 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -697,7 +697,10 @@ def _WQ(active):
     XW_T = W_T[:, None] * X_T
     Q_T_inv = np.linalg.inv(X_T.T.dot(XW_T))
 
-    covariances = [linear_func.dot(Q_T_inv).dot(linear_funcT)]
+    beta_T = restricted_Mest(glm_loss, target, solve_args=solve_args)
+    sigma_T = np.sqrt(np.sum((Y-glm_loss.saturated_loss.mean_function(X_T.dot(beta_T)))**2)/(n-np.sum(target)))
+
+    covariances = [linear_func.dot(Q_T_inv).dot(linear_funcT)* (sigma_T **2)]
 
     for cross in cross_terms:
         # the covariances are for (\bar{\beta}_{C}, N_C) -- C for cross
@@ -708,8 +711,12 @@ def _WQ(active):
         null_block = X_IT.dot(XW_T) - X_IT.dot(W_T[:, None] * X_C).dot(Q_C_inv).dot(X[:, cross].T.dot(XW_T))
         null_block = null_block.dot(Q_T_inv)
 
-        covariances.append(np.vstack([beta_block, null_block]).dot(linear_funcT).T)
+        beta_C = restricted_Mest(glm_loss, cross, solve_args=solve_args)
+        sigma_C = np.sqrt(np.sum((Y - glm_loss.saturated_loss.mean_function(X_C.dot(beta_C))) ** 2) / (n - np.sum(cross)))
+
+        covariances.append(np.vstack([beta_block, null_block]).dot(linear_funcT).T * sigma_T * sigma_C)
 
+    print(len(covariances))
     return covariances
 
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index e9d976069..b0187f707 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -457,7 +457,8 @@ def setup_target(self,
                                                          nsample=self.nboot[i])
             else:
                 score_info = view.setup_sampler()
-                target_cov, cross_cov = form_covariances(target_info, 
+                print(score_info)
+                target_cov, cross_cov = form_covariances(target_info,
                                                          cross_terms=[score_info])
 
             target_cov_sum += target_cov
@@ -688,7 +689,7 @@ def log_density(self, internal_state, opt_state):
 
         for i in range(self.nqueries):
             log_dens = self.objectives[i].log_density
-            print(internal_state[i].shape, 'internal')
+            # print(internal_state[i].shape, 'internal')
             value += log_dens(internal_state[i], opt_state[:, self.opt_slice[i]]) # may have to broadcast shape here
         return np.squeeze(value)
 
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 2f1ccc8cd..51fc02376 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -25,10 +25,10 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
         inst, const = const_info
 
-        X, Y, beta = inst(n=100, p=10, s=3, signal=5.)[:3]
+        X, Y, beta = inst(n=100, p=10, s=0, signal=1., sigma=5.)[:3]
         n, p = X.shape
 
-        W = np.ones(X.shape[1]) * 8
+        W = np.ones(X.shape[1]) * 5
         conv = const(X, Y, W, randomizer=rand, parametric_cov_estimator=True)
         signs = conv.fit()
         print("signs", signs)
@@ -64,7 +64,8 @@ def compute_coverage(sel_ci, true_vec):
     return coverage
 
 
-def main(ndraw=20000, burnin=5000, nsim=2):
+def main(ndraw=20000, burnin=5000, nsim=10):
+    np.random.seed(1)
 
     sel_pivots_all = list()
     sel_ci_all = list()

From 3ea954477c7f927f9175b8f54374b9a6361edd47 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 20 Sep 2017 13:49:19 -0700
Subject: [PATCH 236/617] cython version

---
 selection/info.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/info.py b/selection/info.py
index 5edfc6207..cadca57b2 100644
--- a/selection/info.py
+++ b/selection/info.py
@@ -43,7 +43,7 @@
 # versions
 NUMPY_MIN_VERSION='1.3'
 SCIPY_MIN_VERSION = '0.7'
-CYTHON_MIN_VERSION = '0.11.1'
+CYTHON_MIN_VERSION = '0.21'
 MPMATH_MIN_VERSION = "0.18"
 PYINTER_MIN_VERSION = "0.1.6"
 

From 28087a6d6cd1c0c6cca26e01b5fa67069fbe3c2a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 20 Sep 2017 13:54:25 -0700
Subject: [PATCH 237/617] minor edits to refactor_JT

---
 selection/randomized/convenience.py |  1 -
 selection/randomized/glm.py         | 10 +++++++++-
 selection/randomized/query.py       |  3 ---
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index bc1683781..46794a90f 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -317,7 +317,6 @@ def gaussian(X,
         the unpenalized estimator.
 
         """
-        sigma = 1.
 
         loglike = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
         n, p = X.shape
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 9c49ef631..6e8f5edcb 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -681,6 +681,11 @@ def parametric_cov(glm_loss,
     # cross_terms are different active sets
 
     target, linear_func = target_with_linear_func
+
+    target_bool = np.zeros(glm_loss.input_shape, np.bool)
+    target_bool[target] = True
+    target = target_bool
+
     linear_funcT = linear_func.T
 
     X, Y = glm_loss.data
@@ -704,6 +709,10 @@ def _WQ(active):
 
     for cross in cross_terms:
         # the covariances are for (\bar{\beta}_{C}, N_C) -- C for cross
+
+        cross_bool = np.zeros(X.shape[1], np.bool)
+        cross_bool[cross] = True; cross = cross_bool
+
         X_C = X[:, cross]
         X_IT = X[:, ~cross].T
         Q_C_inv = np.linalg.inv(X_C.T.dot(W_T[:, None] * X_C))
@@ -716,7 +725,6 @@ def _WQ(active):
 
         covariances.append(np.vstack([beta_block, null_block]).dot(linear_funcT).T * sigma_T * sigma_C)
 
-    print(len(covariances))
     return covariances
 
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index b0187f707..e5b3552e6 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -457,7 +457,6 @@ def setup_target(self,
                                                          nsample=self.nboot[i])
             else:
                 score_info = view.setup_sampler()
-                print(score_info)
                 target_cov, cross_cov = form_covariances(target_info,
                                                          cross_terms=[score_info])
 
@@ -689,7 +688,6 @@ def log_density(self, internal_state, opt_state):
 
         for i in range(self.nqueries):
             log_dens = self.objectives[i].log_density
-            # print(internal_state[i].shape, 'internal')
             value += log_dens(internal_state[i], opt_state[:, self.opt_slice[i]]) # may have to broadcast shape here
         return np.squeeze(value)
 
@@ -781,7 +779,6 @@ def _rootL(gamma):
         upper = bisect(_rootU, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
         lower = bisect(_rootL, grid_min, grid_max, xtol=1.e-5*(grid_max - grid_min))
 
-        #print(_rootU(upper), _rootL(lower), 'pivot')
         return lower + observed_stat, upper + observed_stat
 
     # Private methods

From a188fd702d035bbe48eb47d7c943680e221ca11f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 20 Sep 2017 15:37:47 -0700
Subject: [PATCH 238/617] a little fixing up for unpenalized -- results look
 better for sampling -- seems issue was our comparison

---
 doc/examples/conditional_sampling.py          |  4 +--
 .../examples/power_comparison.py              |  0
 selection/randomized/M_estimator.py           | 25 +++++++++--------
 selection/randomized/tests/test_sampling.py   | 27 ++++++++++++-------
 4 files changed, 34 insertions(+), 22 deletions(-)
 rename selection/randomized/tests/test_power.py => doc/examples/power_comparison.py (100%)

diff --git a/doc/examples/conditional_sampling.py b/doc/examples/conditional_sampling.py
index efd7d6779..2e9ddd8e5 100644
--- a/doc/examples/conditional_sampling.py
+++ b/doc/examples/conditional_sampling.py
@@ -9,14 +9,14 @@
 
 from selection.randomized.tests.test_sampling import test_conditional_law
 
-def main(ndraw=50000, burnin=5000, remove_atom=False, unpenalized=True):
+def main(ndraw=50000, burnin=5000, remove_atom=False, unpenalized=True, stepsize=1.e-2):
 
     fig_idx = 0
     for (rand,
          mcmc_opt, 
          mcmc_omega,
          truncated_opt,
-         truncated_omega) in test_conditional_law(ndraw=ndraw, burnin=burnin, stepsize=1.e-2, unpenalized=unpenalized):
+         truncated_omega) in test_conditional_law(ndraw=ndraw, burnin=burnin, stepsize=stepsize, unpenalized=unpenalized):
 
         fig_idx += 1
         fig = plt.figure(num=fig_idx, figsize=(8,8))
diff --git a/selection/randomized/tests/test_power.py b/doc/examples/power_comparison.py
similarity index 100%
rename from selection/randomized/tests/test_power.py
rename to doc/examples/power_comparison.py
diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 90e1d6dd6..c47305895 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -1,4 +1,7 @@
 import numpy as np
+import scipy
+from scipy import matrix
+
 import regreg.api as rr
 import regreg.affine as ra
 
@@ -189,8 +192,8 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
 
         Mest_slice = slice(0, overall.sum())
-        _Mest_hessian = _hessian[:,overall]
-        _score_linear_term[:,Mest_slice] = -_Mest_hessian / _sqrt_scaling
+        _Mest_hessian = _hessian[:, overall]
+        _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
 
         # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
 
@@ -206,16 +209,17 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
             _opt_hessian=0
         else:
             _opt_hessian = (_hessian + epsilon * np.identity(p)).dot(active_directions)
-        _opt_linear_term[:,scaling_slice] = _opt_hessian / _sqrt_scaling
+        _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling
 
         self.observed_opt_state[scaling_slice] *= _sqrt_scaling
 
         # beta_U piece
 
         unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
+        print(active_groups, unpenalized, unpenalized_slice, 'unpenalized')
         unpenalized_directions = np.identity(p)[:,unpenalized]
         if unpenalized.sum():
-            _opt_linear_term[:,unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
+            _opt_linear_term[:, unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
 
         self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling
 
@@ -226,7 +230,7 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         for _i, _s in zip(inactive_idx, subgrad_idx):
             _opt_linear_term[_i,_s] = _sqrt_scaling
 
-        self.observed_opt_state[subgrad_slice] /= _sqrt_scaling
+        self.observed_opt_state[subgrad_idx] /= _sqrt_scaling
 
         # form affine part
 
@@ -280,25 +284,24 @@ def form_VQLambda(self):
         nactive_groups = len(self.active_directions_list)
         nactive_vars = sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
         V = np.zeros((nactive_vars, nactive_vars-nactive_groups))
-        #U = np.zeros((nvariables, ngroups))
+
         Lambda = np.zeros((nactive_vars,nactive_vars))
         temp_row, temp_col = 0, 0
         for g in range(len(self.active_directions_list)):
             size_curr_group = self.active_directions_list[g].shape[0]
-            #U[temp_row:(temp_row+size_curr_group),g] = self._active_directions[g]
+
             Lambda[temp_row:(temp_row+size_curr_group),temp_row:(temp_row+size_curr_group)] \
                 = self.active_penalty[g]*np.identity(size_curr_group)
-            import scipy
-            from scipy import linalg, matrix
+
             def null(A, eps=1e-12):
-                u, s, vh = scipy.linalg.svd(A)
+                u, s, vh = np.linalg.svd(A)
                 padding = max(0, np.shape(A)[1] - np.shape(s)[0])
                 null_mask = np.concatenate(((s <= eps), np.ones((padding,), dtype=bool)), axis=0)
                 null_space = scipy.compress(null_mask, vh, axis=0)
                 return scipy.transpose(null_space)
 
             V_g = null(matrix(self.active_directions_list[g]))
-            V[temp_row:(temp_row+V_g.shape[0]), temp_col:(temp_col+V_g.shape[1])] = V_g
+            V[temp_row:(temp_row + V_g.shape[0]), temp_col:(temp_col + V_g.shape[1])] = V_g
             temp_row += V_g.shape[0]
             temp_col += V_g.shape[1]
         self.VQLambda = np.dot(np.dot(V.T,self.Qinv), Lambda.dot(V))
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 1f5fbfd11..aa22ebb8a 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -53,24 +53,27 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
 
     Xdiag = np.diag(X.T.dot(X))
     p = X.shape[1]
-    nactive = active.sum()
+
+    unpenalized = (lam == 0) * active
+    nunpenalized = unpenalized.sum()
     lower = -np.ones(p) * np.inf
     upper = -lower
-    active_set = np.where(active)[0]
+    active_set = np.where(active * (lam > 0))[0]
+    unpen_set = np.where(active * (lam == 0))[0]
     inactive_set = np.where(~active)[0]
 
+    nactive = active.sum() - unpenalized.sum()
+    nunpen = unpenalized.sum()
     for i in range(nactive):
         var = active_set[i]
         if lam[var] != 0:
             if signs[var]>0:
                     lower[i] = (-X[:, var].T.dot(y) + lam[var] * signs[var])
-                    upper[i] = np.inf
             else:
-                lower[i] = -np.inf
                 upper[i] = (-X[:,var].T.dot(y) + lam[var] * signs[var]) 
 
-    lower[range(nactive, p)] = -lam[inactive_set] - X[:, inactive_set].T.dot(y)
-    upper[range(nactive, p)] = lam[inactive_set] - X[:, inactive_set].T.dot(y)
+    lower[range(nactive + nunpen, p)] = -lam[inactive_set] - X[:, inactive_set].T.dot(y)
+    upper[range(nactive + nunpen, p)] = lam[inactive_set] - X[:, inactive_set].T.dot(y)
 
     print(lower, 'lower')
     print(upper, 'upper')
@@ -84,15 +87,20 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
                           X[:, active_set].T.dot(y) - 
                           lam[active_set] * signs[active_set], 
                           (epsilon + Xdiag[active_set]) * signs[active_set])
-    u_samples = omega_samples[:, nactive:] + X[:, inactive_set].T.dot(y)
+    unpen_beta_samples = np.true_divide( 
+                          omega_samples[:, nactive:(nactive + nunpen)] + 
+                          X[:, unpen_set].T.dot(y), 
+                          (epsilon + Xdiag[unpen_set]))
+    u_samples = omega_samples[:, (nactive + nunpen):] + X[:, inactive_set].T.dot(y)
 
     # this ordering should be correct?
 
     reordered_omega = np.zeros_like(omega_samples)
     reordered_omega[:, active_set] = omega_samples[:, :nactive]
-    reordered_omega[:, inactive_set] = omega_samples[:, nactive:]
+    reordered_omega[:, unpen_set] = omega_samples[:, nactive:(nactive + nunpen)]
+    reordered_omega[:, inactive_set] = omega_samples[:, (nactive + nunpen):]
 
-    return np.concatenate((abs_beta_samples, u_samples), axis=1), reordered_omega
+    return np.concatenate((abs_beta_samples, unpen_beta_samples, u_samples), axis=1), reordered_omega
 
 
 def orthogonal_design(n, p, s, signal, sigma, random_signs=True):
@@ -171,6 +179,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
                                stepsize=stepsize)
         print(S.shape)
         print([np.mean(S[:,i]) for i in range(p)])
+        print(selected_features, 'selected')
 
         # let's also reconstruct the omegas to compare
 

From 789edefb92f318a5c3a2ae7c3edd913a09412c7b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 20 Sep 2017 16:03:59 -0700
Subject: [PATCH 239/617] adding parametric_cov_estimator everywhere

---
 selection/randomized/M_estimator.py     |   1 -
 selection/randomized/convenience.py     | 267 ++----------------------
 selection/randomized/query.py           |  10 +-
 selection/randomized/threshold_score.py |  13 +-
 4 files changed, 39 insertions(+), 252 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index c47305895..ed5988bd9 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -216,7 +216,6 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         # beta_U piece
 
         unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
-        print(active_groups, unpenalized, unpenalized_slice, 'unpenalized')
         unpenalized_directions = np.identity(p)[:,unpenalized]
         if unpenalized.sum():
             _opt_linear_term[:, unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 46794a90f..69e2557f3 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -66,20 +66,6 @@ def __init__(self,
         randomizer : str (optional)
             One of ['laplace', 'logistic', 'gaussian']
 
-        covariance_estimator : callable (optional)
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
 
@@ -281,10 +267,6 @@ def gaussian(X,
             Noise variance. Set to 1 if `covariance_estimator` is not None.
             This scales the loglikelihood by `sigma**(-2)`.
 
-        covariance_estimator : callable (optional)
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
             Can also be a linear term by setting quadratic 
@@ -304,17 +286,6 @@ def gaussian(X,
 
         L : `selection.randomized.convenience.lasso`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
-        and return an estimate of some of the
-        rows and columns of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
 
@@ -330,7 +301,7 @@ def gaussian(X,
 
         return lasso(loglike, np.asarray(feature_weights) / sigma**2,
                      ridge_term, randomizer_scale, randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator) # XXX: do we use the covariance_estimator?
+                     parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def logistic(X, 
@@ -375,10 +346,6 @@ def logistic(X,
             Number of trials per response, defaults to
             ones the same shape as Y. 
 
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
             Can also be a linear term by setting quadratic 
@@ -398,16 +365,6 @@ def logistic(X,
 
         L : `selection.randomized.convenience.lasso`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         n, p = X.shape
@@ -493,16 +450,6 @@ def coxph(X,
 
         L : `selection.randomized.convenience.lasso`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         loglike = coxph_obj(X, times, status, quadratic=quadratic)
@@ -560,9 +507,6 @@ def poisson(X,
             `feature_weights` to 0. If `feature_weights` is 
             a float, then all parameters are penalized equally.
 
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
 
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
@@ -583,16 +527,6 @@ def poisson(X,
 
         L : `selection.randomized.convenience.lasso`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, inactive)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the inactive
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         n, p = X.shape
@@ -620,7 +554,7 @@ def sqrt_lasso(X,
                    Y, 
                    feature_weights, 
                    quadratic=None,
-                   covariance='parametric',
+                   parametric_cov_estimator=False,
                    sigma_estimate='truncated',
                    solve_args={'min_its':200},
                    randomizer_scale=None,
@@ -785,15 +719,8 @@ def sqrt_lasso(X,
 
         loglike = rr.glm.gaussian(X, Y, quadratic=quadratic)
 
-        if covariance == 'parametric':
-            cov_est = glm_parametric_estimator(loglike, dispersion=_sigma_hat)
-        elif covariance == 'sandwich':
-            cov_est = glm_sandwich_estimator(loglike, B=2000)
-        else:
-            raise ValueError('covariance must be one of ["parametric", "sandwich"]')
-
         L = lasso(loglike, feature_weights * multiplier * sigma_E,
-                  covariance_estimator=cov_est,
+                  parametric_cov_estimator=parametric_cov_estimator,
                   ignore_inactive_constraints=True)
 
         # these arguments are reused for data carving
@@ -834,7 +761,7 @@ def __init__(self,
                  randomizer_scale,
                  active=None,
                  randomizer='gaussian',
-                 covariance_estimator=None):
+                 parametric_cov_estimator=False):
         r"""
 
         Create a new post-selection for the stepwise problem
@@ -863,20 +790,6 @@ def __init__(self,
         randomizer : str (optional)
             One of ['laplace', 'logistic', 'gaussian']
 
-        covariance_estimator : callable (optional)
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
 
@@ -890,7 +803,7 @@ def __init__(self,
             feature_weights = np.ones(loglike.shape) * feature_weights
         self.feature_weights = np.asarray(feature_weights)
 
-        self.covariance_estimator = covariance_estimator
+        self.parametric_cov_estimator = parametric_cov_estimator
 
         nrandom = candidate.sum()
         if randomizer == 'laplace':
@@ -971,8 +884,8 @@ def gaussian(X,
                  feature_weights, 
                  candidate=None,
                  active=None,
-                 covariance_estimator=None,
                  randomizer_scale=None,
+                 parametric_cov_estimator=False,
                  randomizer='gaussian'):
         r"""
         Take a step with a Gaussian loglikelihood.
@@ -1001,10 +914,6 @@ def gaussian(X,
             set of variables we partially minimize over.
             Defaults to `np.zeros(p, np.bool)`.
 
-        covariance_estimator : callable (optional)
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         randomizer_scale : float
             Scale for IID components of randomizer.
 
@@ -1016,17 +925,6 @@ def gaussian(X,
 
         L : `selection.randomized.convenience.step`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of some of the
-        rows and columns of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         loglike = rr.glm.gaussian(X, Y)
@@ -1047,7 +945,7 @@ def gaussian(X,
                     randomizer_scale, 
                     active=active,
                     randomizer=randomizer,
-                    covariance_estimator=covariance_estimator)  # XXX: do we use the covariance_estimator?
+                    parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def logistic(X, 
@@ -1056,7 +954,7 @@ def logistic(X,
                  active=None,
                  candidate=None,
                  trials=None, 
-                 covariance_estimator=None,
+                 parametric_cov_estimator=False,
                  randomizer_scale=None,
                  randomizer='gaussian'):
         r"""
@@ -1092,10 +990,6 @@ def logistic(X,
             Number of trials per response, defaults to
             ones the same shape as Y. 
 
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         randomizer_scale : float
             Scale for IID components of randomizer.
 
@@ -1106,17 +1000,6 @@ def logistic(X,
         -------
 
         L : `selection.randomized.convenience.step`
-        
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         n, p = X.shape
@@ -1136,7 +1019,7 @@ def logistic(X,
                     candidate,
                     randomizer_scale,
                     active=active,
-                    covariance_estimator=covariance_estimator)
+                    parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def coxph(X, 
@@ -1145,7 +1028,7 @@ def coxph(X,
               feature_weights, 
               candidate=None,
               active=None,
-              covariance_estimator=None,
+              parametric_cov_estimator=False,
               randomizer_scale=None,
               randomizer='gaussian'):
         r"""
@@ -1180,10 +1063,6 @@ def coxph(X,
             set of variables we partially minimize over.
             Defaults to `np.zeros(p, np.bool)`.
 
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         randomizer_scale : float
             Scale for IID components of randomizer.
 
@@ -1195,16 +1074,6 @@ def coxph(X,
 
         L : `selection.randomized.convenience.lasso`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         n, p = X.shape
@@ -1224,7 +1093,7 @@ def coxph(X,
                     randomizer_scale,
                     active=active,
                     randomizer=randomizer,
-                    covariance_estimator=covariance_estimator)
+                    parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def poisson(X, 
@@ -1232,7 +1101,7 @@ def poisson(X,
                 feature_weights, 
                 candidate=None,
                 active=None,
-                covariance_estimator=None,
+                parametric_cov_estimator=False,
                 randomizer_scale=None,
                 randomizer='gaussian'):
         r"""
@@ -1262,10 +1131,6 @@ def poisson(X,
             set of variables we partially minimize over.
             Defaults to `np.zeros(p, np.bool)`.
 
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         randomizer_scale : float
             Scale for IID components of randomizer.
 
@@ -1277,16 +1142,6 @@ def poisson(X,
 
         L : `selection.randomized.convenience.step`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         n, p = X.shape
@@ -1309,7 +1164,7 @@ def poisson(X,
                     randomizer_scale, 
                     active=active,
                     randomizer=randomizer,
-                    covariance_estimator=covariance_estimator)
+                    parametric_cov_estimator=parametric_cov_estimator)
 
 class threshold(lasso):
 
@@ -1335,7 +1190,7 @@ def __init__(self,
                  randomizer_scale,
                  active=None,
                  randomizer='gaussian',
-                 covariance_estimator=None):
+                 parametric_cov_estimator=False):
         r"""
 
         Create a new post-selection for the stepwise problem
@@ -1364,21 +1219,6 @@ def __init__(self,
         randomizer : str (optional)
             One of ['laplace', 'logistic', 'gaussian']
 
-        covariance_estimator : callable (optional)
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
-
         """
 
         self.active = active
@@ -1391,7 +1231,7 @@ def __init__(self,
             threshold = np.ones(loglike.shape) * threshold_value
         self.threshold_value = np.asarray(threshold_value)[self.candidate]
 
-        self.covariance_estimator = covariance_estimator
+        self.parametric_cov_estimator = parametric_cov_estimator
 
         nrandom = candidate.sum()
         if randomizer == 'laplace':
@@ -1469,7 +1309,7 @@ def gaussian(X,
                  threshold_value, 
                  candidate=None,
                  active=None,
-                 covariance_estimator=None,
+                 parametric_cov_estimator=False,
                  randomizer_scale=None,
                  randomizer='gaussian'):
         r"""
@@ -1499,10 +1339,6 @@ def gaussian(X,
             set of variables we partially minimize over.
             Defaults to `np.zeros(p, np.bool)`.
 
-        covariance_estimator : callable (optional)
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         randomizer_scale : float
             Scale for IID components of randomizer.
 
@@ -1514,18 +1350,6 @@ def gaussian(X,
 
         L : `selection.randomized.convenience.threshold`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of some of the
-        rows and columns of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
-
         """
 
         loglike = rr.glm.gaussian(X, Y)
@@ -1546,7 +1370,7 @@ def gaussian(X,
                          randomizer_scale, 
                          active=active,
                          randomizer=randomizer,
-                         covariance_estimator=covariance_estimator)  # XXX: do we use the covariance_estimator?
+                         parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def logistic(X, 
@@ -1555,7 +1379,7 @@ def logistic(X,
                  active=None,
                  candidate=None,
                  trials=None, 
-                 covariance_estimator=None,
+                 parametric_cov_estimator=False,
                  randomizer_scale=None,
                  randomizer='gaussian'):
         r"""
@@ -1591,10 +1415,6 @@ def logistic(X,
             Number of trials per response, defaults to
             ones the same shape as Y. 
 
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         randomizer_scale : float
             Scale for IID components of randomizer.
 
@@ -1606,17 +1426,6 @@ def logistic(X,
 
         L : `selection.randomized.convenience.threshold`
         
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
-
         """
         n, p = X.shape
         loglike = rr.glm.logistic(X, successes, trials=trials)
@@ -1635,7 +1444,7 @@ def logistic(X,
                          candidate,
                          randomizer_scale,
                          active=active,
-                         covariance_estimator=covariance_estimator)
+                         parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def coxph(X, 
@@ -1644,7 +1453,7 @@ def coxph(X,
               threshold_value,
               candidate=None,
               active=None,
-              covariance_estimator=None,
+              parametric_cov_estimator=False,
               randomizer_scale=None,
               randomizer='gaussian'):
         r"""
@@ -1679,10 +1488,6 @@ def coxph(X,
             set of variables we partially minimize over.
             Defaults to `np.zeros(p, np.bool)`.
 
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         randomizer_scale : float
             Scale for IID components of randomizer.
 
@@ -1693,17 +1498,6 @@ def coxph(X,
         -------
 
         L : `selection.randomized.convenience.threshold`
-        
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         n, p = X.shape
@@ -1723,7 +1517,7 @@ def coxph(X,
                          randomizer_scale,
                          active=active,
                          randomizer=randomizer,
-                         covariance_estimator=covariance_estimator)
+                         parametric_cov_estimator=parametric_cov_estimator)
 
     @staticmethod
     def poisson(X, 
@@ -1731,7 +1525,7 @@ def poisson(X,
                 threshold_value,
                 candidate=None,
                 active=None,
-                covariance_estimator=None,
+                parametric_cov_estimator=False,
                 randomizer_scale=None,
                 randomizer='gaussian'):
         r"""
@@ -1761,10 +1555,6 @@ def poisson(X,
             set of variables we partially minimize over.
             Defaults to `np.zeros(p, np.bool)`.
 
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
         randomizer_scale : float
             Scale for IID components of randomizer.
 
@@ -1775,17 +1565,6 @@ def poisson(X,
         -------
 
         L : `selection.randomized.convenience.threshold`
-        
-        Notes
-        -----
-
-        If not None, `covariance_estimator` should 
-        take arguments (beta, active, candidate)
-        and return an estimate of the covariance of
-        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
-        the unpenalized estimator and the candidate
-        coordinates of the gradient of the likelihood at
-        the unpenalized estimator.
 
         """
         n, p = X.shape
@@ -1808,4 +1587,4 @@ def poisson(X,
                          randomizer_scale, 
                          active=active,
                          randomizer=randomizer,
-                         covariance_estimator=covariance_estimator)
+                         parametric_cov_estimator=parametric_cov_estimator)
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index e5b3552e6..d7e49a357 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -387,8 +387,11 @@ def gradient(self, state):
 
         for i in range(self.nqueries):
             opt_linear, opt_offset = self.objectives[i].opt_transform
-            opt_grad[self.opt_slice[i]] = \
-                opt_linear.T.dot(self.objectives[i].grad_log_density(self.observed_internal[i], opt_state[self.opt_slice[i]]))
+            if self.objectives[i].num_opt_var > 0: # thresholding has no opt variables
+                                                   # after marginalizing
+                opt_grad[self.opt_slice[i]] = \
+                    opt_linear.T.dot(self.objectives[i].grad_log_density(self.observed_internal[i], 
+                                                                         opt_state[self.opt_slice[i]]))
         return -opt_grad
 
     def sample(self, ndraw, burnin, stepsize=None):
@@ -447,11 +450,12 @@ def setup_target(self,
         target_cov_sum = 0
 
         # we should pararallelize this over all views at once ?
+
         for i in range(self.nqueries):
             view = self.objectives[i]
             self.log_densities.append(view.log_density)
             if parametric == False:
-                score_info = view.setup_sampler(form_covariances)
+                score_info = view.setup_sampler()
                 target_cov, cross_cov = form_covariances(target_info,  
                                                          cross_terms=[score_info],
                                                          nsample=self.nboot[i])
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index 8e58b39f3..829bf6f42 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -138,12 +138,17 @@ def grad_log_density(self, internal_state, opt_state):
         threshold = self.threshold
         weights = np.zeros_like(self.boundary, np.float)
 
-        weights[self.boundary] = ((self.randomization._density(threshold[self.boundary] - full_state[self.boundary]) - self.randomization._density(-threshold[self.boundary] - full_state[self.boundary])) /
-                                  (1 - self.randomization._cdf(threshold[self.boundary] - full_state[self.boundary]) + self.randomization._cdf(-threshold[self.boundary] - full_state[self.boundary])))
+        weights[self.boundary] = ((self.randomization._density(threshold[self.boundary] - full_state[self.boundary])
+                                   - self.randomization._density(-threshold[self.boundary] - full_state[self.boundary])) /
+                                  (1 - self.randomization._cdf(threshold[self.boundary] - full_state[self.boundary]) + 
+                                   self.randomization._cdf(-threshold[self.boundary] - full_state[self.boundary])))
 
 
-        weights[~self.boundary] = ((-self.randomization._density(threshold[~self.boundary] - full_state[~self.boundary]) + self.randomization._density(-threshold[~self.boundary] - full_state[~self.boundary])) /
-                                   (self.randomization._cdf(threshold[~self.boundary] - full_state[~self.boundary]) - self.randomization._cdf(-threshold[~self.boundary] - full_state[~self.boundary])))
+        weights[~self.boundary] = ((-self.randomization._density(threshold[~self.boundary] - 
+                                                                 full_state[~self.boundary]) + 
+                                     self.randomization._density(-threshold[~self.boundary] - full_state[~self.boundary])) /
+                                   (self.randomization._cdf(threshold[~self.boundary] - full_state[~self.boundary]) - 
+                                    self.randomization._cdf(-threshold[~self.boundary] - full_state[~self.boundary])))
 
         return weights ## tested
 

From 4083045b01951df4129350b9eefeb9c654683fa0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 12:31:59 -0700
Subject: [PATCH 240/617] refactor of optimization_sampler so each view gets
 its own sampler

---
 selection/randomized/convenience.py           |   2 +-
 selection/randomized/glm.py                   |   2 +-
 selection/randomized/query.py                 | 174 +++++-------------
 .../tests/test_optimization_sampler.py        |  11 +-
 selection/randomized/tests/test_sampling.py   |  16 +-
 5 files changed, 65 insertions(+), 140 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 69e2557f3..7ededfa6a 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -201,7 +201,7 @@ def summary(self,
 
         self._queries.setup_sampler(form_covariances=None)
         self._queries.setup_opt_state()
-        opt_sampler = optimization_sampler(self._queries)
+        opt_sampler = optimization_sampler(self._view) # we should add extra views!
 
         S = opt_sampler.sample(ndraw,
                                burnin,
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 6e8f5edcb..862024663 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -682,7 +682,7 @@ def parametric_cov(glm_loss,
 
     target, linear_func = target_with_linear_func
 
-    target_bool = np.zeros(glm_loss.input_shape, np.bool)
+    target_bool = np.zeros(glm_loss.shape, np.bool)
     target_bool[target] = True
     target = target_bool
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index d7e49a357..bfefb0c09 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -288,7 +288,7 @@ class optimization_sampler(object):
     '''
 
     def __init__(self,
-                 multi_view):
+                 query):
 
         '''
         Parameters
@@ -314,48 +314,17 @@ def __init__(self,
 
         # make sure we setup the queries
 
-        multi_view.setup_sampler(form_covariances=None)
-        multi_view.setup_opt_state()
+        self.score_info = query.setup_sampler()
+        self.nboot = query.nboot
+        self.observed_opt_state = query.observed_opt_state.copy()
+        self.observed_internal_state = query.observed_internal_state.copy()
+        self.score_linear, self.score_offset = query.score_transform
+        self.opt_linear, self.opt_offset = query.opt_transform
+        self.projection_map = query.projection
+        self.grad_log_density = query.grad_log_density
+        self.log_density = query.log_density
 
-        # we need these attributes of multi_view
-        self.multi_view = multi_view
-
-        self.nqueries = len(multi_view.objectives)
-        self.opt_slice = multi_view.opt_slice
-        self.objectives = multi_view.objectives
-        self.nboot = multi_view.nboot
-
-        self.total_randomization_length = multi_view.total_randomization_length
-        self.randomization_slice = multi_view.randomization_slice
-
-        # set the observed state
-
-        self.observed_state = np.zeros_like(multi_view.observed_opt_state)
-        self.observed_state[:] = multi_view.observed_opt_state
-
-        # added for the reconstruction map in case we marginalize over optimization variables
-
-        randomization_length_total = 0
-        self.randomization_slice = []
-        for i in range(self.nqueries):
-            self.randomization_slice.append(
-                slice(randomization_length_total, randomization_length_total + self.objectives[i].ndim))
-            randomization_length_total += self.objectives[i].ndim
-
-        self.randomization_length_total = randomization_length_total
-
-        # We implicitly assume that we are sampling a target
-        # independent of the data in each view
-
-        self.observed_internal = [] # in the view's coordinates
-        self.score_info = []
-        for i in range(self.nqueries):
-            obj = self.objectives[i]
-            score_linear, score_offset = obj.score_transform
-            self.observed_internal.append(obj.observed_internal_state)
-            self.score_info.append(obj.score_transform)
-
-    def projection(self, state):
+    def projection(self, opt_state):
         '''
         Projection map of projected Langevin sampler.
         Parameters
@@ -369,29 +338,22 @@ def projection(self, state):
         projected_state : np.float
         '''
 
-        opt_state = state
-        new_opt_state = np.zeros_like(opt_state)
-        for i in range(self.nqueries):
-            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
-        return new_opt_state
+        return self.projection_map(opt_state)
 
-    def gradient(self, state):
+    def gradient(self, opt_state):
         """
         Gradient only w.r.t. opt variables
         """
 
-        opt_state = state
         opt_grad = np.zeros_like(opt_state)
 
         # randomization_gradient are gradients of a CONVEX function
 
-        for i in range(self.nqueries):
-            opt_linear, opt_offset = self.objectives[i].opt_transform
-            if self.objectives[i].num_opt_var > 0: # thresholding has no opt variables
-                                                   # after marginalizing
-                opt_grad[self.opt_slice[i]] = \
-                    opt_linear.T.dot(self.objectives[i].grad_log_density(self.observed_internal[i], 
-                                                                         opt_state[self.opt_slice[i]]))
+        # this presumes grad_log_density is expressed not in internal coordinates
+        # but score coordinates -- hence the chain rule with self.opt_linear
+
+        opt_grad = self.opt_linear.T.dot(self.grad_log_density(self.observed_internal_state, 
+                                                               opt_state))
         return -opt_grad
 
     def sample(self, ndraw, burnin, stepsize=None):
@@ -420,9 +382,9 @@ def sample(self, ndraw, burnin, stepsize=None):
         '''
 
         if stepsize is None:
-            stepsize = 1./len(self.observed_state) 
+            stepsize = 1./len(self.observed_opt_state) 
 
-        target_langevin = projected_langevin(self.observed_state.copy(),
+        target_langevin = projected_langevin(self.observed_opt_state.copy(),
                                              self.gradient,
                                              self.projection,
                                              stepsize)
@@ -447,28 +409,16 @@ def setup_target(self,
         self.score_cov = []
         self.log_densities = []
 
-        target_cov_sum = 0
-
         # we should pararallelize this over all views at once ?
 
-        for i in range(self.nqueries):
-            view = self.objectives[i]
-            self.log_densities.append(view.log_density)
-            if parametric == False:
-                score_info = view.setup_sampler()
-                target_cov, cross_cov = form_covariances(target_info,  
-                                                         cross_terms=[score_info],
-                                                         nsample=self.nboot[i])
-            else:
-                score_info = view.setup_sampler()
-                target_cov, cross_cov = form_covariances(target_info,
-                                                         cross_terms=[score_info])
-
-            target_cov_sum += target_cov
-            self.score_cov.append(cross_cov)
-
-        self.target_cov = target_cov_sum / self.nqueries
-        self.target_invcov = np.linalg.inv(self.target_cov)
+        if parametric == False:
+            self.target_cov, self.score_cov = form_covariances(target_info,  
+                                                               cross_terms=[self.score_info],
+                                                               nsample=self.nboot)
+        else:
+            self.target_cov, self.score_cov = form_covariances(target_info,
+                                                               cross_terms=[self.score_info])
+            
 
     def hypothesis_test(self,
                         test_stat,
@@ -583,8 +533,7 @@ def confidence_intervals(self,
         if sample is None:
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
 
-        _intervals = optimization_intervals(self,
-                                            sample,
+        _intervals = optimization_intervals([(self, sample)],
                                             observed_target)
 
         limits = []
@@ -646,8 +595,7 @@ def coefficient_pvalues(self,
         if parameter is None:
             parameter = np.zeros(observed_target.shape[0])
 
-        _intervals = optimization_intervals(self,
-                                            sample,
+        _intervals = optimization_intervals([(self, sample)],
                                             observed_target)
         pvals = []
 
@@ -673,44 +621,25 @@ def crude_lipschitz(self):
             lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
         return lipschitz
 
-    def log_density(self, internal_state, opt_state):
-        '''
-        Log of randomization density at current state.
-        Parameters
-        ----------
-        internal_state : sequence
-           Sequence of internal scores for each view (i.e.
-           in their own coordinate systems).
-
-        Returns
-        -------
-        density : np.float
-            Has number of rows as `opt_state` if 2-dimensional.
-        '''
-
-        value = np.zeros(opt_state.shape[0])
-
-        for i in range(self.nqueries):
-            log_dens = self.objectives[i].log_density
-            value += log_dens(internal_state[i], opt_state[:, self.opt_slice[i]]) # may have to broadcast shape here
-        return np.squeeze(value)
-
 class optimization_intervals(object):
 
     def __init__(self,
-                 opt_sampler,
-                 opt_sample,
-                 observed):
+                 opt_sampling_info, # a sequence of (opt_sampler, opt_sample) objects
+                 observed,
+                 target_cov=None):
 
-        self._logden = opt_sampler.log_density(opt_sampler.observed_internal, opt_sample)
+        self.opt_sampling_info = opt_sampling_info
+        self._logden = 0
+        for opt_sampler, opt_sample in opt_sampling_info:
+            self._logden += opt_sampler.log_density(opt_sampler.observed_internal_state, opt_sample)
 
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
-        # setup_target has been called on opt_sampler
-        self.opt_sampler = opt_sampler
-        self.opt_sample = opt_sample
-
-        self.target_cov = opt_sampler.target_cov
+        if target_cov is None:
+            self.target_cov = 0
+            for opt_sampler, opt_sample in opt_sampling_info:
+                self.target_cov += opt_sampler.target_cov
+            self.target_cov /= len(opt_sampling_info)
         self._normal_sample = np.random.multivariate_normal(mean=np.zeros(self.target_cov.shape[0]), 
                                                             cov=self.target_cov, 
                                                             size=(opt_sample.shape[0],))
@@ -737,16 +666,12 @@ def pivot(self,
 
         nuisance = []
         score_cov = []
-        for i in range(len(self.opt_sampler.objectives)):
-            cur_score_cov = linear_func.dot(self.opt_sampler.score_cov[i])
+        for opt_sampler, opt_sample in self.opt_sampling_info:
+            cur_score_cov = linear_func.dot(opt_sampler.score_cov)
 
             # cur_nuisance is in the view's internal coordinates
-            cur_nuisance = self.opt_sampler.observed_internal[i] - cur_score_cov * observed_stat / target_cov
-
-            score_linear, score_offset = self.opt_sampler.score_info[i]
-
+            cur_nuisance = opt_sampler.observed_internal_state - cur_score_cov * observed_stat / target_cov
             nuisance.append(cur_nuisance)
-
             score_cov.append(cur_score_cov / target_cov)
 
 
@@ -809,9 +734,12 @@ def _weights(self,
         # In this function, \hat{\theta}_i will change with the Monte Carlo sample
 
         internal_sample = []
-        for i in range(len(self.opt_sampler.log_densities)):
-            internal_sample.append(np.multiply.outer(sample_stat, score_cov[i]) + nuisance[i][None, :]) # these are now internal coordinates
-        _lognum = self.opt_sampler.log_density(internal_sample, self.opt_sample)
+        _lognum = 0
+        for i, opt_info in enumerate(self.opt_sampling_info):
+            internal_sample = np.multiply.outer(sample_stat, score_cov[i]) + nuisance[i][None, :] # these are now internal coordinates
+            opt_sampler, opt_sample = opt_info
+            _lognum += opt_sampler.log_density(internal_sample, opt_sample)
+
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
 
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
index 27afbfcc4..15a12bd19 100644
--- a/selection/randomized/tests/test_optimization_sampler.py
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -25,7 +25,7 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
                                                                 [False, True]):
 
         inst, const = const_info
-        X, Y = inst()[:2]
+        X, Y = inst(signal=0.01)[:2]
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 80
@@ -54,10 +54,11 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
 
         conv.decompose_subgradient(conditioning_groups, marginalizing_groups)
 
-        opt_sampler = optimization_sampler(conv._queries)
-        S = opt_sampler.sample(ndraw,
-                               burnin,
-                               stepsize=1.e-10)
+        opt_samplers = [optimization_sampler(q) for q in conv._queries.objectives]
+        for opt_sampler in opt_samplers:
+            S = opt_sampler.sample(ndraw,
+                                   burnin,
+                                   stepsize=1.e-10)
 
         
         
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index aa22ebb8a..ce55f4694 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -172,7 +172,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
 
         selected_features = conv._view.selection_variable['variables']
 
-        opt_sampler = optimization_sampler(conv._queries)
+        opt_sampler = optimization_sampler(conv._view)
 
         S = opt_sampler.sample(ndraw,
                                burnin,
@@ -183,7 +183,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
 
         # let's also reconstruct the omegas to compare
 
-        S_omega = reconstruct_opt(opt_sampler, S)
+        S_omega = reconstruct_opt(conv._view, S)
 
         opt_samples = sample_opt_vars(X, 
                                       Y, 
@@ -201,7 +201,7 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
     return results
 
     
-def reconstruct_opt(opt_sampler, state):
+def reconstruct_opt(query, state):
     '''
     Reconstruction of randomization at current state.
     Parameters
@@ -222,12 +222,8 @@ def reconstruct_opt(opt_sampler, state):
     if state.ndim > 2:
         raise ValueError('expecting at most 2-dimensional array')
 
-    reconstructed = np.zeros((state.shape[0], opt_sampler.total_randomization_length))
-
-    for i in range(opt_sampler.nqueries):
-        reconstructed[:,opt_sampler.randomization_slice[i]] = reconstruct_full_from_internal(opt_sampler.objectives[i],  
-                                                                                             opt_sampler.observed_internal[i],
-                                                                                             state[:,opt_sampler.opt_slice[i]])
-
+    reconstructed = reconstruct_full_from_internal(query,
+                                                   query.observed_internal_state,
+                                                   state)
 
     return np.squeeze(reconstructed)

From 08d8cff05016569bb8189121612b217cfa63f4ae Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 12:36:43 -0700
Subject: [PATCH 241/617] making opt_sampler for each query -- need code to
 compute p-values from list of opt_samplers -- multiple_queries seems a good
 place

---
 selection/randomized/convenience.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 7ededfa6a..9c8fbc3e6 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -201,11 +201,10 @@ def summary(self,
 
         self._queries.setup_sampler(form_covariances=None)
         self._queries.setup_opt_state()
-        opt_sampler = optimization_sampler(self._view) # we should add extra views!
 
-        S = opt_sampler.sample(ndraw,
-                               burnin,
-                               stepsize=1.e-3)
+        opt_samplers = [optimization_sampler(q) for q in self._queries.objectives]
+        opt_samples = [opt_sampler.sample(ndraw,
+                                          burnin) for opt_sampler in opt_samplers]
 
         unpenalized_mle = restricted_Mest(self.loglike, selected_features)
         if self.parametric_cov_estimator == False:
@@ -217,12 +216,13 @@ def summary(self,
             target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
             form_covariances = glm_parametric_covariance(self.loglike)
 
-        opt_sampler.setup_target(target_info, form_covariances, parametric=self.parametric_cov_estimator)
+        for opt_sampler in opt_samplers:
+            opt_sampler.setup_target(target_info, form_covariances, parametric=self.parametric_cov_estimator)
 
-        pvalues = opt_sampler.coefficient_pvalues(unpenalized_mle, parameter=null_value, sample=S)
+        pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, parameter=null_value, sample=opt_samples[0])
         intervals = None
         if compute_intervals:
-            intervals = opt_sampler.confidence_intervals(unpenalized_mle, sample=S)
+            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, sample=opt_samples[0])
 
         return pvalues, intervals
 

From 6ecc9f72e64959208b3c0ed0d0414d83cf248253 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 12:40:00 -0700
Subject: [PATCH 242/617] changed name to cov_info -- we should also just be
 able to set this attribute by hand...

---
 selection/randomized/query.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index bfefb0c09..ca990e1e7 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -314,7 +314,7 @@ def __init__(self,
 
         # make sure we setup the queries
 
-        self.score_info = query.setup_sampler()
+        self.cov_info = query.setup_sampler()
         self.nboot = query.nboot
         self.observed_opt_state = query.observed_opt_state.copy()
         self.observed_internal_state = query.observed_internal_state.copy()
@@ -406,18 +406,15 @@ def setup_target(self,
         that will be used in computing weights for the sampler.
         """
 
-        self.score_cov = []
-        self.log_densities = []
-
         # we should pararallelize this over all views at once ?
 
         if parametric == False:
             self.target_cov, self.score_cov = form_covariances(target_info,  
-                                                               cross_terms=[self.score_info],
+                                                               cross_terms=[self.cov_info],
                                                                nsample=self.nboot)
         else:
             self.target_cov, self.score_cov = form_covariances(target_info,
-                                                               cross_terms=[self.score_info])
+                                                               cross_terms=[self.cov_info])
             
 
     def hypothesis_test(self,

From bc720b225321451efc40002e1689cfa641580a9a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 14:10:32 -0700
Subject: [PATCH 243/617] more explicit signature for constructor of
 opt_sampler

---
 selection/randomized/convenience.py           | 32 +++++--
 selection/randomized/query.py                 | 92 +++++++------------
 .../tests/test_optimization_sampler.py        | 21 ++++-
 selection/randomized/tests/test_sampling.py   | 11 ++-
 4 files changed, 86 insertions(+), 70 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 9c8fbc3e6..85aaf13b0 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -202,11 +202,8 @@ def summary(self,
         self._queries.setup_sampler(form_covariances=None)
         self._queries.setup_opt_state()
 
-        opt_samplers = [optimization_sampler(q) for q in self._queries.objectives]
-        opt_samples = [opt_sampler.sample(ndraw,
-                                          burnin) for opt_sampler in opt_samplers]
-
         unpenalized_mle = restricted_Mest(self.loglike, selected_features)
+
         if self.parametric_cov_estimator == False:
             n = self.loglike.data[0].shape[0]
             form_covariances = glm_nonparametric_bootstrap(n, n)
@@ -216,13 +213,32 @@ def summary(self,
             target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
             form_covariances = glm_parametric_covariance(self.loglike)
 
-        for opt_sampler in opt_samplers:
-            opt_sampler.setup_target(target_info, form_covariances, parametric=self.parametric_cov_estimator)
+        opt_samplers = []
+        for q in self._queries.objectives:
+            cov_info = q.setup_sampler()
+            if self.parametric_cov_estimator == False:
+                target_cov, score_cov = form_covariances(target_info,  
+                                                         cross_terms=[cov_info],
+                                                         nsample=q.nboot)
+            else:
+                target_cov, score_cov = form_covariances(target_info,  
+                                                         cross_terms=[cov_info])
+
+            opt_samplers.append(optimization_sampler(q.observed_opt_state,
+                                                     q.observed_internal_state,
+                                                     q.score_transform,
+                                                     q.opt_transform,
+                                                     q.projection,
+                                                     q.grad_log_density,
+                                                     q.log_density))
+
+        opt_samples = [opt_sampler.sample(ndraw,
+                                          burnin) for opt_sampler in opt_samplers]
 
-        pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, parameter=null_value, sample=opt_samples[0])
+        pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=null_value, sample=opt_samples[0])
         intervals = None
         if compute_intervals:
-            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, sample=opt_samples[0])
+            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_samples[0])
 
         return pvalues, intervals
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index ca990e1e7..73da248c2 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -288,7 +288,13 @@ class optimization_sampler(object):
     '''
 
     def __init__(self,
-                 query):
+                 observed_opt_state,
+                 observed_internal_state,
+                 score_transform,
+                 opt_transform,
+                 projection,
+                 grad_log_density,
+                 log_density):
 
         '''
         Parameters
@@ -314,31 +320,13 @@ def __init__(self,
 
         # make sure we setup the queries
 
-        self.cov_info = query.setup_sampler()
-        self.nboot = query.nboot
-        self.observed_opt_state = query.observed_opt_state.copy()
-        self.observed_internal_state = query.observed_internal_state.copy()
-        self.score_linear, self.score_offset = query.score_transform
-        self.opt_linear, self.opt_offset = query.opt_transform
-        self.projection_map = query.projection
-        self.grad_log_density = query.grad_log_density
-        self.log_density = query.log_density
-
-    def projection(self, opt_state):
-        '''
-        Projection map of projected Langevin sampler.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Typically, the projection will only act on
-           `opt_vars`.
-        Returns
-        -------
-        projected_state : np.float
-        '''
-
-        return self.projection_map(opt_state)
+        self.observed_opt_state = observed_opt_state.copy()
+        self.observed_internal_state = observed_internal_state.copy()
+        self.score_linear, self.score_offset = score_transform
+        self.opt_linear, self.opt_offset = opt_transform
+        self.projection = projection
+        self.grad_log_density = grad_log_density
+        self.log_density = log_density
 
     def gradient(self, opt_state):
         """
@@ -397,29 +385,11 @@ def sample(self, ndraw, burnin, stepsize=None):
                 samples.append(target_langevin.state.copy())
         return np.asarray(samples)
 
-    def setup_target(self, 
-                     target_info, 
-                     form_covariances, 
-                     parametric=False):
-        """
-        This computes the matrices used in the linear decomposition
-        that will be used in computing weights for the sampler.
-        """
-
-        # we should pararallelize this over all views at once ?
-
-        if parametric == False:
-            self.target_cov, self.score_cov = form_covariances(target_info,  
-                                                               cross_terms=[self.cov_info],
-                                                               nsample=self.nboot)
-        else:
-            self.target_cov, self.score_cov = form_covariances(target_info,
-                                                               cross_terms=[self.cov_info])
-            
-
     def hypothesis_test(self,
                         test_stat,
                         observed_value,
+                        target_cov,
+                        score_cov,
                         ndraw=10000,
                         burnin=2000,
                         stepsize=None,
@@ -490,6 +460,8 @@ def hypothesis_test(self,
 
     def confidence_intervals(self,
                              observed_target,
+                             target_cov,
+                             score_cov,
                              ndraw=10000,
                              burnin=2000,
                              stepsize=None,
@@ -530,7 +502,7 @@ def confidence_intervals(self,
         if sample is None:
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
 
-        _intervals = optimization_intervals([(self, sample)],
+        _intervals = optimization_intervals([(self, sample, target_cov, score_cov)],
                                             observed_target)
 
         limits = []
@@ -544,6 +516,8 @@ def confidence_intervals(self,
 
     def coefficient_pvalues(self,
                             observed_target,
+                            target_cov,
+                            score_cov,
                             parameter=None,
                             ndraw=10000,
                             burnin=2000,
@@ -592,7 +566,7 @@ def coefficient_pvalues(self,
         if parameter is None:
             parameter = np.zeros(observed_target.shape[0])
 
-        _intervals = optimization_intervals([(self, sample)],
+        _intervals = optimization_intervals([(self, sample, target_cov, score_cov)],
                                             observed_target)
         pvals = []
 
@@ -627,15 +601,15 @@ def __init__(self,
 
         self.opt_sampling_info = opt_sampling_info
         self._logden = 0
-        for opt_sampler, opt_sample in opt_sampling_info:
+        for opt_sampler, opt_sample, _, _ in opt_sampling_info:
             self._logden += opt_sampler.log_density(opt_sampler.observed_internal_state, opt_sample)
 
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
         if target_cov is None:
             self.target_cov = 0
-            for opt_sampler, opt_sample in opt_sampling_info:
-                self.target_cov += opt_sampler.target_cov
+            for opt_sampler, opt_sample, target_cov, _ in opt_sampling_info:
+                self.target_cov += target_cov
             self.target_cov /= len(opt_sampling_info)
         self._normal_sample = np.random.multivariate_normal(mean=np.zeros(self.target_cov.shape[0]), 
                                                             cov=self.target_cov, 
@@ -662,19 +636,19 @@ def pivot(self,
         target_cov = linear_func.dot(self.target_cov.dot(linear_func))
 
         nuisance = []
-        score_cov = []
-        for opt_sampler, opt_sample in self.opt_sampling_info:
-            cur_score_cov = linear_func.dot(opt_sampler.score_cov)
+        translate_dirs = []
+        for opt_sampler, opt_sample, _, score_cov in self.opt_sampling_info:
+            cur_score_cov = linear_func.dot(score_cov)
 
             # cur_nuisance is in the view's internal coordinates
             cur_nuisance = opt_sampler.observed_internal_state - cur_score_cov * observed_stat / target_cov
             nuisance.append(cur_nuisance)
-            score_cov.append(cur_score_cov / target_cov)
+            translate_dirs.append(cur_score_cov / target_cov)
 
 
         weights = self._weights(sample_stat + candidate,  # normal sample under candidate
                                 nuisance,                 # nuisance sufficient stats for each view
-                                score_cov)                # points will be moved like sample * score_cov
+                                translate_dirs)               # points will be moved like sample * score_cov
         
         pivot = np.mean((sample_stat + candidate <= observed_stat) * weights) / np.mean(weights)
 
@@ -712,7 +686,7 @@ def _rootL(gamma):
     def _weights(self, 
                  sample_stat,
                  nuisance,
-                 score_cov):
+                 translate_dirs):
 
         # Here we should loop through the views
         # and move the score of each view 
@@ -733,8 +707,8 @@ def _weights(self,
         internal_sample = []
         _lognum = 0
         for i, opt_info in enumerate(self.opt_sampling_info):
-            internal_sample = np.multiply.outer(sample_stat, score_cov[i]) + nuisance[i][None, :] # these are now internal coordinates
-            opt_sampler, opt_sample = opt_info
+            opt_sampler, opt_sample = opt_info[:2]
+            internal_sample = np.multiply.outer(sample_stat, translate_dirs[i]) + nuisance[i][None, :] # these are now internal coordinates
             _lognum += opt_sampler.log_density(internal_sample, opt_sample)
 
         _logratio = _lognum - self._logden
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
index 15a12bd19..0bf44cfc6 100644
--- a/selection/randomized/tests/test_optimization_sampler.py
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -9,6 +9,7 @@
                                poisson_instance)
 from ...tests.flags import SMALL_SAMPLES
 from ...tests.decorators import set_sampling_params_iftrue 
+from ..glm import glm_nonparametric_bootstrap, pairs_bootstrap_glm
 
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_optimization_sampler(ndraw=1000, burnin=200):
@@ -54,7 +55,25 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
 
         conv.decompose_subgradient(conditioning_groups, marginalizing_groups)
 
-        opt_samplers = [optimization_sampler(q) for q in conv._queries.objectives]
+        form_covariances = glm_nonparametric_bootstrap(n, n)
+        boot_target, boot_target_observed = pairs_bootstrap_glm(conv.loglike, selected_features, inactive=None)
+        target_info = boot_target
+
+        opt_samplers = []
+        for q in conv._queries.objectives:
+            cov_info = q.setup_sampler()
+            target_cov, score_cov = form_covariances(target_info,  
+                                                     cross_terms=[cov_info],
+                                                     nsample=q.nboot)
+
+            opt_samplers.append(optimization_sampler(q.observed_opt_state,
+                                                     q.observed_internal_state,
+                                                     q.score_transform,
+                                                     q.opt_transform,
+                                                     q.projection,
+                                                     q.grad_log_density,
+                                                     q.log_density))
+
         for opt_sampler in opt_samplers:
             S = opt_sampler.sample(ndraw,
                                    burnin,
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index ce55f4694..34608a2cc 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -171,8 +171,15 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
         print("signs", signs)
 
         selected_features = conv._view.selection_variable['variables']
-
-        opt_sampler = optimization_sampler(conv._view)
+        q = conv._view
+
+        opt_sampler = optimization_sampler(q.observed_opt_state,
+                                           q.observed_internal_state,
+                                           q.score_transform,
+                                           q.opt_transform,
+                                           q.projection,
+                                           q.grad_log_density,
+                                           q.log_density)
 
         S = opt_sampler.sample(ndraw,
                                burnin,

From 7a6d65ae90ba916b8d768613ac2af4ed5f66a3c9 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 14:30:10 -0700
Subject: [PATCH 244/617] grad_log_density is now assumed to always be the
 derivative with respect to opt_variables

---
 selection/randomized/M_estimator.py           |  4 +++-
 selection/randomized/query.py                 | 24 +++++--------------
 .../randomized/tests/test_convenience.py      | 11 +--------
 3 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index ed5988bd9..5dedc6635 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -484,6 +484,7 @@ def grad_log_density(self, internal_state, opt_state):
             marginalizing over the sub-gradient
 
             full_state is 
+            density should be expressed in terms of opt_state coordinates
         """
 
         if not self._setup:
@@ -509,7 +510,8 @@ def fraction(full_state_plus, full_state_minus, inactive_marginal_groups):
                 weights[self.inactive_marginal_groups] = fraction(full_state_plus, full_state_minus, self.inactive_marginal_groups)
             weights[~self.inactive_marginal_groups] = self.randomization._derivative_log_density(full_state)[~self.inactive_marginal_groups]
 
-            return -weights
+            opt_linear = self.opt_transform[0]
+            return -opt_linear.T.dot(weights)
         else:
             return query.grad_log_density(self, internal_state, opt_state)
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 73da248c2..d5c139b91 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -70,8 +70,12 @@ def log_density(self, internal_state, opt_state):
         return self.randomization.log_density(full_state)
 
     def grad_log_density(self, internal_state, opt_state):
+        """
+        Gradient in opt_state coordinates
+        """
         full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
-        return self.randomization.gradient(full_state)
+        opt_linear = self.opt_transform[0]
+        return opt_linear.T.dot(self.randomization.gradient(full_state))
 
      # implemented by subclasses
 
@@ -325,25 +329,9 @@ def __init__(self,
         self.score_linear, self.score_offset = score_transform
         self.opt_linear, self.opt_offset = opt_transform
         self.projection = projection
-        self.grad_log_density = grad_log_density
+        self.gradient = lambda opt: - grad_log_density(self.observed_internal_state, opt)
         self.log_density = log_density
 
-    def gradient(self, opt_state):
-        """
-        Gradient only w.r.t. opt variables
-        """
-
-        opt_grad = np.zeros_like(opt_state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        # this presumes grad_log_density is expressed not in internal coordinates
-        # but score coordinates -- hence the chain rule with self.opt_linear
-
-        opt_grad = self.opt_linear.T.dot(self.grad_log_density(self.observed_internal_state, 
-                                                               opt_state))
-        return -opt_grad
-
     def sample(self, ndraw, burnin, stepsize=None):
         '''
         Sample `target` from selective density
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index cd917c25b..c0d6c7f91 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -10,7 +10,7 @@
 from ...tests.flags import SMALL_SAMPLES
 from ...tests.decorators import set_sampling_params_iftrue 
 
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=2, burnin=2)
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=20)
 def test_lasso_constructors(ndraw=1000, burnin=200):
     """
     Smoke tests for lasso convenience constructors
@@ -65,15 +65,6 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
                      ndraw=ndraw,
                      burnin=burnin)
 
-        target_sampler, target_observed = glm_target(conv.loglike,
-                                                     selected_features,
-                                                     conv._queries,
-                                                     bootstrap=False)
-
-        S = target_sampler.sample(ndraw,
-                                  burnin)
-
-
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_step_constructors(ndraw=1000, burnin=200):
     """

From 915ab8c311cdd2a721d5553fbbbc65807e76e4e3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 17:30:23 -0700
Subject: [PATCH 245/617] WIP: each query has a sampler property now --
 decompose_subgradient just changes this property

---
 selection/randomized/M_estimator.py           | 286 +++++++++++++-----
 selection/randomized/convenience.py           |  19 +-
 selection/randomized/greedy_step.py           |  58 +++-
 selection/randomized/query.py                 |  74 +++--
 selection/randomized/reconstruction.py        |  24 +-
 selection/randomized/target.py                |   9 +-
 .../randomized/tests/test_convenience.py      |   2 +-
 .../tests/test_optimization_sampler.py        |   8 +-
 selection/randomized/tests/test_sampling.py   |  17 +-
 9 files changed, 334 insertions(+), 163 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 5dedc6635..08527866a 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -1,3 +1,7 @@
+from __future__ import print_function
+import functools
+from copy import copy
+
 import numpy as np
 import scipy
 from scipy import matrix
@@ -5,7 +9,7 @@
 import regreg.api as rr
 import regreg.affine as ra
 
-from .query import query 
+from .query import query, optimization_sampler
 from .reconstruction import reconstruct_full_from_internal
 from .randomization import split
 
@@ -279,6 +283,75 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         self.form_VQLambda()
         self.nboot = nboot
 
+
+#         if not self._setup:
+#             raise ValueError('setup_sampler should be called before using this function')
+
+#         if ('subgradient' not in self.selection_variable and 
+#             'scaling' not in self.selection_variable): # have not conditioned on any thing else
+
+#         elif ('subgradient' not in self.selection_variable and
+#               'scaling' in self.selection_variable): # conditioned on the initial scalings
+#                                                      # only the subgradient in opt_state
+#             new_state = self.group_lasso_dual.bound_prox(opt_state)
+#         elif ('subgradient' in self.selection_variable and
+#               'scaling' not in self.selection_variable): # conditioned on the subgradient
+#                                                          # only the scaling in opt_state
+#             new_state = np.maximum(opt_state, 0)
+#         else:
+#             new_state = opt_state
+#         return new_state
+
+
+    def get_sampler(self):
+        # setup the default optimization sampler
+
+        if not hasattr(self, "_sampler"):
+            def projection(group_lasso_dual, subgrad_slice, scaling_slice, opt_state):
+                """
+                Full projection for Langevin.
+
+                The state here will be only the state of the optimization variables.
+                """
+
+                new_state = opt_state.copy() # not really necessary to copy
+                new_state[scaling_slice] = np.maximum(opt_state[scaling_slice], 0)
+                new_state[subgrad_slice] = group_lasso_dual.bound_prox(opt_state[subgrad_slice])
+                return new_state
+
+            projection = functools.partial(projection, self.group_lasso_dual, self.subgrad_slice, self.scaling_slice)
+
+            def grad_log_density(query,
+                                 opt_linear,
+                                 rand_gradient,
+                                 internal_state,
+                                 opt_state):
+                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                return opt_linear.T.dot(rand_gradient(full_state).T)
+
+            grad_log_density = functools.partial(grad_log_density, self, self.opt_transform[0], self.randomization.gradient)
+
+            def log_density(query,
+                            opt_linear,
+                            rand_log_density,
+                            internal_state,
+                            opt_state):
+                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                return rand_log_density(full_state)
+
+            log_density = functools.partial(log_density, self, self.opt_transform[0], self.randomization.log_density)
+
+            self._sampler = optimization_sampler(self.observed_opt_state,
+                                                 self.observed_internal_state.copy(),
+                                                 self.score_transform,
+                                                 self.opt_transform,
+                                                 projection,
+                                                 grad_log_density,
+                                                 log_density)
+        return self._sampler
+
+    sampler = property(get_sampler, query.set_sampler)
+
     def form_VQLambda(self):
         nactive_groups = len(self.active_directions_list)
         nactive_vars = sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
@@ -307,7 +380,6 @@ def null(A, eps=1e-12):
 
         return self.VQLambda
 
-
     def derivative_logdet_jacobian(self, scalings):
         nactive_groups = len(self.active_directions_list)
         nactive_vars = np.sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
@@ -329,34 +401,34 @@ def derivative_logdet_jacobian(self, scalings):
     def setup_sampler(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
         pass
 
-    def projection(self, opt_state):
-        """
-        Full projection for Langevin.
-
-        The state here will be only the state of the optimization variables.
-        """
-
-        if not self._setup:
-            raise ValueError('setup_sampler should be called before using this function')
-
-        if ('subgradient' not in self.selection_variable and 
-            'scaling' not in self.selection_variable): # have not conditioned on any thing else
-            new_state = opt_state.copy() # not really necessary to copy
-            new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
-            new_state[self.subgrad_slice] = self.group_lasso_dual.bound_prox(opt_state[self.subgrad_slice])
-        elif ('subgradient' not in self.selection_variable and
-              'scaling' in self.selection_variable): # conditioned on the initial scalings
-                                                     # only the subgradient in opt_state
-            new_state = self.group_lasso_dual.bound_prox(opt_state)
-        elif ('subgradient' in self.selection_variable and
-              'scaling' not in self.selection_variable): # conditioned on the subgradient
-                                                         # only the scaling in opt_state
-            new_state = np.maximum(opt_state, 0)
-        else:
-            new_state = opt_state
-        return new_state
-
-    # optional things to condition on
+#     def projection(self, opt_state):
+#         """
+#         Full projection for Langevin.
+
+#         The state here will be only the state of the optimization variables.
+#         """
+
+#         if not self._setup:
+#             raise ValueError('setup_sampler should be called before using this function')
+
+#         if ('subgradient' not in self.selection_variable and 
+#             'scaling' not in self.selection_variable): # have not conditioned on any thing else
+#             new_state = opt_state.copy() # not really necessary to copy
+#             new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
+#             new_state[self.subgrad_slice] = self.group_lasso_dual.bound_prox(opt_state[self.subgrad_slice])
+#         elif ('subgradient' not in self.selection_variable and
+#               'scaling' in self.selection_variable): # conditioned on the initial scalings
+#                                                      # only the subgradient in opt_state
+#             new_state = self.group_lasso_dual.bound_prox(opt_state)
+#         elif ('subgradient' in self.selection_variable and
+#               'scaling' not in self.selection_variable): # conditioned on the subgradient
+#                                                          # only the scaling in opt_state
+#             new_state = np.maximum(opt_state, 0)
+#         else:
+#             new_state = opt_state
+#         return new_state
+
+#     # optional things to condition on
 
     def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=None):
         """
@@ -380,35 +452,30 @@ def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=N
         if not self._setup:
             raise ValueError('setup_sampler should be called before using this function')
 
-
         condition_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
         moving_inactive_groups = np.zeros_like(groups, dtype=bool)
         moving_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
-        self._inactive_groups = ~(self._active_groups+self._unpenalized)
+        _inactive_groups = ~(self._active_groups+self._unpenalized)
 
         inactive_marginal_groups = np.zeros_like(self._inactive, dtype=bool)
         limits_marginal_groups = np.zeros_like(self._inactive)
 
         for i, g in enumerate(groups):
-            if (self._inactive_groups[i]) and conditioning_groups[i]:
+            if (_inactive_groups[i]) and conditioning_groups[i]:
                 group = self.penalty.groups == g
                 condition_inactive_groups[i] = True
                 condition_inactive_variables[group] = True
-            elif (self._inactive_groups[i]) and (~conditioning_groups[i]) and (~marginalizing_groups[i]):
+            elif (_inactive_groups[i]) and (~conditioning_groups[i]) and (~marginalizing_groups[i]):
                 group = self.penalty.groups == g
                 moving_inactive_groups[i] = True
                 moving_inactive_variables[group] = True
-            if (self._inactive_groups[i]) and marginalizing_groups[i]:
+            if (_inactive_groups[i]) and marginalizing_groups[i]:
                 group = self.penalty.groups == g
                 inactive_marginal_groups[i] = True
                 limits_marginal_groups[i] = self.penalty.weights[g]
 
-        if inactive_marginal_groups is not None:
-            if inactive_marginal_groups.sum()>0:
-                self._marginalize_subgradient = True
-
-        self.inactive_marginal_groups = inactive_marginal_groups
-        self.limits_marginal_groups = limits_marginal_groups
+        inactive_marginal_groups = inactive_marginal_groups
+        limits_marginal_groups = limits_marginal_groups
 
         opt_linear, opt_offset = self.opt_transform
 
@@ -431,8 +498,6 @@ def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=N
                                                        moving_inactive_variables.sum())]
         observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive_variables]
 
-        self.observed_opt_state = observed_opt_state
-
         condition_linear = np.zeros((opt_linear.shape[0], (self._active_groups.sum() +
                                                            self._unpenalized_groups.sum() +
                                                            condition_inactive_variables.sum())))
@@ -445,14 +510,88 @@ def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=N
 
         new_offset = condition_linear[:,subgrad_condition_idx].dot(self.initial_subgrad[condition_inactive_variables]) + opt_offset
 
-        self.opt_transform = (new_linear, new_offset)
+        new_opt_transform = (new_linear, new_offset)
 
-        # for group LASSO this should not induce a bigger jacobian as
-        # the subgradients are in the interior of a ball
+        def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, inactive_marginal_groups):
+            return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
+                              _cdf(full_state_plus) - _cdf(full_state_minus)))[inactive_marginal_groups]
 
-        self.selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
+        def new_grad_log_density(query, 
+                                 limits_marginal_groups,
+                                 inactive_marginal_groups,
+                                 _cdf,
+                                 _pdf,
+                                 opt_linear,
+                                 deriv_log_dens,
+                                 internal_state, 
+                                 opt_state):
 
-        self.num_opt_var = new_linear.shape[1]
+            full_state = reconstruct_full_from_internal(new_opt_transform, query.score_transform, internal_state, opt_state)
+
+            p = query.penalty.shape[0]
+            weights = np.zeros(p)
+
+            if inactive_marginal_groups.sum()>0:
+                full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
+                full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
+                weights[inactive_marginal_groups] = _fraction(_cdf, _pdf, full_state_plus, full_state_minus, inactive_marginal_groups)
+            weights[~inactive_marginal_groups] = deriv_log_dens(full_state)[~inactive_marginal_groups]
+            return -opt_linear.T.dot(weights)
+
+        new_grad_log_density = functools.partial(new_grad_log_density,
+                                                 self,
+                                                 limits_marginal_groups,
+                                                 inactive_marginal_groups,
+                                                 self.randomization._cdf,
+                                                 self.randomization._pdf,
+                                                 new_opt_transform[0],
+                                                 self.randomization._derivative_log_density)
+
+        def new_log_density(query, 
+                            limits_marginal_groups,
+                            inactive_marginal_groups,
+                            _cdf,
+                            _pdf,
+                            opt_linear,
+                            log_dens,
+                            internal_state, 
+                            opt_state):
+
+            full_state = reconstruct_full_from_internal(new_opt_transform, query.score_transform, internal_state, opt_state)
+            full_state = np.atleast_2d(full_state)
+            p = query.penalty.shape[0]
+            dens = 0
+
+            if inactive_marginal_groups.sum()>0:
+                full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
+                full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
+                dens += np.log(_cdf(full_state_plus) - _cdf(full_state_minus)).sum()
+
+            dens += log_dens(full_state[:,~inactive_marginal_groups])
+            return np.squeeze(dens) # should this be negative to match the gradient log density?
+
+        new_log_density = functools.partial(new_log_density,
+                                            self,
+                                            limits_marginal_groups,
+                                            inactive_marginal_groups,
+                                            self.randomization._cdf,
+                                            self.randomization._pdf,
+                                            self.opt_transform[0],
+                                            self.randomization._log_density)
+
+        new_projection = lambda opt: opt # this is wrong, but I am running a smoke test first
+
+        new_selection_variable = copy(self.selection_variable)
+        new_selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
+
+        self.sampler = optimization_sampler(observed_opt_state,
+                                            self.observed_internal_state.copy(),
+                                            self.score_transform,
+                                            new_opt_transform,
+                                            new_projection,
+                                            new_grad_log_density,
+                                            new_log_density,
+                                            selection_info=(self, new_selection_variable))
 
     def condition_on_scalings(self):
         """
@@ -478,42 +617,41 @@ def condition_on_scalings(self):
         self.scaling_slice = np.zeros(new_linear.shape[1], np.bool)
         self.num_opt_var = new_linear.shape[1]
 
+#     def grad_log_density(self, internal_state, opt_state):
+#         """
+#             marginalizing over the sub-gradient
 
-    def grad_log_density(self, internal_state, opt_state):
-        """
-            marginalizing over the sub-gradient
-
-            full_state is 
-            density should be expressed in terms of opt_state coordinates
-        """
+#             full_state is 
+#             density should be expressed in terms of opt_state coordinates
+#         """
 
-        if not self._setup:
-            raise ValueError('setup_sampler should be called before using this function')
+#         if not self._setup:
+#             raise ValueError('setup_sampler should be called before using this function')
 
-        if self._marginalize_subgradient:
+#         if self._marginalize_subgradient:
 
-            full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
+#             full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
 
-            p = self.penalty.shape[0]
-            weights = np.zeros(p)
+#             p = self.penalty.shape[0]
+#             weights = np.zeros(p)
 
-            if self.inactive_marginal_groups.sum()>0:
-                full_state_plus = full_state + np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
-                full_state_minus = full_state - np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
+#             if self.inactive_marginal_groups.sum()>0:
+#                 full_state_plus = full_state + np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
+#                 full_state_minus = full_state - np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
 
 
-            def fraction(full_state_plus, full_state_minus, inactive_marginal_groups):
-                return (np.divide(self.randomization._pdf(full_state_plus) - self.randomization._pdf(full_state_minus),
-                       self.randomization._cdf(full_state_plus) - self.randomization._cdf(full_state_minus)))[inactive_marginal_groups]
+#             def fraction(full_state_plus, full_state_minus, inactive_marginal_groups):
+#                 return (np.divide(self.randomization._pdf(full_state_plus) - self.randomization._pdf(full_state_minus),
+#                        self.randomization._cdf(full_state_plus) - self.randomization._cdf(full_state_minus)))[inactive_marginal_groups]
 
-            if self.inactive_marginal_groups.sum() > 0:
-                weights[self.inactive_marginal_groups] = fraction(full_state_plus, full_state_minus, self.inactive_marginal_groups)
-            weights[~self.inactive_marginal_groups] = self.randomization._derivative_log_density(full_state)[~self.inactive_marginal_groups]
+#             if self.inactive_marginal_groups.sum() > 0:
+#                 weights[self.inactive_marginal_groups] = fraction(full_state_plus, full_state_minus, self.inactive_marginal_groups)
+#             weights[~self.inactive_marginal_groups] = self.randomization._derivative_log_density(full_state)[~self.inactive_marginal_groups]
 
-            opt_linear = self.opt_transform[0]
-            return -opt_linear.T.dot(weights)
-        else:
-            return query.grad_log_density(self, internal_state, opt_state)
+#             opt_linear = self.opt_transform[0]
+#             return -opt_linear.T.dot(weights)
+#         else:
+#             return query.grad_log_density(self, internal_state, opt_state)
 
 def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
     """
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 85aaf13b0..d5def8e9e 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -16,7 +16,7 @@
                   glm_parametric_covariance,
                   pairs_bootstrap_glm)
 from .randomization import randomization
-from .query import multiple_queries, optimization_sampler
+from .query import multiple_queries
 from .M_estimator import restricted_Mest
 
 class lasso(object):
@@ -152,12 +152,9 @@ def decompose_subgradient(self,
 
         if not hasattr(self, "_view"):
             raise ValueError("fit method should be run first")
-
-        self._view.decompose_subgradient(conditioning_groups=conditioning_groups,
+        self._view.decompose_subgradient(conditioning_groups=conditioning_groups, 
                                          marginalizing_groups=marginalizing_groups)
 
-        self._queries.setup_opt_state()
-
     def summary(self,
                 selected_features,
                 null_value=None,
@@ -199,8 +196,8 @@ def summary(self,
         if null_value is None:
             null_value = np.zeros(self.loglike.shape[0])
 
-        self._queries.setup_sampler(form_covariances=None)
-        self._queries.setup_opt_state()
+        #self._queries.setup_sampler(form_covariances=None)
+        #self._queries.setup_opt_state()
 
         unpenalized_mle = restricted_Mest(self.loglike, selected_features)
 
@@ -224,13 +221,7 @@ def summary(self,
                 target_cov, score_cov = form_covariances(target_info,  
                                                          cross_terms=[cov_info])
 
-            opt_samplers.append(optimization_sampler(q.observed_opt_state,
-                                                     q.observed_internal_state,
-                                                     q.score_transform,
-                                                     q.opt_transform,
-                                                     q.projection,
-                                                     q.grad_log_density,
-                                                     q.log_density))
+            opt_samplers.append(q.sampler)
 
         opt_samples = [opt_sampler.sample(ndraw,
                                           burnin) for opt_sampler in opt_samplers]
diff --git a/selection/randomized/greedy_step.py b/selection/randomized/greedy_step.py
index 896616a91..86b3da405 100644
--- a/selection/randomized/greedy_step.py
+++ b/selection/randomized/greedy_step.py
@@ -1,8 +1,10 @@
+import functools
 import numpy as np
 import regreg.api as rr
 
-from .query import query
+from .query import query, optimization_sampler
 from .M_estimator import restricted_Mest
+from .reconstruction import reconstruct_full_from_internal
 
 class greedy_score_step(query):
 
@@ -124,7 +126,7 @@ def solve(self, nboot=2000):
         self.nboot = nboot
         self.ndim = self.loss.shape[0]
 
-    def setup_sampler(self):
+        # setup opt state and transforms
 
         self.observed_opt_state = np.hstack([self.observed_subgradients,
                                              self.observed_scaling])
@@ -142,11 +144,49 @@ def setup_sampler(self):
         self._solved = True
         self._setup = True
 
-    def projection(self, opt_state):
-        """
-        Full projection for Langevin.
-
-        The state here will be only the state of the optimization variables.
-        """
-        return self.group_lasso_dual_epigraph.cone_prox(opt_state)
 
+    def setup_sampler(self):
+        pass
+
+    def get_sampler(self):
+        # now setup optimization sampler
+
+        if not hasattr(self, "_sampler"):
+            def projection(epigraph, opt_state):
+                """
+                Full projection for Langevin.
+
+                The state here will be only the state of the optimization variables.
+                """
+                return epigraph.cone_prox(opt_state)
+            projection = functools.partial(projection, self.group_lasso_dual_epigraph)
+
+            def grad_log_density(query,
+                                 opt_linear,
+                                 rand_gradient,
+                                 internal_state,
+                                 opt_state):
+                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                return opt_linear.T.dot(rand_gradient(full_state))
+
+            grad_log_density = functools.partial(grad_log_density, self, self.opt_transform[0], self.randomization.gradient)
+
+            def log_density(query,
+                            opt_linear,
+                            rand_log_density,
+                            internal_state,
+                            opt_state):
+                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                return rand_log_density(full_state)
+            log_density = functools.partial(log_density, self, self.opt_transform[0], self.randomization.log_density)
+
+            self._sampler = optimization_sampler(self.observed_opt_state,
+                                                 self.observed_internal_state.copy(),
+                                                 self.score_transform,
+                                                 self.opt_transform,
+                                                 projection,
+                                                 grad_log_density,
+                                                 log_density)
+        return self._sampler
+
+    sampler = property(get_sampler, query.set_sampler)
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index d5c139b91..58a7051af 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -10,8 +10,7 @@
 from ..sampling.langevin import projected_langevin
 from .target import (targeted_sampler,
                      bootstrapped_target_sampler)
-from .reconstruction import (reconstruct_opt,
-                             reconstruct_full_from_internal)
+from .reconstruction import reconstruct_full_from_internal
 
 
 class query(object):
@@ -62,40 +61,49 @@ def linear_decomposition(self, target_score_cov, target_cov, observed_target_sta
 
         return (composition_linear_part, composition_offset)
 
-    # the default log conditional density of state given data 
-    # with no conditioning or marginalizing
+    def get_sampler(self):
+        if hasattr(self, "_sampler"):
+            return self._sampler
 
-    def log_density(self, internal_state, opt_state):
-        full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
-        return self.randomization.log_density(full_state)
+    def set_sampler(self, sampler):
+        self._sampler = sampler
 
-    def grad_log_density(self, internal_state, opt_state):
-        """
-        Gradient in opt_state coordinates
-        """
-        full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
-        opt_linear = self.opt_transform[0]
-        return opt_linear.T.dot(self.randomization.gradient(full_state))
+    sampler = property(get_sampler, set_sampler)
 
-     # implemented by subclasses
+    # implemented by subclasses
 
-    def grad_log_jacobian(self, opt_state):
-        """
-        log_jacobian depends only on data through
-        Hessian at \bar{\beta}_E which we
-        assume is close to Hessian at \bar{\beta}_E^*
-        """
-        # needs to be implemented for group lasso
-        return self.derivative_logdet_jacobian(opt_state[self.scaling_slice])
+    # the default log conditional density of state given data 
+    # with no conditioning or marginalizing
 
-    def jacobian(self, opt_state):
-        """
-        log_jacobian depends only on data through
-        Hessian at \bar{\beta}_E which we
-        assume is close to Hessian at \bar{\beta}_E^*
-        """
-        # needs to be implemented for group lasso
-        return 1.
+#     def log_density(self, internal_state, opt_state):
+#         full_state = reconstruct_full_from_internal(self.opt_transform, self.score_transform, internal_state, opt_state)
+#         return self.randomization.log_density(full_state)
+
+#     def grad_log_density(self, internal_state, opt_state):
+#         """
+#         Gradient in opt_state coordinates
+#         """
+#         full_state = reconstruct_full_from_internal(self.opt_transform, self.score_transform, internal_state, opt_state)
+#         opt_linear = self.opt_transform[0]
+#         return opt_linear.T.dot(self.randomization.gradient(full_state))
+
+#     def grad_log_jacobian(self, opt_state):
+#         """
+#         log_jacobian depends only on data through
+#         Hessian at \bar{\beta}_E which we
+#         assume is close to Hessian at \bar{\beta}_E^*
+#         """
+#         # needs to be implemented for group lasso
+#         return self.derivative_logdet_jacobian(opt_state[self.scaling_slice])
+
+#     def jacobian(self, opt_state):
+#         """
+#         log_jacobian depends only on data through
+#         Hessian at \bar{\beta}_E which we
+#         assume is close to Hessian at \bar{\beta}_E^*
+#         """
+#         # needs to be implemented for group lasso
+#         return 1.
 
     def solve(self):
 
@@ -298,7 +306,8 @@ def __init__(self,
                  opt_transform,
                  projection,
                  grad_log_density,
-                 log_density):
+                 log_density,
+                 selection_info=None):
 
         '''
         Parameters
@@ -331,6 +340,7 @@ def __init__(self,
         self.projection = projection
         self.gradient = lambda opt: - grad_log_density(self.observed_internal_state, opt)
         self.log_density = log_density
+        self.selection_info = selection_info # a way to record what view and what was conditioned on -- not used in calculations
 
     def sample(self, ndraw, burnin, stepsize=None):
         '''
diff --git a/selection/randomized/reconstruction.py b/selection/randomized/reconstruction.py
index dc827aa73..9e790395d 100644
--- a/selection/randomized/reconstruction.py
+++ b/selection/randomized/reconstruction.py
@@ -28,48 +28,42 @@ def reconstruct_internal(data_state, data_transform):
     else:
         return np.squeeze(data_offset)
 
-def reconstruct_full_from_data(query, data_state, data_transform, opt_state):
+def reconstruct_full_from_data(opt_transform, score_transform, data_state, data_transform, opt_state):
     """
     Reconstruct original randomization state from state data
     and optimization state.
     """
 
-    if not query._setup:
-        raise ValueError('setup_sampler should be called before using this function')
-
     internal_state = reconstruct_internal(data_state, data_transform)
-    return np.squeeze(reconstruct_full_from_internal(query, internal_state, opt_state))
+    return np.squeeze(reconstruct_full_from_internal(opt_transform, score_transform, internal_state, opt_state))
 
-def reconstruct_opt(query, opt_state):
+def reconstruct_opt(opt_transform, opt_state):
     """
     Reconstruct part of the original randomization state 
     in terms of optimization state.
     """
-    if not query._setup:
-        raise ValueError('setup_sampler should be called on query before using this function')
-
-    opt_linear, opt_offset = query.opt_transform
+    opt_linear, opt_offset = opt_transform
     if opt_linear is not None:
         opt_state = np.atleast_2d(opt_state)
         return np.squeeze(opt_linear.dot(opt_state.T) + opt_offset[:, None]).T
     else:
         return opt_offset
 
-def reconstruct_score(query, internal_state):
+def reconstruct_score(score_transform, internal_state):
     """
     Reconstruct part of the original randomization state 
     determined by the score of the loss from 
     a query's internal coordinates.
     """
-    score_linear, score_offset = query.score_transform
+    score_linear, score_offset = score_transform
     return score_linear.dot(internal_state.T).T + score_offset
 
-def reconstruct_full_from_internal(query, internal_state, opt_state):
+def reconstruct_full_from_internal(opt_transform, score_transform, internal_state, opt_state):
     """
     Reconstruct original randomization state from internal state data
     and optimization state.
     """
-    randomization_internal = reconstruct_score(query, internal_state)
-    randomization_opt = reconstruct_opt(query, opt_state)
+    randomization_internal = reconstruct_score(score_transform, internal_state)
+    randomization_opt = reconstruct_opt(opt_transform, opt_state)
     return randomization_internal + randomization_opt
 
diff --git a/selection/randomized/target.py b/selection/randomized/target.py
index 6513ff435..a5f68bbf5 100644
--- a/selection/randomized/target.py
+++ b/selection/randomized/target.py
@@ -211,7 +211,8 @@ def gradient(self, state):
 
         for i in range(self.nqueries):
 
-            randomization_state = reconstruct_full_from_data(self.objectives[i],
+            randomization_state = reconstruct_full_from_data(self.objectives[i].opt_transform,
+                                                             self.objectives[i].score_transform,
                                                              target_state, 
                                                              self.target_transform[i], 
                                                              opt_state[self.opt_slice[i]])
@@ -506,7 +507,8 @@ def reconstruct(self, state):
         reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
 
         for i in range(self.nqueries):
-            reconstructed[:, self.randomization_slice[i]] = reconstruct_full_from_data(self.objectives[i],
+            reconstructed[:, self.randomization_slice[i]] = reconstruct_full_from_data(self.objectives[i].opt_transform,
+                                                                                       self.objectives[i].score_transform,
                                                                                        target_state,
                                                                                        self.target_transform[i],
                                                                                        opt_state[:, self.opt_slice[i]])
@@ -590,7 +592,8 @@ def gradient(self, state):
 
         for i in range(self.nqueries):
 
-            randomization_state = reconstruct_full_from_data(self.objectives[i],
+            randomization_state = reconstruct_full_from_data(self.objectives[i].opt_transform,
+                                                             self.objectives[i].score_transform,
                                                              boot_state, 
                                                              self.boot_transform[i], 
                                                              opt_state[self.opt_slice[i]])
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index c0d6c7f91..9d4517396 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -27,7 +27,7 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
                                                             [False, True]):
 
         inst, const = const_info
-        X, Y = inst(n=10, p=20, signal=1, s=3)[:2]
+        X, Y = inst(n=100, p=120, signal=0.1, s=3)[:2]
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 20
diff --git a/selection/randomized/tests/test_optimization_sampler.py b/selection/randomized/tests/test_optimization_sampler.py
index 0bf44cfc6..e82efd7bf 100644
--- a/selection/randomized/tests/test_optimization_sampler.py
+++ b/selection/randomized/tests/test_optimization_sampler.py
@@ -66,13 +66,7 @@ def test_optimization_sampler(ndraw=1000, burnin=200):
                                                      cross_terms=[cov_info],
                                                      nsample=q.nboot)
 
-            opt_samplers.append(optimization_sampler(q.observed_opt_state,
-                                                     q.observed_internal_state,
-                                                     q.score_transform,
-                                                     q.opt_transform,
-                                                     q.projection,
-                                                     q.grad_log_density,
-                                                     q.log_density))
+            opt_samplers.append(q.sampler)
 
         for opt_sampler in opt_samplers:
             S = opt_sampler.sample(ndraw,
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index 34608a2cc..a1f44fdf1 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -173,13 +173,13 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
         selected_features = conv._view.selection_variable['variables']
         q = conv._view
 
-        opt_sampler = optimization_sampler(q.observed_opt_state,
-                                           q.observed_internal_state,
-                                           q.score_transform,
-                                           q.opt_transform,
-                                           q.projection,
-                                           q.grad_log_density,
-                                           q.log_density)
+        opt_sampler = q.sampler # optimization_sampler(q.observed_opt_state,
+#                                            q.observed_internal_state,
+#                                            q.score_transform,
+#                                            q.opt_transform,
+#                                            q.projection,
+#                                            q.grad_log_density,
+#                                            q.log_density)
 
         S = opt_sampler.sample(ndraw,
                                burnin,
@@ -229,7 +229,8 @@ def reconstruct_opt(query, state):
     if state.ndim > 2:
         raise ValueError('expecting at most 2-dimensional array')
 
-    reconstructed = reconstruct_full_from_internal(query,
+    reconstructed = reconstruct_full_from_internal(query.opt_transform,
+                                                   query.score_transform,
                                                    query.observed_internal_state,
                                                    state)
 

From 141e854191e117e8ec92e411fe233e3ec8f9f698 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 17:32:21 -0700
Subject: [PATCH 246/617] removing projection and density methods from query --
 these will be held in its sampler property

---
 selection/randomized/query.py | 37 -----------------------------------
 1 file changed, 37 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 58a7051af..dfc432fd8 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -72,39 +72,6 @@ def set_sampler(self, sampler):
 
     # implemented by subclasses
 
-    # the default log conditional density of state given data 
-    # with no conditioning or marginalizing
-
-#     def log_density(self, internal_state, opt_state):
-#         full_state = reconstruct_full_from_internal(self.opt_transform, self.score_transform, internal_state, opt_state)
-#         return self.randomization.log_density(full_state)
-
-#     def grad_log_density(self, internal_state, opt_state):
-#         """
-#         Gradient in opt_state coordinates
-#         """
-#         full_state = reconstruct_full_from_internal(self.opt_transform, self.score_transform, internal_state, opt_state)
-#         opt_linear = self.opt_transform[0]
-#         return opt_linear.T.dot(self.randomization.gradient(full_state))
-
-#     def grad_log_jacobian(self, opt_state):
-#         """
-#         log_jacobian depends only on data through
-#         Hessian at \bar{\beta}_E which we
-#         assume is close to Hessian at \bar{\beta}_E^*
-#         """
-#         # needs to be implemented for group lasso
-#         return self.derivative_logdet_jacobian(opt_state[self.scaling_slice])
-
-#     def jacobian(self, opt_state):
-#         """
-#         log_jacobian depends only on data through
-#         Hessian at \bar{\beta}_E which we
-#         assume is close to Hessian at \bar{\beta}_E^*
-#         """
-#         # needs to be implemented for group lasso
-#         return 1.
-
     def solve(self):
 
         raise NotImplementedError('abstract method')
@@ -123,10 +90,6 @@ def setup_sampler(self):
         """
         raise NotImplementedError('abstract method -- only keyword arguments')
 
-    def projection(self, opt_state):
-
-        raise NotImplementedError('abstract method -- projection of optimization variables')
-
 class multiple_queries(object):
 
     '''

From cb57979a56035472df62ee8399944be97c1d23b2 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 17:44:15 -0700
Subject: [PATCH 247/617] WIP: threshold_score has a sampler that it doesn't
 need...

---
 selection/randomized/query.py           |   2 +-
 selection/randomized/threshold_score.py | 129 +++++++++++++++++-------
 2 files changed, 93 insertions(+), 38 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index dfc432fd8..7648bf6b2 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -331,7 +331,7 @@ def sample(self, ndraw, burnin, stepsize=None):
         '''
 
         if stepsize is None:
-            stepsize = 1./len(self.observed_opt_state) 
+            stepsize = 1./max(len(self.observed_opt_state), 1)
 
         target_langevin = projected_langevin(self.observed_opt_state.copy(),
                                              self.gradient,
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index 829bf6f42..62882d841 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -1,7 +1,9 @@
+import functools
+
 import numpy as np
 import regreg.api as rr
 
-from .query import query
+from .query import query, optimization_sampler
 from .reconstruction import reconstruct_full_from_internal
 from .M_estimator import restricted_Mest
 
@@ -125,35 +127,6 @@ def solve(self, nboot=2000):
         self.nboot = nboot
         self.ndim = self.loss.shape[0]
 
-    def grad_log_density(self, internal_state, opt_state):
-        """
-        marginalizing over the sub-gradient
-        """
-
-        if not self._setup:
-            raise ValueError('setup_sampler should be called before using this function')
-
-        full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
-
-        threshold = self.threshold
-        weights = np.zeros_like(self.boundary, np.float)
-
-        weights[self.boundary] = ((self.randomization._density(threshold[self.boundary] - full_state[self.boundary])
-                                   - self.randomization._density(-threshold[self.boundary] - full_state[self.boundary])) /
-                                  (1 - self.randomization._cdf(threshold[self.boundary] - full_state[self.boundary]) + 
-                                   self.randomization._cdf(-threshold[self.boundary] - full_state[self.boundary])))
-
-
-        weights[~self.boundary] = ((-self.randomization._density(threshold[~self.boundary] - 
-                                                                 full_state[~self.boundary]) + 
-                                     self.randomization._density(-threshold[~self.boundary] - full_state[~self.boundary])) /
-                                   (self.randomization._cdf(threshold[~self.boundary] - full_state[~self.boundary]) - 
-                                    self.randomization._cdf(-threshold[~self.boundary] - full_state[~self.boundary])))
-
-        return weights ## tested
-
-    def setup_sampler(self):
-
         # must set observed_opt_state, opt_transform and score_transform
 
         p = self.boundary.shape[0]  # shorthand
@@ -165,11 +138,93 @@ def setup_sampler(self):
 
         self._setup = True
 
-    def projection(self, opt_state):
-        """
-        Full projection for Langevin.
-        The state here will be only the state of the optimization variables.
-        for now, groups are singletons
-        """
-        return opt_state
+    def get_sampler(self):
+
+        if not hasattr(self, "_sampler"):
+
+            def grad_log_density(boundary, 
+                                 opt_transform,
+                                 score_transform,
+                                 threshold,
+                                 _density,
+                                 _cdf,
+                                 internal_state, 
+                                 opt_state):
+                """
+                marginalizing over the sub-gradient
+                """
+
+                full_state = reconstruct_full_from_internal(opt_transform, score_transform, internal_state, opt_state)
+
+                weights = np.zeros_like(boundary, np.float)
+
+                weights[boundary] = ((_density(threshold[boundary] - full_state[boundary])
+                                           - _density(-threshold[boundary] - full_state[boundary])) /
+                                          (1 - _cdf(threshold[boundary] - full_state[boundary]) + 
+                                           _cdf(-threshold[boundary] - full_state[boundary])))
+
+
+                weights[~boundary] = ((-_density(threshold[~boundary] - 
+                                                 full_state[~boundary]) + 
+                                             _density(-threshold[~boundary] - full_state[~boundary])) /
+                                           (_cdf(threshold[~boundary] - full_state[~boundary]) - 
+                                            _cdf(-threshold[~boundary] - full_state[~boundary])))
+
+                opt_linear = opt_transform[0]
+                return opt_linear.T.dot(weights) ## tested
+
+            grad_log_density = functools.partial(grad_log_density,
+                                                 self.boundary,
+                                                 self.opt_transform,
+                                                 self.score_transform,
+                                                 self.threshold,
+                                                 self.randomization._density,
+                                                 self.randomization._cdf)
+
+            def log_density(boundary, 
+                            opt_transform,
+                            score_transform,
+                            threshold,
+                            _density,
+                            _cdf,
+                            internal_state, 
+                            opt_state):
+                """
+                marginalizing over the sub-gradient
+                """
+
+                full_state = reconstruct_full_from_internal(opt_transform, score_transform, internal_state, opt_state)
+                logdens = 0
+                weights = np.zeros_like(boundary, np.float)
+
+                logdens += np.log(1 - _cdf(threshold[boundary] - full_state[boundary]) + 
+                                  _cdf(-threshold[boundary] - full_state[boundary]))
+                logdens += np.log(_cdf(threshold[~boundary] - full_state[~boundary]) - 
+                                   _cdf(-threshold[~boundary] - full_state[~boundary]))
+                return logdens
+            
+
+            log_density = functools.partial(log_density,
+                                            self.boundary,
+                                            self.opt_transform,
+                                            self.score_transform,
+                                            self.threshold,
+                                            self.randomization._density,
+                                            self.randomization._cdf)
+            projection = lambda opt: opt
+
+            self._sampler = optimization_sampler(self.observed_opt_state,
+                                                 self.observed_internal_state.copy(),
+                                                 self.score_transform,
+                                                 self.opt_transform,
+                                                 projection,
+                                                 grad_log_density,
+                                                 log_density)
+        return self._sampler
+
+    sampler = property(get_sampler, query.set_sampler)
+
+    def setup_sampler(self):
+        pass
+
 

From 8c7286686c7f8b28f07ca5403905fe161cfe5dc7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 22:47:56 -0700
Subject: [PATCH 248/617] now views can have no opt variables and sampling
 works -- TODO need the correct projection for M_estimator

---
 selection/randomized/M_estimator.py     |  8 ++--
 selection/randomized/query.py           | 52 +++++++++++++--------
 selection/randomized/threshold_score.py | 61 ++++++-------------------
 3 files changed, 50 insertions(+), 71 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 08527866a..9460aeb5c 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -560,15 +560,15 @@ def new_log_density(query,
             full_state = reconstruct_full_from_internal(new_opt_transform, query.score_transform, internal_state, opt_state)
             full_state = np.atleast_2d(full_state)
             p = query.penalty.shape[0]
-            dens = 0
+            logdens = 0
 
             if inactive_marginal_groups.sum()>0:
                 full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
                 full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
-                dens += np.log(_cdf(full_state_plus) - _cdf(full_state_minus)).sum()
+                logdens += np.log(_cdf(full_state_plus) - _cdf(full_state_minus)).sum()
 
-            dens += log_dens(full_state[:,~inactive_marginal_groups])
-            return np.squeeze(dens) # should this be negative to match the gradient log density?
+            logdens += log_dens(full_state[:,~inactive_marginal_groups])
+            return np.squeeze(logdens) # should this be negative to match the gradient log density?
 
         new_log_density = functools.partial(new_log_density,
                                             self,
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 7648bf6b2..4838410e4 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -283,19 +283,6 @@ def __init__(self,
            to reflect only what is needed.)
         '''
 
-        # sampler will draw samples for bootstrap
-        # these are arguments to target_info and score_bootstrap
-        # nonparamteric bootstrap is np.random.choice(n, size=(n,), replace=True)
-        # residual bootstrap might be X_E.dot(\bar{\beta}_E)
-        # + np.random.choice(resid, size=(n,), replace=True)
-
-        # if target_set is not None, we assume that
-        # these coordinates (specified by a list of coordinates) of target
-        # is assumed to be independent of the rest
-        # the corresponding block of `target_cov` is zeroed out
-
-        # make sure we setup the queries
-
         self.observed_opt_state = observed_opt_state.copy()
         self.observed_internal_state = observed_internal_state.copy()
         self.score_linear, self.score_offset = score_transform
@@ -330,6 +317,9 @@ def sample(self, ndraw, burnin, stepsize=None):
         gradient : np.float
         '''
 
+        if self.observed_opt_state.shape in ((), (0,)): # no opt variables to sample:
+            return None
+
         if stepsize is None:
             stepsize = 1./max(len(self.observed_opt_state), 1)
 
@@ -462,9 +452,11 @@ def confidence_intervals(self,
 
         if sample is None:
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
+        else:
+            ndraw = sample.shape[0]
 
         _intervals = optimization_intervals([(self, sample, target_cov, score_cov)],
-                                            observed_target)
+                                            observed_target, ndraw)
 
         limits = []
 
@@ -523,12 +515,14 @@ def coefficient_pvalues(self,
 
         if sample is None:
             sample = self.sample(ndraw, burnin, stepsize=stepsize)
+        else:
+            ndraw = sample.shape[0]
 
         if parameter is None:
             parameter = np.zeros(observed_target.shape[0])
 
         _intervals = optimization_intervals([(self, sample, target_cov, score_cov)],
-                                            observed_target)
+                                            observed_target, ndraw)
         pvals = []
 
         for i in range(observed_target.shape[0]):
@@ -556,11 +550,30 @@ def crude_lipschitz(self):
 class optimization_intervals(object):
 
     def __init__(self,
-                 opt_sampling_info, # a sequence of (opt_sampler, opt_sample) objects
+                 opt_sampling_info, # a sequence of (opt_sampler, opt_sample, target_cov, score_cov) objects
+                                    # in theory all target_cov should be about the same...
                  observed,
+                 nsample, # how large a normal sample
                  target_cov=None):
 
-        self.opt_sampling_info = opt_sampling_info
+        # not all opt_samples will be of the same size as nsample 
+        # let's repeat them as necessary
+        
+        tiled_sampling_info = []  # same tuples as the input, but with opt_sample resized to nsample rows
+        for opt_sampler, opt_sample, t_cov, score_cov in opt_sampling_info:
+            if opt_sample is not None:
+                if opt_sample.shape[0] < nsample:  # too short: repeat the rows, then trim to nsample
+                    if opt_sample.ndim == 1:
+                        tiled_opt_sample = np.tile(opt_sample, int(np.ceil(nsample / float(opt_sample.shape[0]))))[:nsample]
+                    else:  # reps must be an int; float() keeps the ratio exact under Python 2 division
+                        tiled_opt_sample = np.tile(opt_sample, (int(np.ceil(nsample / float(opt_sample.shape[0]))), 1))[:nsample]
+                else:  # already long enough: keep the first nsample draws
+                    tiled_opt_sample = opt_sample[:nsample]
+            else:  # no sample was supplied for this view, nothing to tile
+                tiled_opt_sample = None
+            tiled_sampling_info.append((opt_sampler, tiled_opt_sample, t_cov, score_cov))  # NOTE(review): _logden below still uses the untiled input
+
+        self.opt_sampling_info = tiled_sampling_info
         self._logden = 0
         for opt_sampler, opt_sample, _, _ in opt_sampling_info:
             self._logden += opt_sampler.log_density(opt_sampler.observed_internal_state, opt_sample)
@@ -569,12 +582,13 @@ def __init__(self,
 
         if target_cov is None:
             self.target_cov = 0
-            for opt_sampler, opt_sample, target_cov, _ in opt_sampling_info:
+            for _, _, target_cov, _ in opt_sampling_info:
                 self.target_cov += target_cov
             self.target_cov /= len(opt_sampling_info)
+
         self._normal_sample = np.random.multivariate_normal(mean=np.zeros(self.target_cov.shape[0]), 
                                                             cov=self.target_cov, 
-                                                            size=(opt_sample.shape[0],))
+                                                            size=(nsample,))
 
     def pivot(self,
               linear_func,
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index 62882d841..de6cac4f8 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -4,7 +4,7 @@
 import regreg.api as rr
 
 from .query import query, optimization_sampler
-from .reconstruction import reconstruct_full_from_internal
+from .reconstruction import reconstruct_full_from_internal, reconstruct_score
 from .M_estimator import restricted_Mest
 
 class threshold_score(query):
@@ -142,47 +142,7 @@ def get_sampler(self):
 
         if not hasattr(self, "_sampler"):
 
-            def grad_log_density(boundary, 
-                                 opt_transform,
-                                 score_transform,
-                                 threshold,
-                                 _density,
-                                 _cdf,
-                                 internal_state, 
-                                 opt_state):
-                """
-                marginalizing over the sub-gradient
-                """
-
-                full_state = reconstruct_full_from_internal(opt_transform, score_transform, internal_state, opt_state)
-
-                weights = np.zeros_like(boundary, np.float)
-
-                weights[boundary] = ((_density(threshold[boundary] - full_state[boundary])
-                                           - _density(-threshold[boundary] - full_state[boundary])) /
-                                          (1 - _cdf(threshold[boundary] - full_state[boundary]) + 
-                                           _cdf(-threshold[boundary] - full_state[boundary])))
-
-
-                weights[~boundary] = ((-_density(threshold[~boundary] - 
-                                                 full_state[~boundary]) + 
-                                             _density(-threshold[~boundary] - full_state[~boundary])) /
-                                           (_cdf(threshold[~boundary] - full_state[~boundary]) - 
-                                            _cdf(-threshold[~boundary] - full_state[~boundary])))
-
-                opt_linear = opt_transform[0]
-                return opt_linear.T.dot(weights) ## tested
-
-            grad_log_density = functools.partial(grad_log_density,
-                                                 self.boundary,
-                                                 self.opt_transform,
-                                                 self.score_transform,
-                                                 self.threshold,
-                                                 self.randomization._density,
-                                                 self.randomization._cdf)
-
             def log_density(boundary, 
-                            opt_transform,
                             score_transform,
                             threshold,
                             _density,
@@ -193,25 +153,30 @@ def log_density(boundary,
                 marginalizing over the sub-gradient
                 """
 
-                full_state = reconstruct_full_from_internal(opt_transform, score_transform, internal_state, opt_state)
+                score_state = np.atleast_2d(reconstruct_score(score_transform, internal_state))
                 logdens = 0
                 weights = np.zeros_like(boundary, np.float)
 
-                logdens += np.log(1 - _cdf(threshold[boundary] - full_state[boundary]) + 
-                                  _cdf(-threshold[boundary] - full_state[boundary]))
-                logdens += np.log(_cdf(threshold[~boundary] - full_state[~boundary]) - 
-                                   _cdf(-threshold[~boundary] - full_state[~boundary]))
+                logdens += np.log(1 - _cdf(threshold[boundary] - score_state[:, boundary]) + 
+                                  _cdf(-threshold[boundary] - score_state[:, boundary])).sum()
+                logdens += np.log(_cdf(threshold[~boundary] - score_state[:, ~boundary]) - 
+                                   _cdf(-threshold[~boundary] - score_state[:, ~boundary])).sum()
                 return logdens
             
 
             log_density = functools.partial(log_density,
                                             self.boundary,
-                                            self.opt_transform,
                                             self.score_transform,
                                             self.threshold,
                                             self.randomization._density,
                                             self.randomization._cdf)
-            projection = lambda opt: opt
+
+            # the gradient and projection are used for 
+            # Langevin sampling of opt variables
+            # but this view has no opt variables
+
+            grad_log_density = None
+            projection = None
 
             self._sampler = optimization_sampler(self.observed_opt_state,
                                                  self.observed_internal_state.copy(),

From e6960dc09609fec9e2076395ba4e25833ddcd8e5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Sep 2017 23:30:17 -0700
Subject: [PATCH 249/617] finished new_projection

---
 selection/randomized/M_estimator.py           | 25 ++++++++++++++-----
 .../randomized/tests/test_convenience.py      |  2 +-
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 9460aeb5c..1ee877b64 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -167,7 +167,8 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         # we are implicitly assuming that
         # loss is a pairs model
 
-        _sqrt_scaling = np.sqrt(scaling)
+        self.scaling = scaling
+        _sqrt_scaling = np.sqrt(self.scaling)
 
         _beta_unpenalized = restricted_Mest(loss, overall, solve_args=solve_args)
 
@@ -474,9 +475,6 @@ def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=N
                 inactive_marginal_groups[i] = True
                 limits_marginal_groups[i] = self.penalty.weights[g]
 
-        inactive_marginal_groups = inactive_marginal_groups
-        limits_marginal_groups = limits_marginal_groups
-
         opt_linear, opt_offset = self.opt_transform
 
         new_linear = np.zeros((opt_linear.shape[0], (self._active_groups.sum() +
@@ -579,8 +577,23 @@ def new_log_density(query,
                                             self.opt_transform[0],
                                             self.randomization._log_density)
 
-        new_projection = lambda opt: opt # this is wrong, but I am running a smoke test first
-
+        new_groups = self.penalty.groups[moving_inactive_groups]
+        _sqrt_scaling = np.sqrt(self.scaling)
+        new_weights = dict([(g, self.penalty.weights[g] / _sqrt_scaling) for g in self.penalty.weights.keys() if g in np.unique(new_groups)])
+        new_group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, bound=1.)
+
+        def new_projection(group_lasso_dual,
+                           noverall,
+                           opt_state):
+            new_state = opt_state.copy()
+            new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
+            new_state[noverall:] = group_lasso_dual.bound_prox(opt_state[noverall:])
+            return new_state
+
+        new_projection = functools.partial(new_projection,
+                                           new_group_lasso_dual,
+                                           self._overall.sum())
+                                           
         new_selection_variable = copy(self.selection_variable)
         new_selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
 
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index 9d4517396..db8e99d04 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -27,7 +27,7 @@ def test_lasso_constructors(ndraw=1000, burnin=200):
                                                             [False, True]):
 
         inst, const = const_info
-        X, Y = inst(n=100, p=120, signal=0.1, s=3)[:2]
+        X, Y = inst(n=100, p=120, signal=1, s=10)[:2]
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 20

From fa7e79b75344eb444070672daadb126ba4044e2c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 23 Sep 2017 07:44:16 -0700
Subject: [PATCH 250/617] removing references to target from query -- have not
 removed target module yet

---
 selection/randomized/convenience.py           |   3 +-
 selection/randomized/glm.py                   | 112 ---
 selection/randomized/query.py                 |  87 ---
 selection/randomized/target.py                | 647 ------------------
 .../randomized/tests/test_convenience.py      |   1 -
 .../tests/test_opt_weighted_intervals.py      |   2 +-
 6 files changed, 2 insertions(+), 850 deletions(-)
 delete mode 100644 selection/randomized/target.py

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index d5def8e9e..8167e3f70 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -7,8 +7,7 @@
 import numpy as np
 import regreg.api as rr
 
-from .glm import (target as glm_target, 
-                  glm_group_lasso,
+from .glm import (glm_group_lasso,
                   glm_group_lasso_parametric,
                   glm_greedy_step,
                   glm_threshold_score,
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 862024663..b814d03c4 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -391,118 +391,6 @@ def _parametric_cov_glm(glm_loss,
     Sigma_full = np.dot(mat, np.dot(_W, mat.T))
     return Sigma_full
 
-def target(loss, 
-           active, 
-           queries,
-           subset=None, 
-           bootstrap=False,
-           solve_args={'min_its':50, 'tol':1.e-10},
-           reference=None,
-           parametric=False):
-    """
-    Form target from self.loss
-    restricting to active variables.
-
-    If subset is not None, then target returns
-    only those coordinates of the active
-    variables. 
-
-    Parameters
-    ----------
-
-    query : `query`
-       A query with a glm loss.
-
-    active : np.bool
-       Indicators of active variables.
-
-    queries : `multiple_queries`
-       Sampler returned for this queries.
-
-    subset : np.bool
-       Indicator of subset of variables
-       to be returned. Includes both
-       active and inactive variables.
-
-    bootstrap : bool
-       If True, sampler returned uses bootstrap
-       otherwise uses a plugin CLT.
-
-    reference : np.float (optional)
-       Optional reference parameter. Defaults
-       to the observed reference parameter. 
-       Must have shape `active.sum()`.
-
-    solve_args : dict
-       Args used to solve restricted M estimator.
-
-    Returns
-    -------
-
-    target_sampler : `targeted_sampler`
-
-    """
-
-    unpenalized_mle = restricted_Mest(loss, active, solve_args=solve_args)
-    X, Y = loss.data
-    n, _ = X.shape
-
-    # workout which inactive ones to return
-
-    if subset is None:
-        subset = active
-
-    active_subset = (active * subset)[active]
-    nactive = active.sum()
-    nactive_subset = active_subset.sum()
-    inactive = ~active * subset
-
-    boot_target, boot_target_observed = pairs_bootstrap_glm(loss, active, inactive=inactive)
-
-    def _subsetter(value):
-        if nactive_subset > 0:        
-            return np.hstack([value[active_subset], value[nactive:]])
-        else:
-            return value[nactive:]
-
-    def _target(indices):
-        return _subsetter(boot_target(indices))
-    target_observed = _subsetter(boot_target_observed)
-
-    if parametric==False:
-        form_covariances = glm_nonparametric_bootstrap(n, n)
-    else:
-        form_covariances = glm_parametric_covariance(loss)
-
-    queries.setup_sampler(form_covariances)
-    queries.setup_opt_state()
-
-    if reference is None:
-        reference = target_observed
-
-    if parametric:
-        linear_func = np.identity(target_observed.shape[0])
-        _target = (active,linear_func)
-
-    if bootstrap:
-        alpha_mat = set_alpha_matrix(loss, active, inactive=inactive)
-        alpha_subset = np.ones(alpha_mat.shape[0], np.bool)
-        alpha_subset[:nactive] = active_subset
-        alpha_mat = alpha_mat[alpha_subset]
-
-        target_sampler = queries.setup_bootstrapped_target(_target,
-                                                           target_observed,
-                                                           alpha_mat,
-                                                           reference=reference)
-    else:
-
-        target_sampler = queries.setup_target(_target,
-                                              target_observed,
-                                              reference=reference,
-                                              parametric=parametric)
-
-    return target_sampler, target_observed
-
 #### Subclasses of different randomized views
 
 class glm_group_lasso(M_estimator):
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 4838410e4..965744d63 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -8,11 +8,8 @@
 
 from ..distributions.api import discrete_family, intervals_from_sample
 from ..sampling.langevin import projected_langevin
-from .target import (targeted_sampler,
-                     bootstrapped_target_sampler)
 from .reconstruction import reconstruct_full_from_internal
 
-
 class query(object):
 
     def __init__(self, randomization):
@@ -171,90 +168,6 @@ def setup_sampler(self, form_covariances):
             curr_randomization_length = curr_randomization_length + randomization_length
         self.total_randomization_length = curr_randomization_length
 
-    def setup_opt_state(self):
-        self.num_opt_var = 0
-        self.opt_slice = []
-
-        for objective in self.objectives:
-            self.opt_slice.append(slice(self.num_opt_var, self.num_opt_var + objective.num_opt_var))
-            self.num_opt_var += objective.num_opt_var
-        self.observed_opt_state = np.zeros(self.num_opt_var)
-        for i in range(len(self.objectives)):
-            if self.objectives[i].num_opt_var > 0:
-                self.observed_opt_state[self.opt_slice[i]] = self.objectives[i].observed_opt_state
-
-    def setup_target(self,
-                     target_info,
-                     observed_target_state,
-                     reference=None,
-                     target_set=None,
-                     parametric=False):
-        '''
-        Parameters
-        ----------
-        target_info : object
-           Passed as first argument to `self.form_covariances`.
-
-        observed_target_state : np.float
-           Observed value of the target estimator.
-
-        reference : np.float (optional)
-           Reference parameter for Gaussian approximation
-           of target.
-
-        target_set : sequence (optional)
-           Which coordinates of target are really
-           of interest. If not None, then coordinates
-           not in target_set are assumed to have 0
-           mean in the sampler.
-
-        Notes
-        -----
-
-        The variable `target_set` can be used for
-        a selected model test when some functionals
-        are assumed to have 0 mean in the limiting
-        Gaussian approximation. This can
-        sometimes mean an increase in power.
-
-        Returns
-        -------
-
-        target : targeted_sampler
-            An instance of `targeted_sampler` that
-            can be used to sample, test hypotheses,
-            form intervals.
-        '''
-
-        self.setup_opt_state()
-
-        return targeted_sampler(self,
-                                target_info,
-                                observed_target_state,
-                                self.form_covariances,
-                                target_set=target_set,
-                                reference=reference,
-                                parametric=parametric)
-
-    def setup_bootstrapped_target(self,
-                                  target_bootstrap,
-                                  observed_target_state,
-                                  target_alpha,
-                                  target_set=None,
-                                  reference=None,
-                                  boot_size=None):
-
-        self.setup_opt_state()
-
-        return bootstrapped_target_sampler(self,
-                                           target_bootstrap,
-                                           observed_target_state,
-                                           target_alpha,
-                                           target_set=target_set,
-                                           reference=reference,
-                                           boot_size=boot_size)
-
-
 class optimization_sampler(object):
 
     '''
diff --git a/selection/randomized/target.py b/selection/randomized/target.py
deleted file mode 100644
index a5f68bbf5..000000000
--- a/selection/randomized/target.py
+++ /dev/null
@@ -1,647 +0,0 @@
-from itertools import product
-import numpy as np
-
-from regreg.affine import power_L
-
-from ..distributions.api import discrete_family, intervals_from_sample
-from ..sampling.langevin import projected_langevin
-from .reconstruction import reconstruct_full_from_data, reconstruct_internal
-
-class targeted_sampler(object):
-
-    '''
-    Object to sample from target of a selective sampler.
-    '''
-
-    def __init__(self,
-                 multi_view,
-                 target_info,
-                 observed_target_state,
-                 form_covariances,
-                 reference=None,
-                 target_set=None,
-                 parametric=False):
-
-        '''
-        Parameters
-        ----------
-
-        multi_view : `multiple_queries`
-           Instance of `multiple_queries`. Attributes
-           `objectives`, `score_info` are key
-           attributed. (Should maybe change constructor
-           to reflect only what is needed.)
-
-        target_info : object
-           Passed as first argument to `self.form_covariances`.
-
-        observed_target_state : np.float
-           Observed value of the target estimator.
-
-        form_covariances : callable
-           Used in linear decomposition of each score
-           and the target.
-
-        reference : np.float (optional)
-           Reference parameter for Gaussian approximation
-           of target.
-
-        target_set : sequence (optional)
-           Which coordinates of target are really
-           of interest. If not None, then coordinates
-           not in target_set are assumed to have 0
-           mean in the sampler.
-
-        parametric : bool
-           Use parametric covariance estimate?
-
-        Notes
-        -----
-        The callable `form_covariances`
-        should accept `target_info` as first argument
-        and a keyword argument `cross_terms` which
-        correspond to the `score_info` of each
-        objective of `multi_view`. This used in
-        a linear decomposition of each score into
-        a piece correlated with `target` and
-        an independent piece.
-        The independent piece is treated as a
-        nuisance parameter and conditioned on
-        (i.e. is fixed within the sampler).
-        '''
-
-        # sampler will draw samples for bootstrap
-        # these are arguments to target_info and score_bootstrap
-        # nonparamteric bootstrap is np.random.choice(n, size=(n,), replace=True)
-        # residual bootstrap might be X_E.dot(\bar{\beta}_E)
-        # + np.random.choice(resid, size=(n,), replace=True)
-
-        # if target_set is not None, we assume that
-        # these coordinates (specified by a list of coordinates) of target
-        # is assumed to be independent of the rest
-        # the corresponding block of `target_cov` is zeroed out
-
-        # we need these attributes of multi_view
-
-        self.nqueries = len(multi_view.objectives)
-        self.opt_slice = multi_view.opt_slice
-        self.objectives = multi_view.objectives
-
-        self.observed_target_state = observed_target_state
-        self.shape = observed_target_state.shape
-
-        self.total_randomization_length = multi_view.total_randomization_length
-        self.randomization_slice = multi_view.randomization_slice
-
-        self.score_cov = []
-        target_cov_sum = 0
-        for i in range(self.nqueries):
-            if parametric == False:
-                target_cov, cross_cov = multi_view.form_covariances(target_info,  
-                                                                    cross_terms=[multi_view.score_info[i]],
-                                                                    nsample=multi_view.nboot[i])
-            else:
-                target_cov, cross_cov = multi_view.form_covariances(target_info, 
-                                                                    cross_terms=[multi_view.score_info[i]])
-
-            target_cov_sum += target_cov
-            self.score_cov.append(cross_cov)
-
-        self.target_cov = target_cov_sum / self.nqueries
-
-        # XXX we're not really using this target_set in our tests
-
-        # zero out some coordinates of target_cov
-        # to enforce independence of target and null statistics
-
-        if target_set is not None:
-            null_set = set(range(self.target_cov.shape[0])).difference(target_set)
-            for t, n in product(target_set, null_set):
-                self.target_cov[t, n] = 0.
-                self.target_cov[n, t] = 0.
-
-        self.target_transform = []
-
-        for i in range(self.nqueries):
-            self.target_transform.append(
-                self.objectives[i].linear_decomposition(self.score_cov[i],
-                                                        self.target_cov,
-                                                        self.observed_target_state))
-
-        self.target_cov = np.atleast_2d(self.target_cov)
-        self.target_inv_cov = np.linalg.inv(self.target_cov)
-
-        # size of reference? should it only be target_set?
-
-        if reference is None:
-            reference = np.zeros(self.target_inv_cov.shape[0])
-        self.reference = reference
-
-        # need to vectorize the state for Langevin
-
-        self.overall_opt_slice = slice(0, multi_view.num_opt_var)
-        self.target_slice = slice(multi_view.num_opt_var,
-                                  multi_view.num_opt_var + self._reference_inv.shape[0])
-        self.keep_slice = self.target_slice
-
-        # set the observed state
-
-        self.observed_state = np.zeros(multi_view.num_opt_var + self._reference_inv.shape[0])
-        self.observed_state[self.target_slice] = self.observed_target_state
-        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
-
-        # added for the reconstruction map in case we marginalize over optimization variables
-
-        randomization_length_total = 0
-        self.randomization_slice = []
-        for i in range(self.nqueries):
-            self.randomization_slice.append(
-                slice(randomization_length_total, randomization_length_total + self.objectives[i].ndim))
-            randomization_length_total += self.objectives[i].ndim
-
-        self.randomization_length_total = randomization_length_total
-
-    def set_reference(self, reference):
-        self._reference = np.atleast_1d(reference)
-        self._reference_inv = self.target_inv_cov.dot(self.reference).flatten()
-
-    def get_reference(self):
-        return self._reference
-
-    reference = property(get_reference, set_reference)
-
-    def projection(self, state):
-        '''
-        Projection map of projected Langevin sampler.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Typically, the projection will only act on
-           `opt_vars`.
-        Returns
-        -------
-        projected_state : np.float
-        '''
-
-        opt_state = state[self.overall_opt_slice]
-        new_opt_state = np.zeros_like(opt_state)
-        for i in range(self.nqueries):
-            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
-        state[self.overall_opt_slice] = new_opt_state
-        return state
-
-    def gradient(self, state):
-        '''
-        Gradient of log-density at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        target_state, opt_state = state[self.target_slice], state[self.overall_opt_slice]
-        target_grad, opt_grad = np.zeros_like(target_state), np.zeros_like(opt_state)
-        full_grad = np.zeros_like(state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        for i in range(self.nqueries):
-
-            randomization_state = reconstruct_full_from_data(self.objectives[i].opt_transform,
-                                                             self.objectives[i].score_transform,
-                                                             target_state, 
-                                                             self.target_transform[i], 
-                                                             opt_state[self.opt_slice[i]])
-
-            internal_state = reconstruct_internal(target_state, self.target_transform[i])
-            grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]]) 
-            target_linear, target_offset = self.target_transform[i]
-            opt_linear, opt_offset = self.objectives[i].opt_transform
-            if target_linear is not None:
-                target_grad += target_linear.T.dot(grad)
-            if opt_linear is not None:
-                opt_grad[self.opt_slice[i]] = opt_offset.T.dot(grad)
-
-        target_grad = -target_grad
-        target_grad += self._reference_inv - self.target_inv_cov.dot(target_state)
-        full_grad[self.target_slice] = target_grad
-        full_grad[self.overall_opt_slice] = -opt_grad
-
-        return full_grad
-
-
-    def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
-        '''
-        Sample `target` from selective density
-        using projected Langevin sampler with
-        gradient map `self.gradient` and
-        projection map `self.projection`.
-
-        Parameters
-        ----------
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        keep_opt : bool
-           Should we return optimization variables
-           as well as the target?
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if stepsize is None:
-            stepsize = 1. / self.crude_lipschitz()
-
-        if keep_opt:
-            keep_slice = slice(None, None, None)
-        else:
-            keep_slice = self.keep_slice
-
-        target_langevin = projected_langevin(self.observed_state.copy(),
-                                             self.gradient,
-                                             self.projection,
-                                             stepsize)
-
-        samples = []
-
-        for i in range(ndraw + burnin):
-            target_langevin.next()
-            if (i >= burnin):
-                samples.append(target_langevin.state[keep_slice].copy())
-        return np.asarray(samples)
-
-    def hypothesis_test(self,
-                        test_stat,
-                        observed_value,
-                        ndraw=10000,
-                        burnin=2000,
-                        stepsize=None,
-                        sample=None,
-                        parameter=None,
-                        alternative='twosided'):
-
-        '''
-        Sample `target` from selective density
-        using projected Langevin sampler with
-        gradient map `self.gradient` and
-        projection map `self.projection`.
-        Parameters
-        ----------
-        test_stat : callable
-           Test statistic to evaluate on sample from
-           selective distribution.
-        observed_value : float
-           Observed value of test statistic.
-           Used in p-value calculation.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc. If not None,
-           `ndraw, burnin, stepsize` are ignored.
-        parameter : np.float (optional)
-           If not None, defaults to `self.reference`.
-           Otherwise, sample is reweighted using Gaussian tilting.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        if parameter is None:
-            parameter = self.reference
-
-        sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
-
-
-        delta = self.target_inv_cov.dot(parameter - self.reference)
-        W = np.exp(sample.dot(delta))
-
-        family = discrete_family(sample_test_stat, W)
-        pval = family.cdf(0, observed_value)
-
-        if alternative == 'greater':
-            return 1 - pval
-        elif alternative == 'less':
-            return pval
-        else:
-            return 2 * min(pval, 1 - pval)
-
-    def confidence_intervals(self,
-                             observed,
-                             ndraw=10000,
-                             burnin=2000,
-                             stepsize=None,
-                             sample=None,
-                             level=0.9):
-        '''
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        level : float (optional)
-            Specify the
-            confidence level.
-        Notes
-        -----
-        Construct selective confidence intervals
-        for each parameter of the target.
-        Returns
-        -------
-        intervals : [(float, float)]
-            List of confidence intervals.
-        '''
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
-
-        return intervals_instance.confidence_intervals_all(level=level)
-
-    def coefficient_pvalues(self,
-                            observed,
-                            parameter=None,
-                            ndraw=10000,
-                            burnin=2000,
-                            stepsize=None,
-                            sample=None,
-                            alternative='twosided'):
-        '''
-        Construct selective p-values
-        for each parameter of the target.
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        parameter : np.float (optional)
-            A vector of parameters with shape `self.shape`
-            at which to evaluate p-values. Defaults
-            to `np.zeros(self.shape)`.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        pvalues : np.float
-
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        if parameter is None:
-            parameter = np.zeros(self.shape)
-
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
-
-        pval = intervals_instance.pivots_all(parameter)
-
-        if alternative == 'greater':
-            return 1 - pval
-        elif alternative == 'less':
-            return pval
-        else:
-            return 2 * np.minimum(pval, 1 - pval)
-
-    def crude_lipschitz(self):
-        """
-        A crude Lipschitz constant for the
-        gradient of the log-density.
-        Returns
-        -------
-        lipschitz : float
-
-        """
-        lipschitz = power_L(self.target_inv_cov)
-        for transform, objective in zip(self.target_transform, self.objectives):
-            lipschitz += power_L(transform[0])**2 * objective.randomization.lipschitz
-            lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
-        return lipschitz
-
-
-    def reconstruct(self, state):
-        '''
-        Reconstruction of randomization at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be array with each row a state.
-        Returns
-        -------
-        reconstructed : np.float
-           Has shape of `opt_vars` with same number of rows
-           as `state`.
-
-        '''
-
-        state = np.atleast_2d(state)
-        if len(state.shape) > 2:
-            raise ValueError('expecting at most 2-dimensional array')
-
-        target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
-        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
-
-        for i in range(self.nqueries):
-            reconstructed[:, self.randomization_slice[i]] = reconstruct_full_from_data(self.objectives[i].opt_transform,
-                                                                                       self.objectives[i].score_transform,
-                                                                                       target_state,
-                                                                                       self.target_transform[i],
-                                                                                       opt_state[:, self.opt_slice[i]])
-
-        return np.squeeze(reconstructed)
-
-    def log_density(self, state):
-        '''
-        Log of randomization density at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be two-dimensional with each row a state.
-        Returns
-        -------
-        density : np.float
-            Has number of rows as `state` if 2-dimensional.
-        '''
-
-        reconstructed = self.reconstruct(state)
-        value = np.zeros(reconstructed.shape[0])
-
-        for i in range(self.nqueries):
-            log_dens = self.objectives[i].randomization.log_density
-            value += log_dens(reconstructed[:,self.opt_slice[i]])
-        return np.squeeze(value)
-
-class bootstrapped_target_sampler(targeted_sampler):
-
-    # make one of these for each hypothesis test
-
-    def __init__(self,
-                 multi_view,
-                 target_info,
-                 observed_target_state,
-                 target_alpha,
-                 target_set=None,
-                 reference=None,
-                 boot_size=None):
-
-        # sampler will draw bootstrapped weights for the target
-
-        if boot_size is None:
-            boot_size = target_alpha.shape[1]
-
-        targeted_sampler.__init__(self, multi_view,
-                                  target_info,
-                                  observed_target_state,
-                                  target_set,
-                                  reference)
-        # for bootstrap
-
-        self.boot_size = boot_size
-        self.target_alpha = target_alpha
-        self.boot_transform = []
-
-        for i in range(self.nqueries):
-            composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
-                                                                                                  self.target_cov,
-                                                                                                  self.observed_target_state)
-            boot_linear_part = np.dot(composition_linear_part, target_alpha)
-            boot_offset = composition_offset + np.dot(composition_linear_part, self.reference).flatten()
-            self.boot_transform.append((boot_linear_part, boot_offset))
-
-        # set the observed state for bootstrap
-
-        self.boot_slice = slice(multi_view.num_opt_var, multi_view.num_opt_var + self.boot_size)
-        self.observed_state = np.zeros(multi_view.num_opt_var + self.boot_size)
-        self.observed_state[self.boot_slice] = np.ones(self.boot_size)
-        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
-
-
-    def gradient(self, state):
-
-        boot_state, opt_state = state[self.boot_slice], state[self.overall_opt_slice]
-        boot_grad, opt_grad = np.zeros_like(boot_state), np.zeros_like(opt_state)
-        full_grad = np.zeros_like(state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        for i in range(self.nqueries):
-
-            randomization_state = reconstruct_full_from_data(self.objectives[i].opt_transform,
-                                                             self.objectives[i].score_transform,
-                                                             boot_state, 
-                                                             self.boot_transform[i], 
-                                                             opt_state[self.opt_slice[i]])
-
-            internal_state = reconstruct_internal(boot_state, self.boot_transform[i])
-            grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]])
-            boot_linear, boot_offset = self.boot_transform[i]
-            opt_linear, opt_offset = self.objectives[i].opt_transform
-            if boot_linear is not None:
-                boot_grad += boot_linear.T.dot(grad)
-            if opt_linear is not None:
-                opt_grad[self.opt_slice[i]] = opt_offset.T.dot(grad)
-
-        boot_grad = -boot_grad
-        boot_grad -= boot_state
-
-        full_grad[self.boot_slice] = boot_grad
-        full_grad[self.overall_opt_slice] = -opt_grad
-
-        return full_grad
-
-    def sample(self, ndraw, burnin, stepsize = None, keep_opt=False):
-        if stepsize is None:
-            stepsize = 1. / self.observed_state.shape[0]
-
-        bootstrap_langevin = projected_langevin(self.observed_state.copy(),
-                                                self.gradient,
-                                                self.projection,
-                                                stepsize)
-        if keep_opt:
-            boot_slice = slice(None, None, None)
-        else:
-            boot_slice = self.boot_slice
-
-        samples = []
-        for i in range(ndraw + burnin):
-            bootstrap_langevin.next()
-            if (i >= burnin):
-                samples.append(bootstrap_langevin.state[boot_slice].copy())
-        samples = np.asarray(samples)
-
-        if keep_opt:
-            target_samples = samples[:,self.boot_slice].dot(self.target_alpha.T) + self.reference[None, :]
-            opt_sample0 = samples[0,self.overall_opt_slice]
-            result = np.zeros((samples.shape[0], opt_sample0.shape[0] + target_samples.shape[1]))
-            result[:,self.overall_opt_slice] = samples[:,self.overall_opt_slice]
-            result[:,self.target_slice] = target_samples
-            return result
-        else:
-            target_samples = samples.dot(self.target_alpha.T) + self.reference[None, :]
-            return target_samples
diff --git a/selection/randomized/tests/test_convenience.py b/selection/randomized/tests/test_convenience.py
index db8e99d04..63ed633c5 100644
--- a/selection/randomized/tests/test_convenience.py
+++ b/selection/randomized/tests/test_convenience.py
@@ -3,7 +3,6 @@
 import nose.tools as nt
 
 from ..convenience import lasso, step, threshold
-from ..glm import target as glm_target
 from ...tests.instance import (gaussian_instance,
                                logistic_instance,
                                poisson_instance)
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 51fc02376..9ff57adce 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -12,7 +12,7 @@
 import matplotlib.pyplot as plt
 
 from scipy.stats import t as tdist
-from ..glm import target as glm_target, glm_nonparametric_bootstrap, pairs_bootstrap_glm
+from ..glm import glm_nonparametric_bootstrap, pairs_bootstrap_glm
 from ..M_estimator import restricted_Mest
 
 @set_seed_iftrue(False, 200)

From 378a0e518d97ad4861195680c66682762f725de8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 23 Sep 2017 07:53:50 -0700
Subject: [PATCH 251/617] removing target_sampler tests from test_Mest.py

---
 selection/randomized/tests/test_Mest.py | 260 +-----------------------
 1 file changed, 10 insertions(+), 250 deletions(-)

diff --git a/selection/randomized/tests/test_Mest.py b/selection/randomized/tests/test_Mest.py
index 8e79da624..62e3929f5 100644
--- a/selection/randomized/tests/test_Mest.py
+++ b/selection/randomized/tests/test_Mest.py
@@ -93,10 +93,12 @@ def target_gradient(state):
             opt_state1 = state[opt_slice1]
             opt_state2 = state[opt_slice2]
             opt_linear1 = M_est1.opt_transform[0]
-            arg1 = reconstruct_internal(target, (A1, b1)); grad1 = M_est1.grad_log_density(arg1, opt_state1)
+            arg1 = reconstruct_internal(target, (A1, b1))
+            grad1 = M_est1.sampler.gradient(opt_state1)
 
             opt_linear2 = M_est2.opt_transform[0]
-            arg2 = reconstruct_internal(target, (A2, b2)); grad2 = M_est2.grad_log_density(arg2, opt_state2)
+            arg2 = reconstruct_internal(target, (A2, b2))
+            grad2 = M_est2.sampler.gradient(opt_state2)
 
             full_grad = np.zeros_like(state)
             full_grad[opt_slice1] = -opt_linear1.T.dot(grad1)
@@ -108,9 +110,9 @@ def target_gradient(state):
 
         def target_projection(state):
             opt_state1 = state[opt_slice1]
-            state[opt_slice1] = M_est1.projection(opt_state1)
+            state[opt_slice1] = M_est1.sampler.projection(opt_state1)
             opt_state2 = state[opt_slice2]
-            state[opt_slice2] = M_est2.projection(opt_state2)
+            state[opt_slice2] = M_est2.sampler.projection(opt_state2)
             return state
 
         target_langevin = projected_langevin(initial_state,
@@ -205,9 +207,9 @@ def target_gradient(state):
             target = state[target_slice]
             opt_state1 = state[opt_slice1]
 
-
             opt_linear1 = M_est1.opt_transform[0]
-            arg1 = reconstruct_internal(target, (A1, b1)); grad1 = M_est1.grad_log_density(arg1, opt_state1)
+            arg1 = reconstruct_internal(target, (A1, b1))
+            grad1 = M_est1.sampler.gradient(opt_state1)
 
             full_grad = np.zeros_like(state)
             full_grad[opt_slice1] = -opt_linear1.T.dot(grad1)
@@ -218,7 +220,7 @@ def target_gradient(state):
 
         def target_projection(state):
             opt_state1 = state[opt_slice1]
-            state[opt_slice1] = M_est1.projection(opt_state1)
+            state[opt_slice1] = M_est1.sampler.projection(opt_state1)
             return state
 
         target_langevin = projected_langevin(initial_state,
@@ -250,254 +252,12 @@ def target_projection(state):
         print('naive Z', naive_Z, naive_pval)
         return pval, naive_pval, False
 
-@register_report(['pvalue', 'active'])
-@wait_for_return_value()
-def test_logistic_selected_inactive_coordinate():
-    s, n, p = 5, 200, 20 
-
-    randomizer = randomization.laplace((p,), scale=1.)
-    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, signal=14)
-
-    nonzero = np.where(beta)[0]
-    lam_frac = 1.
-
-    loss = rr.glm.logistic(X, y)
-    epsilon = 1.
-
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
-    W = np.ones(p)*lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    print(lam)
-    # our randomization
-
-    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
-
-    mv = multiple_queries([M_est1])
-    mv.solve()
-
-    active = M_est1.selection_variable['variables']
-    nactive = active.sum()
-    scaling = np.linalg.svd(X)[1].max()**2
-
-    form_covariances = glm_nonparametric_bootstrap(n, n)
-
-    if set(nonzero).issubset(np.nonzero(active)[0]):
-
-        active_set = np.nonzero(active)[0]
-        inactive_selected = I = [i for i in np.arange(active_set.shape[0]) if active_set[i] not in nonzero]
-        if not I:
-            return None
-
-        idx = I[0]
-        inactive = ~M_est1.selection_variable['variables']
-        boot_target, target_observed = pairs_bootstrap_glm(loss, active, inactive=inactive)
-
-        def null_target(indices):
-            result = boot_target(indices)
-            return np.hstack([result[idx], result[nactive:]])
-
-        null_observed = np.zeros(inactive.sum() + 1)
-        null_observed[0] = target_observed[idx]
-        null_observed[1:] = target_observed[nactive:]
-
-        # the null_observed[1:] is only used as a
-        # starting point for chain -- could be 0
-
-        mv.setup_sampler(form_covariances)
-        target_sampler = mv.setup_target(null_target, null_observed, target_set=[0])
-
-        test_stat = lambda x: x[0]
-        print(null_observed)
-        pval = target_sampler.hypothesis_test(test_stat, test_stat(null_observed), burnin=1000, ndraw=1000) # twosided by default
-
-        return pval, False
-
-@register_report(['pvalue', 'active'])
-@wait_for_return_value()
-def test_logistic_saturated_inactive_coordinate():
-    s, n, p = 5, 200, 20 
-
-    randomizer = randomization.laplace((p,), scale=1.)
-    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, signal=14)
-
-    nonzero = np.where(beta)[0]
-    lam_frac = 1.
-
-    loss = rr.glm.logistic(X, y)
-    epsilon = 1.
-
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
-    W = np.ones(p)*lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    print(lam)
-    # our randomization
-
-    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
-
-    mv = multiple_queries([M_est1])
-    mv.solve()
-
-    active = M_est1.selection_variable['variables']
-    nactive = active.sum()
-    if set(nonzero).issubset(np.nonzero(active)[0]):
-
-        active_set = np.nonzero(active)[0]
-        inactive_selected = I = [i for i in np.arange(active_set.shape[0]) if active_set[i] not in nonzero]
-
-        if not I:
-            return None
-        idx = I[0]
-        inactive = ~M_est1.selection_variable['variables']
-        boot_target, target_observed = pairs_bootstrap_glm(loss, active, inactive=inactive)
-
-        def null_target(indices):
-            result = boot_target(indices)
-            return result[idx]
-
-        null_observed = np.zeros(1)
-        null_observed[0] = target_observed[idx]
-
-        # the null_observed[1:] is only used as a
-        # starting point for chain -- could be 0
-        # null_observed[1:] = target_observed[nactive:]
-
-        form_covariances = glm_nonparametric_bootstrap(n, n)
-        mv.setup_sampler(form_covariances)
-        target_sampler = mv.setup_target(null_target, null_observed)
-
-        test_stat = lambda x: x[0]
-        pval = target_sampler.hypothesis_test(test_stat, test_stat(null_observed), burnin=10000, ndraw=10000) # twosided by default
-        return pval, False
-
-@register_report(['pvalue', 'active'])
-@wait_for_return_value()
-def test_logistic_selected_active_coordinate():
-    s, n, p = 5, 200, 20 
-
-    randomizer = randomization.laplace((p,), scale=1.)
-    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, signal=14)
-
-    nonzero = np.where(beta)[0]
-    lam_frac = 1.
-
-    loss = rr.glm.logistic(X, y)
-    epsilon = 1.
-
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
-    W = np.ones(p)*lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    print(lam)
-    # our randomization
-
-    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
-
-    mv = multiple_queries([M_est1])
-    mv.solve()
-
-    active = M_est1.selection_variable['variables']
-    nactive = active.sum()
-    if set(nonzero).issubset(np.nonzero(active)[0]):
-
-        active_set = np.nonzero(active)[0]
-        inactive_selected = I = [i for i in np.arange(active_set.shape[0]) if active_set[i] not in nonzero]
-        active_selected = A = [i for i in np.arange(active_set.shape[0]) if active_set[i] in nonzero]
-
-        idx = A[0]
-        inactive = ~M_est1.selection_variable['variables']
-        boot_target, target_observed = pairs_bootstrap_glm(loss, active, inactive=inactive)
-
-        def active_target(indices):
-            result = boot_target(indices)
-            return np.hstack([result[idx], result[nactive:]])
-
-        active_observed = np.zeros(inactive.sum() + 1)
-        active_observed[0] = target_observed[idx]
-        active_observed[1:] = target_observed[nactive:]
-
-        # the active_observed[1:] is only used as a
-        # starting point for chain -- could be 0
-        # active_observed[1:] = target_observed[nactive:]
-
-        form_covariances = glm_nonparametric_bootstrap(n, n)
-        mv.setup_sampler(form_covariances)
-        target_sampler = mv.setup_target(active_target, active_observed, target_set=[0])
-        test_stat = lambda x: x[0]
-        pval = target_sampler.hypothesis_test(test_stat, test_stat(active_observed), burnin=10000, ndraw=10000) # twosided by default
-        return pval, True
-
-@register_report(['pvalue', 'active'])
-@wait_for_return_value()
-def test_logistic_saturated_active_coordinate():
-    s, n, p = 5, 200, 20 
-
-    randomizer = randomization.laplace((p,), scale=1.)
-    X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=0.1, signal=14)
-
-    nonzero = np.where(beta)[0]
-    lam_frac = 1.
-
-    loss = rr.glm.logistic(X, y)
-    epsilon = 1.
-
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
-    W = np.ones(p)*lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    print(lam)
-    # our randomization
-
-    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
-
-    mv = multiple_queries([M_est1])
-    mv.solve()
-
-    active = M_est1.selection_variable['variables']
-    nactive = active.sum()
-    if set(nonzero).issubset(np.nonzero(active)[0]):
-
-        active_set = np.nonzero(active)[0]
-        inactive_selected = I = [i for i in np.arange(active_set.shape[0]) if active_set[i] not in nonzero]
-        active_selected = A = [i for i in np.arange(active_set.shape[0]) if active_set[i] in nonzero]
-
-        idx = A[0]
-        inactive = ~M_est1.selection_variable['variables']
-        boot_target, target_observed = pairs_bootstrap_glm(loss, active, inactive=inactive)
-
-        def active_target(indices):
-            result = boot_target(indices)
-            return result[idx]
-
-        active_observed = np.zeros(1)
-        active_observed[0] = target_observed[idx]
-
-        # the active_observed[1:] is only used as a
-        # starting point for chain -- could be 0
-        # active_observed[1:] = target_observed[nactive:]
-
-        form_covariances = glm_nonparametric_bootstrap(n, n)
-
-        mv.setup_sampler(form_covariances)
-        target_sampler = mv.setup_target(active_target, active_observed)
-        test_stat = lambda x: x[0]
-        pval = target_sampler.hypothesis_test(test_stat, test_stat(active_observed), burnin=10000, ndraw=10000) # twosided by default
-        return pval, True
 
 def report(niter=50):
     
     # these are all our null tests
     fn_names = ['test_overall_null_two_queries',
-                'test_one_inactive_coordinate_handcoded',
-                'test_logistic_selected_inactive_coordinate',
-                'test_logistic_saturated_inactive_coordinate',
-                'test_logistic_selected_active_coordinate',
-                'test_logistic_saturated_active_coordinate']
+                'test_one_inactive_coordinate_handcoded']
 
     dfs = []
     for fn in fn_names:

From 37078e872a4ba3aa7d343d42d78ab897ade4a3d0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 23 Sep 2017 07:54:45 -0700
Subject: [PATCH 252/617] conditioning tested in test_convenience

---
 selection/randomized/tests/test_condition.py | 125 -------------------
 1 file changed, 125 deletions(-)
 delete mode 100644 selection/randomized/tests/test_condition.py

diff --git a/selection/randomized/tests/test_condition.py b/selection/randomized/tests/test_condition.py
deleted file mode 100644
index de287d2c8..000000000
--- a/selection/randomized/tests/test_condition.py
+++ /dev/null
@@ -1,125 +0,0 @@
-from __future__ import print_function, division
-import numpy as np
-
-import regreg.api as rr
-import selection.tests.reports as reports
-
-
-from selection.tests.flags import SET_SEED, SMALL_SAMPLES
-from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.tests.decorators import (wait_for_return_value, 
-                                        set_seed_iftrue, 
-                                        set_sampling_params_iftrue,
-                                        register_report)
-import selection.tests.reports as reports
-
-from selection.api import (randomization, 
-                           glm_group_lasso, 
-                           pairs_bootstrap_glm, 
-                           multiple_queries, 
-                           discrete_family, 
-                           projected_langevin, 
-                           glm_group_lasso_parametric, 
-                           glm_target)
-
-from selection.randomized.glm import glm_parametric_covariance, glm_nonparametric_bootstrap, restricted_Mest, set_alpha_matrix
-
-@register_report(['truth', 'active'])
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
-@set_seed_iftrue(SET_SEED)
-@wait_for_return_value()
-def test_condition(s=0,
-                   n=100,
-                   p=200,
-                   rho=0.1,
-                   signal=10,
-                   lam_frac = 1.4,
-                   ndraw=10000, burnin=2000,
-                   loss='logistic',
-                   nviews=4,
-                   scalings=True):
-
-    if loss=="gaussian":
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=rho, signal=signal, sigma=1)
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-        loss = rr.glm.gaussian(X, y)
-    elif loss=="logistic":
-        X, y, beta, _ = logistic_instance(n=n, p=p, s=s, rho=rho, signal=signal)
-        loss = rr.glm.logistic(X, y)
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
-
-    randomizer = randomization.laplace((p,), scale=0.6)
-
-    epsilon = 1. / np.sqrt(n)
-
-    W = np.ones(p)*lam
-    W[0] = 0 # use at least some unpenalized
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    views = []
-    for i in range(nviews):
-        views.append(glm_group_lasso(loss, epsilon, penalty, randomizer))
-
-    queries = multiple_queries(views)
-    queries.solve()
-
-    active_union = np.zeros(p, np.bool)
-    for view in views:
-        active_union += view.selection_variable['variables']
-
-    nactive = np.sum(active_union)
-    print("nactive", nactive)
-
-    nonzero = np.where(beta)[0]
-
-    if set(nonzero).issubset(np.nonzero(active_union)[0]):
-        if nactive==s:
-            return None
-
-        if scalings: # try condition on some scalings
-            for i in range(nviews//2):
-                conditioning_groups = np.zeros(p, bool)
-                conditioning_groups[:p//2] = True
-                marginalizing_groups = np.ones(p, bool)
-                marginalizing_groups[:p//2] = False
-                views[i].decompose_subgradient(conditioning_groups=conditioning_groups,
-                                               marginalizing_groups=marginalizing_groups)
-                views[i].condition_on_scalings()
-        else:
-            for i in range(nviews):
-               views[i].decompose_subgradient(conditioning_groups=np.zeros(p, bool),
-                                               marginalizing_groups=np.ones(p, bool))
-
-        active_set = np.nonzero(active_union)[0]
-        target_sampler, target_observed = glm_target(loss,
-                                                     active_union,
-                                                     queries)
-
-        test_stat = lambda x: np.linalg.norm(x - beta[active_union])
-        observed_test_value = test_stat(target_observed)
-
-        pivots = target_sampler.hypothesis_test(test_stat,
-                                               observed_test_value,
-                                               alternative='twosided',
-                                               parameter = beta[active_union],
-                                               ndraw=ndraw,
-                                               burnin=burnin)
-
-        return [pivots], [False]
-
-def report(niter=50, **kwargs):
-
-    condition_report = reports.reports['test_condition']
-    runs = reports.collect_multiple_runs(condition_report['test'],
-                                         condition_report['columns'],
-                                         niter,
-                                         reports.summarize_all,
-                                         **kwargs)
-
-    fig = reports.pivot_plot_simple(runs)
-    fig.savefig('conditional_pivots.pdf')
-
-
-if __name__ == '__main__':
-    report()

From 11133bb32d42d1b30e0c7cc60e668d402922b08c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 23 Sep 2017 08:13:12 -0700
Subject: [PATCH 253/617] removing setup_sampler methods no longer really used

---
 selection/randomized/M_estimator.py | 32 ----------------------
 selection/randomized/api.py         |  3 +--
 selection/randomized/convenience.py |  3 ---
 selection/randomized/glm.py         |  4 +--
 selection/randomized/query.py       | 41 -----------------------------
 5 files changed, 2 insertions(+), 81 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 1ee877b64..1ebd12918 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -399,38 +399,6 @@ def derivative_logdet_jacobian(self, scalings):
         der[self.scaling_slice] = np.array([np.matrix.trace(jacobian_inv_blocks[i]) for i in range(scalings.shape[0])])
         return der
 
-    def setup_sampler(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}):
-        pass
-
-#     def projection(self, opt_state):
-#         """
-#         Full projection for Langevin.
-
-#         The state here will be only the state of the optimization variables.
-#         """
-
-#         if not self._setup:
-#             raise ValueError('setup_sampler should be called before using this function')
-
-#         if ('subgradient' not in self.selection_variable and 
-#             'scaling' not in self.selection_variable): # have not conditioned on any thing else
-#             new_state = opt_state.copy() # not really necessary to copy
-#             new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
-#             new_state[self.subgrad_slice] = self.group_lasso_dual.bound_prox(opt_state[self.subgrad_slice])
-#         elif ('subgradient' not in self.selection_variable and
-#               'scaling' in self.selection_variable): # conditioned on the initial scalings
-#                                                      # only the subgradient in opt_state
-#             new_state = self.group_lasso_dual.bound_prox(opt_state)
-#         elif ('subgradient' in self.selection_variable and
-#               'scaling' not in self.selection_variable): # conditioned on the subgradient
-#                                                          # only the scaling in opt_state
-#             new_state = np.maximum(opt_state, 0)
-#         else:
-#             new_state = opt_state
-#         return new_state
-
-#     # optional things to condition on
-
     def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=None):
         """
         ADD DOCSTRING
diff --git a/selection/randomized/api.py b/selection/randomized/api.py
index 1eea5850f..d9aaa8d8b 100644
--- a/selection/randomized/api.py
+++ b/selection/randomized/api.py
@@ -7,8 +7,7 @@
                   pairs_bootstrap_glm, 
                   pairs_inactive_score_glm,
                   glm_nonparametric_bootstrap,
-                  glm_parametric_covariance,
-                  target as glm_target)
+                  glm_parametric_covariance)
 
 from .randomization import randomization
 
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 8167e3f70..8358831d0 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -195,9 +195,6 @@ def summary(self,
         if null_value is None:
             null_value = np.zeros(self.loglike.shape[0])
 
-        #self._queries.setup_sampler(form_covariances=None)
-        #self._queries.setup_opt_state()
-
         unpenalized_mle = restricted_Mest(self.loglike, selected_features)
 
         if self.parametric_cov_estimator == False:
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index b814d03c4..07a76b89a 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -396,7 +396,6 @@ def _parametric_cov_glm(glm_loss,
 class glm_group_lasso(M_estimator):
 
     def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
-        M_estimator.setup_sampler(self, scaling=scaling, solve_args=solve_args)
 
         bootstrap_score = pairs_bootstrap_glm(self.loss,
                                               self.selection_variable['variables'],
@@ -423,7 +422,7 @@ class glm_group_lasso_parametric(M_estimator):
     # this setup_sampler returns only the active set
 
     def setup_sampler(self):
-        M_estimator.setup_sampler(self)
+
         return self.selection_variable['variables']
 
 
@@ -462,7 +461,6 @@ def __init__(self, X, Y, epsilon, penalty, randomization, solve_args={'min_its':
                              randomization, solve_args=solve_args)
 
     def setup_sampler(self):
-        M_estimator.setup_sampler(self)
 
         X, Y = self.loss.data
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 965744d63..fe3fea2b7 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -127,47 +127,6 @@ def solve(self):
             if not objective._solved:
                 objective.solve()
 
-    def setup_sampler(self, form_covariances):
-        '''
-        Parameters
-        ----------
-        form_covariances : callable
-           A callable used to decompose
-           target of inference and the score
-           of each objective.
-        Notes
-        -----
-        This function sets the initial
-        `opt_state` of all optimization
-        variables in each view.
-        We also store a reference to `form_covariances`
-        which is called in the
-        construction of `targeted_sampler`.
-        Returns
-        -------
-        None
-        '''
-
-        self.form_covariances = form_covariances
-
-        nqueries = self.nqueries = len(self.objectives)
-
-        self.score_info = []
-        self.nboot = []
-        for objective in self.objectives:
-            score_ = objective.setup_sampler()
-            self.score_info.append(score_)
-            self.nboot.append(objective.nboot)
-
-        curr_randomization_length = 0
-        self.randomization_slice = []
-        for objective in self.objectives:
-            randomization_length = objective.randomization.shape[0]
-            self.randomization_slice.append(slice(curr_randomization_length,
-                                                  curr_randomization_length + randomization_length))
-            curr_randomization_length = curr_randomization_length + randomization_length
-        self.total_randomization_length = curr_randomization_length
-
 class optimization_sampler(object):
 
     '''

From 45a785cd966957655fe6951c51724fb573df8c45 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 23 Sep 2017 08:24:00 -0700
Subject: [PATCH 254/617] small edit to target

---
 selection/randomized/target.py | 649 +++++++++++++++++++++++++++++++++
 1 file changed, 649 insertions(+)
 create mode 100644 selection/randomized/target.py

diff --git a/selection/randomized/target.py b/selection/randomized/target.py
new file mode 100644
index 000000000..776e9fcf3
--- /dev/null
+++ b/selection/randomized/target.py
@@ -0,0 +1,649 @@
+from itertools import product
+import numpy as np
+
+from regreg.affine import power_L
+
+from ..distributions.api import discrete_family, intervals_from_sample
+from ..sampling.langevin import projected_langevin
+from .reconstruction import reconstruct_full_from_data, reconstruct_internal
+
+class targeted_sampler(object):
+
+    '''
+    Object to sample from target of a selective sampler.
+    '''
+
+    def __init__(self,
+                 multi_view,
+                 target_info,
+                 observed_target_state,
+                 form_covariances,
+                 reference=None,
+                 target_set=None,
+                 parametric=False):
+
+        '''
+        Parameters
+        ----------
+
+        multi_view : `multiple_queries`
+           Instance of `multiple_queries`. Attributes
+           `objectives`, `score_info` are key
+           attributes. (Should maybe change constructor
+           to reflect only what is needed.)
+
+        target_info : object
+           Passed as first argument to `multi_view.form_covariances`.
+
+        observed_target_state : np.float
+           Observed value of the target estimator.
+
+        form_covariances : callable
+           Used in linear decomposition of each score and the target.
+           NOTE: not called here; `multi_view.form_covariances` is used.
+
+        reference : np.float (optional)
+           Reference parameter for Gaussian approximation
+           of target.
+
+        target_set : sequence (optional)
+           Which coordinates of target are really
+           of interest. If not None, then coordinates
+           not in target_set are assumed to have 0
+           mean in the sampler.
+
+        parametric : bool
+           Use parametric covariance estimate?
+
+        Notes
+        -----
+        The callable `form_covariances`
+        should accept `target_info` as first argument
+        and a keyword argument `cross_terms` which
+        correspond to the `score_info` of each
+        objective of `multi_view`. This is used in
+        a linear decomposition of each score into
+        a piece correlated with `target` and
+        an independent piece.
+        The independent piece is treated as a
+        nuisance parameter and conditioned on
+        (i.e. is fixed within the sampler).
+        '''
+
+        # sampler will draw samples for bootstrap
+        # these are arguments to target_info and score_bootstrap
+        # nonparametric bootstrap is np.random.choice(n, size=(n,), replace=True)
+        # residual bootstrap might be X_E.dot(\bar{\beta}_E)
+        # + np.random.choice(resid, size=(n,), replace=True)
+
+        # if target_set is not None, the coordinates of target
+        # it lists are assumed to be independent of the
+        # remaining coordinates: the corresponding cross blocks
+        # of `target_cov` are zeroed out
+
+        # we need these attributes of multi_view
+
+        self.nqueries = len(multi_view.objectives)
+        self.opt_slice = multi_view.opt_slice
+        self.objectives = multi_view.objectives
+
+        self.observed_target_state = observed_target_state
+        self.shape = observed_target_state.shape
+
+        self.total_randomization_length = multi_view.total_randomization_length
+        self.randomization_slice = multi_view.randomization_slice
+
+        self.score_cov = []
+        target_cov_sum = 0
+        for i in range(self.nqueries):
+            if parametric == False:
+                target_cov, cross_cov = multi_view.form_covariances(target_info,  
+                                                                    cross_terms=[multi_view.score_info[i]],
+                                                                    nsample=multi_view.nboot[i])
+            else:
+                target_cov, cross_cov = multi_view.form_covariances(target_info, 
+                                                                    cross_terms=[multi_view.score_info[i]])
+
+            target_cov_sum += target_cov
+            self.score_cov.append(cross_cov)
+
+        self.target_cov = target_cov_sum / self.nqueries
+
+        # XXX we're not really using this target_set in our tests
+
+        # zero out some coordinates of target_cov
+        # to enforce independence of target and null statistics
+
+        if target_set is not None:
+            null_set = set(range(self.target_cov.shape[0])).difference(target_set)
+            for t, n in product(target_set, null_set):
+                self.target_cov[t, n] = 0.
+                self.target_cov[n, t] = 0.
+
+        self.target_transform = []
+
+        for i in range(self.nqueries):
+            self.target_transform.append(
+                self.objectives[i].linear_decomposition(self.score_cov[i],
+                                                        self.target_cov,
+                                                        self.observed_target_state))
+
+        self.target_cov = np.atleast_2d(self.target_cov)
+        self.target_inv_cov = np.linalg.inv(self.target_cov)
+
+        # size of reference? should it only be target_set?
+
+        if reference is None:
+            reference = np.zeros(self.target_inv_cov.shape[0])
+        self.reference = reference
+
+        # need to vectorize the state for Langevin
+
+        self.overall_opt_slice = slice(0, multi_view.num_opt_var)
+        self.target_slice = slice(multi_view.num_opt_var,
+                                  multi_view.num_opt_var + self._reference_inv.shape[0])
+        self.keep_slice = self.target_slice
+
+        # set the observed state
+
+        self.observed_state = np.zeros(multi_view.num_opt_var + self._reference_inv.shape[0])
+        self.observed_state[self.target_slice] = self.observed_target_state
+        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
+
+        # added for the reconstruction map in case we marginalize over optimization variables
+
+        randomization_length_total = 0
+        self.randomization_slice = []
+        for i in range(self.nqueries):
+            self.randomization_slice.append(
+                slice(randomization_length_total, randomization_length_total + self.objectives[i].ndim))
+            randomization_length_total += self.objectives[i].ndim
+
+        self.randomization_length_total = randomization_length_total
+
+    def set_reference(self, reference):
+        self._reference = np.atleast_1d(reference)
+        self._reference_inv = self.target_inv_cov.dot(self.reference).flatten()
+
+    def get_reference(self):
+        return self._reference
+
+    reference = property(get_reference, set_reference)
+
+    def projection(self, state):
+        '''
+        Projection map of projected Langevin sampler.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler made up of `(target, opt_vars)`.
+           Typically, the projection will only act on
+           `opt_vars`.
+        Returns
+        -------
+        projected_state : np.float
+        '''
+
+        opt_state = state[self.overall_opt_slice]
+        new_opt_state = np.zeros_like(opt_state)
+        for i in range(self.nqueries):
+            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
+        state[self.overall_opt_slice] = new_opt_state
+        return state
+
+    def gradient(self, state):
+        '''
+        Gradient of log-density at current state.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler made up of `(opt_vars, target)`.
+        Returns
+        -------
+        gradient : np.float
+        '''
+
+        target_state, opt_state = state[self.target_slice], state[self.overall_opt_slice]
+        target_grad, opt_grad = np.zeros_like(target_state), np.zeros_like(opt_state)
+        full_grad = np.zeros_like(state)
+
+        # randomization_gradient are gradients of a CONVEX function
+
+        for i in range(self.nqueries):
+            # NOTE(review): randomization_state is not used below -- confirm it can be dropped
+            randomization_state = reconstruct_full_from_data(self.objectives[i].opt_transform,
+                                                             self.objectives[i].score_transform,
+                                                             target_state, 
+                                                             self.target_transform[i], 
+                                                             opt_state[self.opt_slice[i]])
+            internal_state = reconstruct_internal(target_state, self.target_transform[i])
+            grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]]) 
+            target_linear, target_offset = self.target_transform[i]
+            opt_linear, opt_offset = self.objectives[i].opt_transform
+            if target_linear is not None:
+                target_grad += target_linear.T.dot(grad)
+            if opt_linear is not None:
+                # chain rule: pull grad back through the linear part, not the offset
+                opt_grad[self.opt_slice[i]] = opt_linear.T.dot(grad)
+
+        target_grad = -target_grad
+        target_grad += self._reference_inv - self.target_inv_cov.dot(target_state)
+        full_grad[self.target_slice] = target_grad
+        full_grad[self.overall_opt_slice] = -opt_grad
+
+        return full_grad
+
+
+    def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
+        '''
+        Sample `target` from selective density
+        using projected Langevin sampler with
+        gradient map `self.gradient` and
+        projection map `self.projection`.
+
+        Parameters
+        ----------
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        keep_opt : bool
+           Should we return optimization variables
+           as well as the target?
+        Returns
+        -------
+        samples : np.float (one row per retained draw)
+        '''
+
+        if stepsize is None:
+            stepsize = 1. / self.crude_lipschitz()
+
+        if keep_opt:
+            keep_slice = slice(None, None, None)
+        else:
+            keep_slice = self.keep_slice
+
+        target_langevin = projected_langevin(self.observed_state.copy(),
+                                             self.gradient,
+                                             self.projection,
+                                             stepsize)
+
+        samples = []
+
+        for i in range(ndraw + burnin):
+            target_langevin.next()
+            if (i >= burnin):
+                samples.append(target_langevin.state[keep_slice].copy())
+        return np.asarray(samples)
+
+    def hypothesis_test(self,
+                        test_stat,
+                        observed_value,
+                        ndraw=10000,
+                        burnin=2000,
+                        stepsize=None,
+                        sample=None,
+                        parameter=None,
+                        alternative='twosided'):
+
+        '''
+        Test a hypothesis about `target` using a sample
+        from the selective density; the sample is drawn
+        with `self.sample` unless one is supplied, and is
+        reweighted by Gaussian tilting toward `parameter`.
+        Parameters
+        ----------
+        test_stat : callable
+           Test statistic to evaluate on sample from
+           selective distribution.
+        observed_value : float
+           Observed value of test statistic.
+           Used in p-value calculation.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc. If not None,
+           `ndraw, burnin, stepsize` are ignored.
+        parameter : np.float (optional)
+           If None, defaults to `self.reference`.
+           Otherwise, sample is reweighted using Gaussian tilting.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalue : float
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        if parameter is None:
+            parameter = self.reference
+
+        sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
+
+
+        delta = self.target_inv_cov.dot(parameter - self.reference)
+        W = np.exp(sample.dot(delta))
+
+        family = discrete_family(sample_test_stat, W)
+        pval = family.cdf(0, observed_value)
+
+        if alternative == 'greater':
+            return 1 - pval
+        elif alternative == 'less':
+            return pval
+        else:
+            return 2 * min(pval, 1 - pval)
+
+    def confidence_intervals(self,
+                             observed,
+                             ndraw=10000,
+                             burnin=2000,
+                             stepsize=None,
+                             sample=None,
+                             level=0.9):
+        '''
+        Parameters
+        ----------
+        observed : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        level : float (optional)
+            Specify the
+            confidence level.
+        Notes
+        -----
+        Construct selective confidence intervals
+        for each parameter of the target.
+        Returns
+        -------
+        intervals : [(float, float)]
+            List of confidence intervals.
+        '''
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        nactive = observed.shape[0]
+        intervals_instance = intervals_from_sample(self.reference,
+                                                   sample,
+                                                   observed,
+                                                   self.target_cov)
+
+        return intervals_instance.confidence_intervals_all(level=level)
+
+    def coefficient_pvalues(self,
+                            observed,
+                            parameter=None,
+                            ndraw=10000,
+                            burnin=2000,
+                            stepsize=None,
+                            sample=None,
+                            alternative='twosided'):
+        '''
+        Construct selective p-values
+        for each parameter of the target.
+        Parameters
+        ----------
+        observed : np.float
+            A vector of parameters with shape `self.shape`,
+            representing coordinates of the target.
+        parameter : np.float (optional)
+            A vector of parameters with shape `self.shape`
+            at which to evaluate p-values. Defaults
+            to `np.zeros(self.shape)`.
+        ndraw : int
+           How long a chain to return?
+        burnin : int
+           How many samples to discard?
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+        sample : np.array (optional)
+           If not None, assumed to be a sample of shape (-1,) + `self.shape`
+           representing a sample of the target from parameters `self.reference`.
+           Allows reuse of the same sample for construction of confidence
+           intervals, hypothesis tests, etc.
+        alternative : ['greater', 'less', 'twosided']
+            What alternative to use.
+        Returns
+        -------
+        pvalues : np.float
+
+        '''
+
+        if alternative not in ['greater', 'less', 'twosided']:
+            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+
+        if sample is None:
+            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+
+        if parameter is None:
+            parameter = np.zeros(self.shape)
+
+        nactive = observed.shape[0]
+        intervals_instance = intervals_from_sample(self.reference,
+                                                   sample,
+                                                   observed,
+                                                   self.target_cov)
+
+        pval = intervals_instance.pivots_all(parameter)
+
+        if alternative == 'greater':
+            return 1 - pval
+        elif alternative == 'less':
+            return pval
+        else:
+            return 2 * np.minimum(pval, 1 - pval)
+
+    def crude_lipschitz(self):
+        """
+        A crude Lipschitz constant for the
+        gradient of the log-density.
+        Returns
+        -------
+        lipschitz : float
+
+        """
+        lipschitz = power_L(self.target_inv_cov)
+        for transform, objective in zip(self.target_transform, self.objectives):
+            lipschitz += power_L(transform[0])**2 * objective.randomization.lipschitz
+            lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
+        return lipschitz
+
+
+    def reconstruct(self, state):
+        '''
+        Reconstruction of randomization at current state.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler made up of `(target, opt_vars)`.
+           Can be array with each row a state.
+        Returns
+        -------
+        reconstructed : np.float
+           One column per randomization coordinate and one row
+           per row of `state` (singleton axes are squeezed).
+
+        '''
+
+        state = np.atleast_2d(state)
+        if len(state.shape) > 2:
+            raise ValueError('expecting at most 2-dimensional array')
+
+        target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
+        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
+
+        for i in range(self.nqueries):
+            reconstructed[:, self.randomization_slice[i]] = reconstruct_full_from_data(self.objectives[i].opt_transform,
+                                                                                       self.objectives[i].score_transform,
+                                                                                       target_state,
+                                                                                       self.target_transform[i],
+                                                                                       opt_state[:, self.opt_slice[i]])
+
+        return np.squeeze(reconstructed)
+
+    def log_density(self, state):
+        '''
+        Log of randomization density at current state.
+        Parameters
+        ----------
+        state : np.float
+           State of sampler made up of `(target, opt_vars)`.
+           Can be two-dimensional with each row a state.
+        Returns
+        -------
+        density : np.float
+            Has number of rows as `state` if 2-dimensional.
+        '''
+
+        reconstructed = self.reconstruct(state)
+        value = np.zeros(reconstructed.shape[0])
+        # `reconstruct` stores query i in columns `randomization_slice[i]`, so read the same columns back
+        for i in range(self.nqueries):
+            log_dens = self.objectives[i].randomization.log_density
+            value += log_dens(reconstructed[:,self.randomization_slice[i]])
+        return np.squeeze(value)
+
+class bootstrapped_target_sampler(targeted_sampler):
+
+    # make one of these for each hypothesis test
+
+    def __init__(self,
+                 multi_view,
+                 target_info,
+                 observed_target_state,
+                 target_alpha,
+                 target_set=None,
+                 reference=None,
+                 boot_size=None):
+
+        # sampler will draw bootstrapped weights for the target
+
+        if boot_size is None:
+            boot_size = target_alpha.shape[1]
+
+        targeted_sampler.__init__(self, multi_view,
+                                  target_info,
+                                  observed_target_state,
+                                  target_set,
+                                  reference)
+        # for bootstrap
+
+        self.boot_size = boot_size
+        self.target_alpha = target_alpha
+        self.boot_transform = []
+
+        for i in range(self.nqueries):
+            composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
+                                                                                                  self.target_cov,
+                                                                                                  self.observed_target_state)
+            boot_linear_part = np.dot(composition_linear_part, target_alpha)
+            boot_offset = composition_offset + np.dot(composition_linear_part, self.reference).flatten()
+            self.boot_transform.append((boot_linear_part, boot_offset))
+
+        # set the observed state for bootstrap
+
+        self.boot_slice = slice(multi_view.num_opt_var, multi_view.num_opt_var + self.boot_size)
+        self.observed_state = np.zeros(multi_view.num_opt_var + self.boot_size)
+        self.observed_state[self.boot_slice] = np.ones(self.boot_size)
+        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
+
+
+    def gradient(self, state):
+        """Gradient for the Langevin sampler; fills the `boot_slice` and `overall_opt_slice` parts of a vector shaped like `state`."""
+        boot_state, opt_state = state[self.boot_slice], state[self.overall_opt_slice]
+        boot_grad, opt_grad = np.zeros_like(boot_state), np.zeros_like(opt_state)
+        full_grad = np.zeros_like(state)
+
+        # randomization_gradient are gradients of a CONVEX function
+
+        for i in range(self.nqueries):
+
+            randomization_state = reconstruct_full_from_data(self.objectives[i].opt_transform,
+                                                             self.objectives[i].score_transform,
+                                                             boot_state, 
+                                                             self.boot_transform[i], 
+                                                             opt_state[self.opt_slice[i]])
+
+            internal_state = reconstruct_internal(boot_state, self.boot_transform[i])
+            grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]])
+            boot_linear, boot_offset = self.boot_transform[i]
+            opt_linear, opt_offset = self.objectives[i].opt_transform
+            if boot_linear is not None:
+                boot_grad += boot_linear.T.dot(grad)
+            if opt_linear is not None:
+                opt_grad[self.opt_slice[i]] = opt_linear.T.dot(grad)
+
+        boot_grad = -boot_grad
+        boot_grad -= boot_state
+
+        full_grad[self.boot_slice] = boot_grad
+        full_grad[self.overall_opt_slice] = -opt_grad
+
+        return full_grad
+
+    def sample(self, ndraw, burnin, stepsize = None, keep_opt=False):
+        if stepsize is None:
+            stepsize = 1. / self.observed_state.shape[0]
+
+        bootstrap_langevin = projected_langevin(self.observed_state.copy(),
+                                                self.gradient,
+                                                self.projection,
+                                                stepsize)
+        if keep_opt:
+            boot_slice = slice(None, None, None)
+        else:
+            boot_slice = self.boot_slice
+
+        samples = []
+        for i in range(ndraw + burnin):
+            bootstrap_langevin.next()
+            if (i >= burnin):
+                samples.append(bootstrap_langevin.state[boot_slice].copy())
+        samples = np.asarray(samples)
+
+        if keep_opt:
+            target_samples = samples[:,self.boot_slice].dot(self.target_alpha.T) + self.reference[None, :]
+            opt_sample0 = samples[0,self.overall_opt_slice]
+            result = np.zeros((samples.shape[0], opt_sample0.shape[0] + target_samples.shape[1]))
+            result[:,self.overall_opt_slice] = samples[:,self.overall_opt_slice]
+            result[:,self.target_slice] = target_samples
+            return result
+        else:
+            target_samples = samples.dot(self.target_alpha.T) + self.reference[None, :]
+            return target_samples
+
+# test rebase

From b172c9fb862afafa14888716230b50a2ba3dc12d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 27 Sep 2017 22:34:30 -0700
Subject: [PATCH 255/617] testing wide QP solver

---
 selection/algorithms/tests/test_compareR.py | 38 +++++++++++++--------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 0f210a051..4d51c59b4 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -56,7 +56,7 @@ def test_fixed_lambda():
         coef0=out$coef0
         vars=out$vars
         print(coef(lm(y ~ x[,out$vars])))
-        out 
+        out
         """ % (s, lam)
 
         rpy.r(R_code)
@@ -125,7 +125,7 @@ def test_forward_step():
     steps = []
     for i in range(x.shape[1]):
         FS.step()
-        steps.extend(FS.model_pivots(i+1, 
+        steps.extend(FS.model_pivots(i+1,
                                      which_var=FS.variables[-1:],
                                      alternative='onesided'))
 
@@ -176,7 +176,7 @@ def test_forward_step_all():
     steps = []
     for i in range(5):
         FS.step()
-    steps = FS.model_pivots(5, 
+    steps = FS.model_pivots(5,
                             alternative='onesided')
 
     np.testing.assert_array_equal(selected_vars, [i + 1 for i, p in steps])
@@ -305,19 +305,18 @@ def test_logistic():
 
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
-def test_solve_QP(): 
+def test_solve_QP():
     """
     Check the R coordinate descent LASSO solver
     """
 
     n, p = 100, 200
-    lam = 10
-    np.random.seed(0)
+    lam = 0.1
 
     X = np.random.standard_normal((n, p))
     Y = np.random.standard_normal(n)
 
-    loss = rr.squared_error(X, Y)
+    loss = rr.squared_error(X, Y, coef=1./n)
     pen = rr.l1norm(p, lagrange=lam)
     problem = rr.simple_problem(loss, pen)
     soln = problem.solve(min_its=500, tol=1.e-12)
@@ -325,31 +324,42 @@ def test_solve_QP():
     import rpy2.robjects.numpy2ri
     rpy2.robjects.numpy2ri.activate()
 
-    tol = 1.e-5
     rpy.r.assign('X', X)
     rpy.r.assign('Y', Y)
     rpy.r.assign('lam', lam)
-    
+
     R_code = """
 
     library(selectiveInference)
     p = ncol(X)
+    n = nrow(X)
     soln_R = rep(0, p)
-    grad = -t(X) %*% Y
-    ever_active = c(1, rep(0, p-1))
+    grad = -t(X) %*% Y / n
+    ever_active = as.integer(c(1, rep(0, p-1)))
     nactive = as.integer(1)
     kkt_tol = 1.e-12
-    objective_tol = 1.e-12
+    objective_tol = 1.e-16
     maxiter = 500
-    soln_R = selectiveInference:::solve_QP(t(X) %*% X, lam, maxiter, soln_R, -t(X) %*% Y, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln
+    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln
+
+    # test wide solver
+    Xtheta = rep(0, n)
+    nactive = as.integer(1)
+    ever_active = as.integer(c(1, rep(0, p-1)))
+    soln_R_wide = rep(0, p)
+    grad = - t(X) %*% Y / n
+    soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R_wide, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p)$soln
 
-    """ 
+    """
 
     rpy.r(R_code)
 
     soln_R = np.asarray(rpy.r('soln_R'))
+    soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
     rpy2.robjects.numpy2ri.deactivate()
 
+    tol = 1.e-5
     yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
+    yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver'
 
 

From bfc33544ca292e23d1bcfc41c722b02815603916 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Wed, 4 Oct 2017 12:28:33 -0700
Subject: [PATCH 256/617] dont keep the whole hessian

---
 selection/randomized/M_estimator.py           | 21 ++++++++++++-------
 .../tests/test_opt_weighted_intervals.py      | 10 ++++-----
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 1ebd12918..9933fcbf7 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -174,7 +174,7 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
 
         beta_full = np.zeros(overall.shape)
         beta_full[overall] = _beta_unpenalized
-        _hessian = loss.hessian(beta_full)
+        #_hessian = loss.hessian(beta_full)
         self._beta_full = beta_full
 
         # observed state for score in internal coordinates
@@ -197,7 +197,10 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
 
         Mest_slice = slice(0, overall.sum())
-        _Mest_hessian = _hessian[:, overall]
+        # _Mest_hessian = _hessian[:,overall]
+        X, y = loss.data
+        W = self.loss.saturated_loss.hessian(beta_full)
+        _Mest_hessian = np.dot(X.T, X[:, overall] * W[overall])
         _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
 
         # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
@@ -213,7 +216,8 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         if len(active_directions)==0:
             _opt_hessian=0
         else:
-            _opt_hessian = (_hessian + epsilon * np.identity(p)).dot(active_directions)
+            #_opt_hessian = (_hessian + epsilon * np.identity(p)).dot(active_directions)
+            _opt_hessian = np.dot(_Mest_hessian, active_directions[overall]) + epsilon * active_directions
         _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling
 
         self.observed_opt_state[scaling_slice] *= _sqrt_scaling
@@ -223,8 +227,9 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
         unpenalized_directions = np.identity(p)[:,unpenalized]
         if unpenalized.sum():
-            _opt_linear_term[:, unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
-
+            #_opt_linear_term[:, unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
+            _opt_linear_term[:, unpenalized_slice] = (np.dot(_Mest_hessian, unpenalized_directions[overall])
+                                                      + epsilon * unpenalized_directions) / _sqrt_scaling
         self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling
 
         # subgrad piece
@@ -279,9 +284,9 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         self.unpenalized_slice = unpenalized_slice
         self.ndim = loss.shape[0]
 
-        self.Q = ((_hessian + epsilon * np.identity(p))[:,active])[active,:]
-        self.Qinv = np.linalg.inv(self.Q)
-        self.form_VQLambda()
+        #self.Q = ((_hessian + epsilon * np.identity(p))[:,active])[active,:]
+        #self.Qinv = np.linalg.inv(self.Q)
+        #self.form_VQLambda()
         self.nboot = nboot
 
 
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 9ff57adce..f7b57f555 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -25,13 +25,13 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
         inst, const = const_info
 
-        X, Y, beta = inst(n=100, p=10, s=0, signal=1., sigma=5.)[:3]
+        X, Y, beta = inst(n=100, p=20, s=2, signal=5., sigma=5.)[:3]
         n, p = X.shape
 
-        W = np.ones(X.shape[1]) * 5
+        W = np.ones(X.shape[1]) * 7
         conv = const(X, Y, W, randomizer=rand, parametric_cov_estimator=True)
         signs = conv.fit()
-        print("signs", signs)
+        #print("signs", signs)
 
         #marginalizing_groups = np.zeros(p, np.bool)
         #marginalizing_groups[:int(p/2)] = True
@@ -41,7 +41,7 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         #                           conditioning_groups=conditioning_groups)
 
         selected_features = conv._view.selection_variable['variables']
-
+        print("nactive", selected_features.sum())
         sel_pivots, sel_ci = conv.summary(selected_features,
                                           null_value=beta[selected_features],
                                           ndraw=ndraw,
@@ -64,7 +64,7 @@ def compute_coverage(sel_ci, true_vec):
     return coverage
 
 
-def main(ndraw=20000, burnin=5000, nsim=10):
+def main(ndraw=20000, burnin=5000, nsim=50):
     np.random.seed(1)
 
     sel_pivots_all = list()

From b5b62318d94ea16a7a492065daf57814cf66e93a Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Wed, 4 Oct 2017 23:22:17 -0700
Subject: [PATCH 257/617] marg subgrad

---
 selection/randomized/M_estimator.py                  |  7 +++++--
 .../randomized/tests/test_opt_weighted_intervals.py  | 12 ++++++------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 9933fcbf7..987fce162 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -528,7 +528,10 @@ def new_log_density(query,
                             internal_state, 
                             opt_state):
 
-            full_state = reconstruct_full_from_internal(new_opt_transform, query.score_transform, internal_state, opt_state)
+            full_state = reconstruct_full_from_internal(new_opt_transform,
+                                                        query.score_transform,
+                                                        internal_state,
+                                                        opt_state)
             full_state = np.atleast_2d(full_state)
             p = query.penalty.shape[0]
             logdens = 0
@@ -536,7 +539,7 @@ def new_log_density(query,
             if inactive_marginal_groups.sum()>0:
                 full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
                 full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
-                logdens += np.log(_cdf(full_state_plus) - _cdf(full_state_minus)).sum()
+                logdens += np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:,inactive_marginal_groups].sum()
 
             logdens += log_dens(full_state[:,~inactive_marginal_groups])
             return np.squeeze(logdens) # should this be negative to match the gradient log density?
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index f7b57f555..b46eab72e 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -33,12 +33,12 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         signs = conv.fit()
         #print("signs", signs)
 
-        #marginalizing_groups = np.zeros(p, np.bool)
-        #marginalizing_groups[:int(p/2)] = True
-        #conditioning_groups = ~marginalizing_groups
-        #conditioning_groups[-int(p/4):] = False
-        #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
-        #                           conditioning_groups=conditioning_groups)
+        marginalizing_groups = np.zeros(p, np.bool)
+        marginalizing_groups[:int(p/2)] = True
+        conditioning_groups = ~marginalizing_groups
+        conditioning_groups[-int(p/4):] = False
+        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+                                   conditioning_groups=conditioning_groups)
 
         selected_features = conv._view.selection_variable['variables']
         print("nactive", selected_features.sum())

From a4b9acdf629fa05b8c843d038ad1df0d1b68ec20 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Oct 2017 10:10:32 -0700
Subject: [PATCH 258/617] moving some tests

---
 .../randomized_tests}/test_estimation.py      |  0
 .../randomized_tests}/test_reconstruction.py  |  0
 .../tests/test_decompose_subgrad.py           | 27 -------------------
 3 files changed, 27 deletions(-)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_estimation.py (100%)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_reconstruction.py (100%)

diff --git a/selection/randomized/tests/test_estimation.py b/sandbox/randomized_tests/test_estimation.py
similarity index 100%
rename from selection/randomized/tests/test_estimation.py
rename to sandbox/randomized_tests/test_estimation.py
diff --git a/selection/randomized/tests/test_reconstruction.py b/sandbox/randomized_tests/test_reconstruction.py
similarity index 100%
rename from selection/randomized/tests/test_reconstruction.py
rename to sandbox/randomized_tests/test_reconstruction.py
diff --git a/selection/randomized/tests/test_decompose_subgrad.py b/selection/randomized/tests/test_decompose_subgrad.py
index 7ebbe056b..23b580e59 100644
--- a/selection/randomized/tests/test_decompose_subgrad.py
+++ b/selection/randomized/tests/test_decompose_subgrad.py
@@ -3,7 +3,6 @@
 import nose.tools as nt
 
 from ..convenience import lasso, step, threshold
-from ..glm import target as glm_target
 
 def test_marginalize():
 
@@ -38,17 +37,6 @@ def test_marginalize():
 
     L.decompose_subgradient(marginalizing_groups = marginalizing_groups)
 
-    A2, b2 = L._view.opt_transform
-    opt_state2 = L._view.observed_opt_state.copy()
-    state2 = A2.dot(opt_state2) + b2
-
-    opt_state3 = opt_state1.copy()
-    opt_state3[3:] = 0.
-    state3 = A1.dot(opt_state3) + b1
-
-    np.testing.assert_allclose(state1[:3], state2[:3])  # coordinates that are not marginalized over agree before and after marginalizing
-    np.testing.assert_allclose(state3, state2) # when marginalizing, the transform is such that the marginalized subgradients were 0
-
 def test_condition():
 
     n, p = 20, 5
@@ -83,12 +71,6 @@ def test_condition():
 
     L.decompose_subgradient(conditioning_groups = conditioning_groups)
 
-    A2, b2 = L._view.opt_transform
-    state2 = A2.dot(L._view.observed_opt_state) + b2
-
-    np.testing.assert_allclose(state1, state2) # when conditioning, the transform is such that the marginalized subgradients were 
-                                               # what we had originally observed
-
 def test_both():
 
 
@@ -127,12 +109,3 @@ def test_both():
     L.decompose_subgradient(marginalizing_groups = marginalizing_groups,
                             conditioning_groups = conditioning_groups)
 
-    A2, b2 = L._view.opt_transform
-    opt_state2 = L._view.observed_opt_state.copy()
-    state2 = A2.dot(opt_state2) + b2
-
-    opt_state3 = opt_state1.copy()
-    opt_state3[3:5] = 0.
-    state3 = A1.dot(opt_state3) + b1
-
-    np.testing.assert_allclose(state3, state2) # when marginalizing, the transform is such that the marginalized subgradients were 0

From df124124cb5807d3969a8fe47e8b8aa457ea4482 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Thu, 5 Oct 2017 10:34:29 -0700
Subject: [PATCH 259/617] moved coverages in doc folder

---
 doc/__init__.py                               |  0
 doc/examples/__init__.py                      |  0
 doc/examples/compute_coverages.py             | 47 +++++++++++++++++++
 .../tests/test_opt_weighted_intervals.py      | 47 ++-----------------
 4 files changed, 50 insertions(+), 44 deletions(-)
 create mode 100644 doc/__init__.py
 create mode 100644 doc/examples/__init__.py
 create mode 100644 doc/examples/compute_coverages.py

diff --git a/doc/__init__.py b/doc/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/doc/examples/__init__.py b/doc/examples/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/doc/examples/compute_coverages.py b/doc/examples/compute_coverages.py
new file mode 100644
index 000000000..eade5e6aa
--- /dev/null
+++ b/doc/examples/compute_coverages.py
@@ -0,0 +1,47 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+from statsmodels.distributions import ECDF
+from selection.randomized.tests.test_opt_weighted_intervals import test_opt_weighted_intervals
+
+
+def compute_coverage(sel_ci, true_vec):
+    nactive = true_vec.shape[0]
+    coverage = np.zeros(nactive)
+    for i in range(nactive):
+        if true_vec[i]>=sel_ci[i,0] and true_vec[i]<=sel_ci[i,1]:
+            coverage[i]=1
+    return coverage
+
+
+def main(ndraw=20000, burnin=5000, nsim=50):
+    np.random.seed(1)
+
+    sel_pivots_all = list()
+    sel_ci_all = list()
+    rand_all = []
+    for i in range(nsim):
+        for idx, (rand, sel_pivots, sel_ci, true_vec) in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
+            if i==0:
+                sel_pivots_all.append([])
+                rand_all.append(rand)
+                sel_ci_all.append([])
+            sel_pivots_all[idx].append(sel_pivots)
+            print(sel_ci)
+            sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
+
+    xval = np.linspace(0, 1, 200)
+
+    for idx in range(len(rand_all)):
+        fig = plt.figure(num=idx, figsize=(8,8))
+        plt.clf()
+        sel_pivots_all[idx] = [item for sublist in sel_pivots_all[idx] for item in sublist]
+        plt.plot(xval, ECDF(sel_pivots_all[idx])(xval), label='selective')
+        plt.plot(xval, xval, 'k-', lw=1)
+        plt.legend(loc='lower right')
+
+        sel_ci_all[idx] = [item for sublist in sel_ci_all[idx] for item in sublist]
+        print(sel_ci_all)
+        plt.title(''.join(["coverage ", str(np.mean(sel_ci_all[idx]))]))
+        plt.savefig(''.join(["fig", rand_all[idx], '.pdf']))
+
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index b46eab72e..114135d94 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -33,10 +33,10 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         signs = conv.fit()
         #print("signs", signs)
 
-        marginalizing_groups = np.zeros(p, np.bool)
-        marginalizing_groups[:int(p/2)] = True
+        marginalizing_groups = np.ones(p, np.bool)
+        #marginalizing_groups[:int(p/2)] = True
         conditioning_groups = ~marginalizing_groups
-        conditioning_groups[-int(p/4):] = False
+        #conditioning_groups[-int(p/4):] = False
         conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
                                    conditioning_groups=conditioning_groups)
 
@@ -53,45 +53,4 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
     return results
 
 
-from statsmodels.distributions import ECDF
-
-def compute_coverage(sel_ci, true_vec):
-    nactive = true_vec.shape[0]
-    coverage = np.zeros(nactive)
-    for i in range(nactive):
-        if true_vec[i]>=sel_ci[i,0] and true_vec[i]<=sel_ci[i,1]:
-            coverage[i]=1
-    return coverage
-
-
-def main(ndraw=20000, burnin=5000, nsim=50):
-    np.random.seed(1)
-
-    sel_pivots_all = list()
-    sel_ci_all = list()
-    rand_all = []
-    for i in range(nsim):
-        for idx, (rand, sel_pivots, sel_ci, true_vec) in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
-            if i==0:
-                sel_pivots_all.append([])
-                rand_all.append(rand)
-                sel_ci_all.append([])
-            sel_pivots_all[idx].append(sel_pivots)
-            print(sel_ci)
-            sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
-
-    xval = np.linspace(0, 1, 200)
-
-    for idx in range(len(rand_all)):
-        fig = plt.figure(num=idx, figsize=(8,8))
-        plt.clf()
-        sel_pivots_all[idx] = [item for sublist in sel_pivots_all[idx] for item in sublist]
-        plt.plot(xval, ECDF(sel_pivots_all[idx])(xval), label='selective')
-        plt.plot(xval, xval, 'k-', lw=1)
-        plt.legend(loc='lower right')
-
-        sel_ci_all[idx] = [item for sublist in sel_ci_all[idx] for item in sublist]
-        print(sel_ci_all)
-        plt.title(''.join(["coverage ", str(np.mean(sel_ci_all[idx]))]))
-        plt.savefig(''.join(["fig", rand_all[idx], '.pdf']))
 

From c3740d0becc7db1734a900c260245ec772fb1b1d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Oct 2017 10:38:36 -0700
Subject: [PATCH 260/617] WIP: test_split just needs covariance set

---
 .../randomized_tests}/test_scaling.py         |  0
 .../randomized_tests}/test_threshold_score.py |  0
 .../test_without_screening.py                 |  0
 selection/randomized/tests/test_split.py      | 80 +++++++------------
 4 files changed, 31 insertions(+), 49 deletions(-)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_scaling.py (100%)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_threshold_score.py (100%)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_without_screening.py (100%)

diff --git a/selection/randomized/tests/test_scaling.py b/sandbox/randomized_tests/test_scaling.py
similarity index 100%
rename from selection/randomized/tests/test_scaling.py
rename to sandbox/randomized_tests/test_scaling.py
diff --git a/selection/randomized/tests/test_threshold_score.py b/sandbox/randomized_tests/test_threshold_score.py
similarity index 100%
rename from selection/randomized/tests/test_threshold_score.py
rename to sandbox/randomized_tests/test_threshold_score.py
diff --git a/selection/randomized/tests/test_without_screening.py b/sandbox/randomized_tests/test_without_screening.py
similarity index 100%
rename from selection/randomized/tests/test_without_screening.py
rename to sandbox/randomized_tests/test_without_screening.py
diff --git a/selection/randomized/tests/test_split.py b/selection/randomized/tests/test_split.py
index a80df1577..fbdbce0aa 100644
--- a/selection/randomized/tests/test_split.py
+++ b/selection/randomized/tests/test_split.py
@@ -3,17 +3,18 @@
 
 import regreg.api as rr
 
-from selection.tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
 import selection.tests.reports as reports
-from selection.tests.flags import SMALL_SAMPLES
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.instance import logistic_instance
 
-from selection.api import multiple_queries, glm_target
-from selection.randomized.glm import split_glm_group_lasso
-from selection.tests.instance import logistic_instance
+from ..glm import (split_glm_group_lasso,
+                   glm_nonparametric_bootstrap,
+                   glm_parametric_covariance,
+                   pairs_bootstrap_glm)
+from ..M_estimator import restricted_Mest
 
-from selection.randomized.query import naive_confidence_intervals
-
-@register_report(['mle', 'truth', 'pvalue', 'cover', 'naive_cover', 'active'])
+@register_report(['pvalue', 'cover', 'active'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @wait_for_return_value()
 def test_split(s=3,
@@ -25,7 +26,6 @@ def test_split(s=3,
                lam_frac=0.7,
                ndraw=10000, 
                burnin=2000, 
-               bootstrap=True,
                solve_args={'min_its':50, 'tol':1.e-10},
                reference_known=False): 
 
@@ -44,10 +44,9 @@ def test_split(s=3,
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
     M_est = split_glm_group_lasso(loss, epsilon, m, penalty)
-    mv = multiple_queries([M_est])
-    mv.solve()
+    M_est.solve()
 
-    M_est.selection_variable['variables'] = M_est.selection_variable['variables']
+    M_est.selection_variable['variables'] 
     nactive = np.sum(M_est.selection_variable['variables'])
 
     if nactive==0:
@@ -57,52 +56,35 @@ def test_split(s=3,
 
         active_set = np.nonzero(M_est.selection_variable['variables'])[0]
 
-        if bootstrap:
-            target_sampler, target_observed = glm_target(loss, 
-                                                         M_est.selection_variable['variables'],
-                                                         mv)
-
-        else:
-            target_sampler, target_observed = glm_target(loss, 
-                                                         M_est.selection_variable['variables'],
-                                                         mv,
-                                                         bootstrap=True)
-
-        reference_known = True
-        if reference_known:
-            reference = beta[M_est.selection_variable['variables']] 
-        else:
-            reference = target_observed
+        selected_features = np.zeros(p, np.bool)
+        selected_features[active_set] = True
 
-        target_sampler.reference = reference
+        unpenalized_mle = restricted_Mest(M_est.loss, selected_features)
 
-        target_sample = target_sampler.sample(ndraw=ndraw,
-                                              burnin=burnin)
+        form_covariances = glm_nonparametric_bootstrap(n, n)
+        boot_target, boot_target_observed = pairs_bootstrap_glm(M_est.loss, selected_features, inactive=None)
+        target_info = boot_target
 
+        cov_info = M_est.setup_sampler()
+        target_cov, score_cov = form_covariances(target_info,  
+                                                 cross_terms=[cov_info],
+                                                 nsample=M_est.nboot)
 
-        LU = target_sampler.confidence_intervals(target_observed,
-                                                 sample=target_sample).T
+        opt_sample = M_est.sampler.sample(ndraw,
+                                           burnin)
 
-        LU_naive = naive_confidence_intervals(target_sampler, target_observed)
+        ### TODO -- this only uses one view -- what about other queries?
 
-        pivots_mle = target_sampler.coefficient_pvalues(target_observed,
-                                                        parameter=target_sampler.reference,
-                                                        sample=target_sample)
-        
-        pivots_truth = target_sampler.coefficient_pvalues(target_observed,
-                                                          parameter=beta[M_est.selection_variable['variables']],
-                                                          sample=target_sample)
-        
-        true_vec = beta[M_est.selection_variable['variables']]
+        pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=null_value, sample=opt_sample)
+        intervals = None
+        if compute_intervals:
+            intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
 
-        pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                     parameter=np.zeros_like(true_vec),
-                                                     sample=target_sample)
+        reference = beta[M_est.selection_variable['variables']] 
 
-        L, U = LU
+        L, U = intervals
 
         covered = np.zeros(nactive, np.bool)
-        naive_covered = np.zeros(nactive, np.bool)
         active_var = np.zeros(nactive, np.bool)
 
         for j in range(nactive):
@@ -112,7 +94,7 @@ def test_split(s=3,
                 naive_covered[j] = 1
             active_var[j] = active_set[j] in nonzero
 
-        return pivots_mle, pivots_truth, pvalues, covered, naive_covered, active_var
+        return pvalues, covered, active_var
 
 def report(niter=50, **kwargs):
 

From 17fb8d432032709ba8d68a75030198cdfdcf5102 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Oct 2017 10:49:21 -0700
Subject: [PATCH 261/617] test_split working now -- moved split covariance
 estimator to glm from M_estimator

---
 selection/randomized/M_estimator.py      | 61 ++----------------------
 selection/randomized/glm.py              | 60 +++++++++++++++++++++--
 selection/randomized/tests/test_split.py | 18 ++++---
 3 files changed, 66 insertions(+), 73 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 1ebd12918..743a6b610 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -672,9 +672,11 @@ def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
 class M_estimator_split(M_estimator):
 
     def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10}):
+
         total_size = loss.saturated_loss.shape[0]
         self.randomization = split(loss.shape, subsample_size, total_size)
-        M_estimator.__init__(self,loss, epsilon, penalty, self.randomization, solve_args=solve_args)
+
+        M_estimator.__init__(self, loss, epsilon, penalty, self.randomization, solve_args=solve_args)
 
         total_size = loss.saturated_loss.shape[0]
         if subsample_size > total_size:
@@ -682,60 +684,3 @@ def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its'
 
         self.total_size, self.subsample_size = total_size, subsample_size
 
-
-    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B=2000):
-
-        M_estimator.setup_sampler(self, 
-                                  scaling=scaling,
-                                  solve_args=solve_args)
-        
-        # now we need to estimate covariance of
-        # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)
-
-        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0] # shorthand
-        
-        from .glm import pairs_bootstrap_score # need to correct these imports!!!
-
-        bootstrap_score = pairs_bootstrap_score(self.loss,
-                                                self._overall,
-                                                beta_active=self._beta_full[self._overall],
-                                                solve_args=solve_args)
-
-        # find unpenalized MLE on subsample
-
-        newq, oldq = rr.identity_quadratic(0, 0, 0, 0), self.randomized_loss.quadratic
-        self.randomized_loss.quadratic = newq
-        beta_active_subsample = restricted_Mest(self.randomized_loss,
-                                                self._overall)
-
-        bootstrap_score_split = pairs_bootstrap_score(self.loss,
-                                                      self._overall,
-                                                      beta_active=beta_active_subsample,
-                                                      solve_args=solve_args)
-        self.randomized_loss.quadratic = oldq
-
-        inv_frac = n / m
-        
-        def subsample_diff(m, n, indices):
-            subsample = np.random.choice(indices, size=m, replace=False)
-            full_score = bootstrap_score(indices) # a sum of n terms
-            randomized_score = bootstrap_score_split(subsample) # a sum of m terms
-            return full_score - randomized_score * inv_frac
-
-        first_moment = np.zeros(p)
-        second_moment = np.zeros((p, p))
-        
-        _n = np.arange(n)
-        for _ in range(B):
-            indices = np.random.choice(_n, size=n, replace=True)
-            randomized_score = subsample_diff(m, n, indices)
-            first_moment += randomized_score
-            second_moment += np.multiply.outer(randomized_score, randomized_score)
-
-        first_moment /= B
-        second_moment /= B
-
-        cov = second_moment - np.multiply.outer(first_moment,
-                                                first_moment)
-
-        self.randomization.set_covariance(cov)
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 07a76b89a..64918f73d 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -3,7 +3,7 @@
 import numpy as np
 from scipy.stats import norm as ndist
 
-from regreg.api import glm
+from regreg.api import glm, identity_quadratic
 
 from .M_estimator import restricted_Mest, M_estimator, M_estimator_split
 from .greedy_step import greedy_score_step
@@ -406,8 +406,58 @@ def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
 
 class split_glm_group_lasso(M_estimator_split):
 
-    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}):
-        M_estimator_split.setup_sampler(self, scaling=scaling, solve_args=solve_args)
+    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B=1000):
+
+        # now we need to estimate covariance of
+        # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)
+
+        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0] # shorthand
+        
+        from .glm import pairs_bootstrap_score # TODO: redundant self-import -- this file is glm.py; left over from moving this code out of M_estimator.py
+
+        bootstrap_score = pairs_bootstrap_score(self.loss,
+                                                self._overall,
+                                                beta_active=self._beta_full[self._overall],
+                                                solve_args=solve_args)
+
+        # find unpenalized MLE on subsample
+
+        newq, oldq = identity_quadratic(0, 0, 0, 0), self.randomized_loss.quadratic
+        self.randomized_loss.quadratic = newq
+        beta_active_subsample = restricted_Mest(self.randomized_loss,
+                                                self._overall)
+
+        bootstrap_score_split = pairs_bootstrap_score(self.loss,
+                                                      self._overall,
+                                                      beta_active=beta_active_subsample,
+                                                      solve_args=solve_args)
+        self.randomized_loss.quadratic = oldq
+
+        inv_frac = n / m
+        
+        def subsample_diff(m, n, indices):
+            subsample = np.random.choice(indices, size=m, replace=False)
+            full_score = bootstrap_score(indices) # a sum of n terms
+            randomized_score = bootstrap_score_split(subsample) # a sum of m terms
+            return full_score - randomized_score * inv_frac
+
+        first_moment = np.zeros(p)
+        second_moment = np.zeros((p, p))
+        
+        _n = np.arange(n)
+        for _ in range(B):
+            indices = np.random.choice(_n, size=n, replace=True)
+            randomized_score = subsample_diff(m, n, indices)
+            first_moment += randomized_score
+            second_moment += np.multiply.outer(randomized_score, randomized_score)
+
+        first_moment /= B
+        second_moment /= B
+
+        cov = second_moment - np.multiply.outer(first_moment,
+                                                first_moment)
+
+        self.randomization.set_covariance(cov)
 
         bootstrap_score = pairs_bootstrap_glm(self.loss,
                                               self.selection_variable['variables'],
@@ -432,7 +482,7 @@ class glm_greedy_step(greedy_score_step, glm):
     # greedy_score_step maximized over ~active
 
     def setup_sampler(self):
-        greedy_score_step.setup_sampler(self)
+
         bootstrap_score = pairs_inactive_score_glm(self.loss, 
                                                    self.active,
                                                    self.beta_active,
@@ -442,7 +492,7 @@ def setup_sampler(self):
 class glm_threshold_score(threshold_score):
 
     def setup_sampler(self):
-        threshold_score.setup_sampler(self)
+
         bootstrap_score = pairs_inactive_score_glm(self.loss, 
                                                    self.active,
                                                    self.beta_active,
diff --git a/selection/randomized/tests/test_split.py b/selection/randomized/tests/test_split.py
index fbdbce0aa..642bcfb87 100644
--- a/selection/randomized/tests/test_split.py
+++ b/selection/randomized/tests/test_split.py
@@ -73,16 +73,16 @@ def test_split(s=3,
         opt_sample = M_est.sampler.sample(ndraw,
                                            burnin)
 
-        ### TODO -- this only uses one view -- what about other queries?
+        pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, 
+                                                    target_cov, 
+                                                    score_cov, 
+                                                    parameter=np.zeros(selected_features.sum()), 
+                                                    sample=opt_sample)
+        intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
 
-        pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=null_value, sample=opt_sample)
-        intervals = None
-        if compute_intervals:
-            intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
+        true_vec = beta[M_est.selection_variable['variables']] 
 
-        reference = beta[M_est.selection_variable['variables']] 
-
-        L, U = intervals
+        L, U = intervals.T
 
         covered = np.zeros(nactive, np.bool)
         active_var = np.zeros(nactive, np.bool)
@@ -90,8 +90,6 @@ def test_split(s=3,
         for j in range(nactive):
             if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
                 covered[j] = 1
-            if (LU_naive[j,0] <= true_vec[j]) and (LU_naive[j,1] >= true_vec[j]):
-                naive_covered[j] = 1
             active_var[j] = active_set[j] in nonzero
 
         return pvalues, covered, active_var

From f5d6ab180dc4fb449864928079de5897f224b54e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Oct 2017 10:55:07 -0700
Subject: [PATCH 262/617] WIP: test_sqrt_lasso working so bootstrap is OK?

---
 selection/randomized/tests/test_sqrt_lasso.py | 106 +++++++++---------
 1 file changed, 50 insertions(+), 56 deletions(-)

diff --git a/selection/randomized/tests/test_sqrt_lasso.py b/selection/randomized/tests/test_sqrt_lasso.py
index 41b930911..49da3e1d5 100644
--- a/selection/randomized/tests/test_sqrt_lasso.py
+++ b/selection/randomized/tests/test_sqrt_lasso.py
@@ -3,14 +3,20 @@
 import regreg.api as rr
 from ..api import (randomization,
                    glm_group_lasso,
-                   multiple_queries,
-                   glm_target)
+                   multiple_queries)
+
 from ...tests.instance import (gaussian_instance,
                                       logistic_instance)
 from ...algorithms.sqrt_lasso import (sqlasso_objective,
                                       choose_lambda,
                                       l2norm_glm)
+
 from ..query import naive_confidence_intervals, naive_pvalues
+from ..M_estimator import restricted_Mest
+from ..glm import (split_glm_group_lasso,
+                   glm_nonparametric_bootstrap,
+                   glm_parametric_covariance,
+                   pairs_bootstrap_glm)
 
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
@@ -24,9 +30,6 @@ def choose_lambda_with_randomization(X, randomization, quantile=0.90, ndraw=1000
     dist2 = np.fabs(randomization.sample((ndraw,))).max(0)
     return np.percentile(dist1+dist2, 100*quantile)
 
-
-@register_report(['truth', 'cover', 'ci_length_clt', 'naive_pvalues', 'naive_cover', 'ci_length_naive',
-                    'active', 'BH_decisions', 'active_var'])
 @set_seed_iftrue(SET_SEED)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
@@ -68,73 +71,64 @@ def test_sqrt_lasso(n=500, p=20, s=3, signal=10, K=5, rho=0.,
     W = lam_frac * np.ones(p) * lam_random
     penalty = rr.group_lasso(np.arange(p),
                              weights=dict(zip(np.arange(p), W)), lagrange=1. / np.sqrt(n))
-    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
+    M_est = glm_group_lasso(loss, epsilon, penalty, randomizer)
 
-    mv = multiple_queries([M_est1])
+    mv = multiple_queries([M_est])
     mv.solve()
 
-    #active = soln != 0
-    active_union = M_est1._overall
-    nactive = np.sum(active_union)
-    print("nactive", nactive)
+    active_set = M_est._overall
+    nactive = np.sum(active_set)
+
     if nactive==0:
         return None
 
     nonzero = np.where(beta)[0]
-    if set(nonzero).issubset(np.nonzero(active_union)[0]):
+    if set(nonzero).issubset(np.nonzero(active_set)[0]):
 
-        active_set = np.nonzero(active_union)[0]
-        true_vec = beta[active_union]
+        active_set = np.nonzero(active_set)[0]
+        true_vec = beta[active_set]
 
         if marginalize_subgrad == True:
-            M_est1.decompose_subgradient(conditioning_groups=np.zeros(p, dtype=bool),
-                                         marginalizing_groups=np.ones(p, bool))
-
-        target_sampler, target_observed = glm_target(loss,
-                                                     active_union,
-                                                     mv,
-                                                     bootstrap=bootstrap)
-
-        target_sample = target_sampler.sample(ndraw=ndraw,
-                                              burnin=burnin)
-        LU = target_sampler.confidence_intervals(target_observed,
-                                                 sample=target_sample,
-                                                 level=0.9)
-
-        #pivots_mle = target_sampler.coefficient_pvalues(target_observed,
-        #                                                parameter=target_sampler.reference,
-        #                                                sample=target_sample)
-        pivots_truth = target_sampler.coefficient_pvalues(target_observed,
-                                                          parameter=true_vec,
-                                                          sample=target_sample)
-        pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                     parameter=np.zeros_like(true_vec),
-                                                     sample=target_sample)
-
-        L, U = LU.T
-        sel_covered = np.zeros(nactive, np.bool)
-        sel_length = np.zeros(nactive)
-
-        LU_naive = naive_confidence_intervals(target_sampler, target_observed)
-        naive_covered = np.zeros(nactive, np.bool)
-        naive_length = np.zeros(nactive)
-        naive_pvals = naive_pvalues(target_sampler, target_observed, true_vec)
+            M_est.decompose_subgradient(conditioning_groups=np.zeros(p, dtype=bool),
+                                        marginalizing_groups=np.ones(p, bool))
+
+        selected_features = np.zeros(p, np.bool)
+        selected_features[active_set] = True
+
+        unpenalized_mle = restricted_Mest(M_est.loss, selected_features)
+
+        form_covariances = glm_nonparametric_bootstrap(n, n)
+        boot_target, boot_target_observed = pairs_bootstrap_glm(M_est.loss, selected_features, inactive=None)
+        target_info = boot_target
 
+        cov_info = M_est.setup_sampler()
+        target_cov, score_cov = form_covariances(target_info,  
+                                                 cross_terms=[cov_info],
+                                                 nsample=M_est.nboot)
+
+        opt_sample = M_est.sampler.sample(ndraw,
+                                           burnin)
+
+        pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, 
+                                                    target_cov, 
+                                                    score_cov, 
+                                                    parameter=np.zeros(selected_features.sum()), 
+                                                    sample=opt_sample)
+        intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
+
+        true_vec = beta[M_est.selection_variable['variables']] 
+
+        L, U = intervals.T
+
+        covered = np.zeros(nactive, np.bool)
         active_var = np.zeros(nactive, np.bool)
 
         for j in range(nactive):
             if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
-                sel_covered[j] = 1
-            if (LU_naive[j, 0] <= true_vec[j]) and (LU_naive[j, 1] >= true_vec[j]):
-                naive_covered[j] = 1
-            sel_length[j] = U[j]-L[j]
-            naive_length[j] = LU_naive[j,1]-LU_naive[j,0]
+                covered[j] = 1
             active_var[j] = active_set[j] in nonzero
 
-        print("individual coverage", np.true_divide(sel_covered.sum(),nactive))
-        from statsmodels.sandbox.stats.multicomp import multipletests
-        q = 0.1
-        BH_desicions = multipletests(pvalues, alpha=q, method="fdr_bh")[0]
-        return pivots_truth, sel_covered, sel_length, naive_pvals, naive_covered, naive_length, active_var, BH_desicions, active_var
+        return pvalues, covered, active_var
+
 
 

From 06db93ea261c1a01a9b64fc8efab36621e26bb2c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Oct 2017 11:12:35 -0700
Subject: [PATCH 263/617] split_compare working -- removed bootstrap comparison
 for now

---
 .../randomized/tests/test_split_compare.py    | 105 +++++++++---------
 1 file changed, 55 insertions(+), 50 deletions(-)

diff --git a/selection/randomized/tests/test_split_compare.py b/selection/randomized/tests/test_split_compare.py
index 85a39b0b0..9dc83a16f 100644
--- a/selection/randomized/tests/test_split_compare.py
+++ b/selection/randomized/tests/test_split_compare.py
@@ -5,22 +5,30 @@
 
 import selection.tests.reports as reports
 
-
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from selection.api import (randomization, 
-                           split_glm_group_lasso, 
-                           multiple_queries, 
-                           glm_target)
+                           split_glm_group_lasso)
+
 from ...tests.instance import logistic_instance
-from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-from ..glm import standard_split_ci
+from ...tests.decorators import (wait_for_return_value, 
+                                 register_report, 
+                                 set_sampling_params_iftrue)
+
+from ..glm import (standard_split_ci,
+                   glm_nonparametric_bootstrap,
+                   pairs_bootstrap_glm)
+
+from ..M_estimator import restricted_Mest
 from ..query import naive_confidence_intervals
 
-@register_report(['pivots_clt', 'pivots_boot', 
-                  'covered_clt', 'ci_length_clt', 
-                  'covered_boot', 'ci_length_boot', 
-                  'covered_split', 'ci_length_split', 
-                  'active', 'covered_naive'])
+@register_report(['pivots_clt', 
+                  'covered_clt', 
+                  'ci_length_clt', 
+                  'covered_split', 
+                  'ci_length_split', 
+                  'active', 
+                  'covered_naive',
+                  'ci_length_naive'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @wait_for_return_value()
 def test_split_compare(s=3,
@@ -48,17 +56,16 @@ def test_split_compare(s=3,
 
     m = int(split_frac * n)
 
-    M_est1 = split_glm_group_lasso(loss, epsilon, m, penalty)
-    mv = multiple_queries([M_est1])
-    mv.solve()
+    M_est = split_glm_group_lasso(loss, epsilon, m, penalty)
+    M_est.solve()
 
-    active_union = M_est1.selection_variable['variables'] #+ M_est2.selection_variable['variables']
+    active_union = M_est.selection_variable['variables']
     nactive = np.sum(active_union)
     print("nactive", nactive)
     if nactive==0:
         return None
 
-    leftout_indices = M_est1.randomized_loss.saturated_loss.case_weights == 0
+    leftout_indices = M_est.randomized_loss.saturated_loss.case_weights == 0
 
     screen = set(nonzero).issubset(np.nonzero(active_union)[0])
 
@@ -69,37 +76,30 @@ def test_split_compare(s=3,
         active_set = np.nonzero(active_union)[0]
         true_vec = beta[active_union]
 
-        ## bootstrap
-        target_sampler_boot, target_observed = glm_target(loss,
-                                                          active_union,
-                                                          mv,
-                                                          bootstrap=True)
-
-        target_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
-                                              burnin=burnin)
-        LU_boot = target_sampler_boot.confidence_intervals(target_observed,
-                                                 sample=target_sample_boot,
-                                                 level=0.9)
-        pivots_boot = target_sampler_boot.coefficient_pvalues(target_observed,
-                                                          parameter=true_vec,
-                                                          sample=target_sample_boot)
-
-        ## CLT plugin
-        target_sampler, _ = glm_target(loss,
-                                       active_union,
-                                       mv,
-                                       bootstrap=False)
-
-        target_sample = target_sampler.sample(ndraw=ndraw,
-                                              burnin=burnin)
-        LU = target_sampler.confidence_intervals(target_observed,
-                                                 sample=target_sample,
-                                                 level=0.9)
-        pivots = target_sampler.coefficient_pvalues(target_observed,
-                                                    parameter=true_vec,
-                                                    sample=target_sample)
-
-        LU_naive = naive_confidence_intervals(target_sampler, target_observed)
+        selected_features = np.zeros(p, np.bool)
+        selected_features[active_set] = True
+
+        unpenalized_mle = restricted_Mest(M_est.loss, selected_features)
+
+        form_covariances = glm_nonparametric_bootstrap(n, n)
+        target_info, target_observed = pairs_bootstrap_glm(M_est.loss, selected_features, inactive=None)
+
+        cov_info = M_est.setup_sampler()
+        target_cov, score_cov = form_covariances(target_info,  
+                                                 cross_terms=[cov_info],
+                                                 nsample=M_est.nboot)
+
+        opt_sample = M_est.sampler.sample(ndraw,
+                                          burnin)
+
+        pivots = M_est.sampler.coefficient_pvalues(unpenalized_mle, 
+                                                   target_cov, 
+                                                   score_cov, 
+                                                   parameter=true_vec,
+                                                   sample=opt_sample)
+        LU = intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
+
+        LU_naive = naive_confidence_intervals(np.diag(target_cov), target_observed)
 
         if X.shape[0] - leftout_indices.sum() > nactive:
             LU_split = standard_split_ci(rr.glm.logistic, X, y, active_union, leftout_indices)
@@ -121,7 +121,6 @@ def coverage(LU):
             return covered, ci_length
 
         covered, ci_length = coverage(LU)
-        covered_boot, ci_length_boot = coverage(LU_boot)
         covered_split, ci_length_split = coverage(LU_split)
         covered_naive, ci_length_naive = coverage(LU_naive)
 
@@ -129,8 +128,14 @@ def coverage(LU):
         for j in range(nactive):
             active_var[j] = active_set[j] in nonzero
 
-        return pivots, pivots_boot, covered, ci_length, covered_boot, ci_length_boot, \
-               covered_split, ci_length_split, active_var, covered_naive, ci_length_naive
+        return (pivots, 
+                covered, 
+                ci_length, 
+                covered_split, 
+                ci_length_split, 
+                active_var, 
+                covered_naive, 
+                ci_length_naive)
 
 
 def report(niter=3, **kwargs):

From 8cac02e40e34efbeb831989ab3760aa41b3810af Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Oct 2017 14:49:38 -0700
Subject: [PATCH 264/617] randomized tests all passing now, some moved to
 sandbox but several fixed

---
 .../randomized_tests}/test_greedy_step.py     |   0
 .../test_marginalize_subgrad.py               |   0
 .../test_multiple_queries.py                  |   0
 .../test_multiple_queries_CI.py               |   0
 .../randomized_tests}/test_nonrandomized.py   |   0
 .../test_randomization_to_zero.py             |   0
 selection/randomized/convenience.py           |   2 +
 selection/randomized/glm.py                   |   2 +-
 selection/randomized/query.py                 |  33 +++--
 selection/randomized/tests/test_Mest.py       |   3 +-
 selection/randomized/tests/test_cv.py         |  71 +++++-----
 selection/randomized/tests/test_fixedX.py     | 125 ++++++------------
 selection/randomized/tests/test_intervals.py  | 125 ++++++++----------
 .../randomized/tests/test_multiple_splits.py  | 124 ++++++++---------
 selection/randomized/tests/test_split.py      |   2 +-
 .../randomized/tests/test_split_compare.py    |   2 +-
 16 files changed, 220 insertions(+), 269 deletions(-)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_greedy_step.py (100%)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_marginalize_subgrad.py (100%)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_multiple_queries.py (100%)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_multiple_queries_CI.py (100%)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_nonrandomized.py (100%)
 rename {selection/randomized/tests => sandbox/randomized_tests}/test_randomization_to_zero.py (100%)

diff --git a/selection/randomized/tests/test_greedy_step.py b/sandbox/randomized_tests/test_greedy_step.py
similarity index 100%
rename from selection/randomized/tests/test_greedy_step.py
rename to sandbox/randomized_tests/test_greedy_step.py
diff --git a/selection/randomized/tests/test_marginalize_subgrad.py b/sandbox/randomized_tests/test_marginalize_subgrad.py
similarity index 100%
rename from selection/randomized/tests/test_marginalize_subgrad.py
rename to sandbox/randomized_tests/test_marginalize_subgrad.py
diff --git a/selection/randomized/tests/test_multiple_queries.py b/sandbox/randomized_tests/test_multiple_queries.py
similarity index 100%
rename from selection/randomized/tests/test_multiple_queries.py
rename to sandbox/randomized_tests/test_multiple_queries.py
diff --git a/selection/randomized/tests/test_multiple_queries_CI.py b/sandbox/randomized_tests/test_multiple_queries_CI.py
similarity index 100%
rename from selection/randomized/tests/test_multiple_queries_CI.py
rename to sandbox/randomized_tests/test_multiple_queries_CI.py
diff --git a/selection/randomized/tests/test_nonrandomized.py b/sandbox/randomized_tests/test_nonrandomized.py
similarity index 100%
rename from selection/randomized/tests/test_nonrandomized.py
rename to sandbox/randomized_tests/test_nonrandomized.py
diff --git a/selection/randomized/tests/test_randomization_to_zero.py b/sandbox/randomized_tests/test_randomization_to_zero.py
similarity index 100%
rename from selection/randomized/tests/test_randomization_to_zero.py
rename to sandbox/randomized_tests/test_randomization_to_zero.py
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 8358831d0..ec5e7690c 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -222,6 +222,8 @@ def summary(self,
         opt_samples = [opt_sampler.sample(ndraw,
                                           burnin) for opt_sampler in opt_samplers]
 
+        ### TODO -- this only uses one view -- what about other queries?
+
         pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=null_value, sample=opt_samples[0])
         intervals = None
         if compute_intervals:
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 64918f73d..77225441b 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -567,7 +567,7 @@ def glm_nonparametric_bootstrap(m, n):
     return functools.partial(bootstrap_cov, lambda: np.random.choice(n, size=(m,), replace=True))
 
 def resid_bootstrap(gaussian_loss,
-                    active,
+                    active, # boolean
                     inactive=None,
                     scaling=1.):
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index fe3fea2b7..31a300617 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -6,7 +6,7 @@
 
 from regreg.affine import power_L
 
-from ..distributions.api import discrete_family, intervals_from_sample
+from ..distributions.api import discrete_family
 from ..sampling.langevin import projected_langevin
 from .reconstruction import reconstruct_full_from_internal
 
@@ -563,35 +563,42 @@ def _weights(self,
 
         return np.exp(_logratio)
 
-def naive_confidence_intervals(target, observed, alpha=0.1):
+def naive_confidence_intervals(diag_cov, observed, alpha=0.1):
     """
     Compute naive Gaussian based confidence
     intervals for target.
     Parameters
     ----------
 
-    target : `targeted_sampler`
+    diag_cov : array-like, the diagonal (per-coordinate variances) of a covariance matrix
+
     observed : np.float
         A vector of observed data of shape `target.shape`
+
     alpha : float (optional)
         1 - confidence level.
+
     Returns
     -------
     intervals : np.float
         Gaussian based confidence intervals.
     """
-    quantile = - ndist.ppf(alpha/float(2))
-    LU = np.zeros((2, target.shape[0]))
-    for j in range(target.shape[0]):
-        sigma = np.sqrt(target.target_cov[j, j])
+    diag_cov = np.asarray(diag_cov)
+    p = diag_cov.shape[0]
+    quantile = - ndist.ppf(alpha/2)
+    LU = np.zeros((2, p))
+    for j in range(p):
+        sigma = np.sqrt(diag_cov[j])
         LU[0,j] = observed[j] - sigma * quantile
         LU[1,j] = observed[j] + sigma * quantile
     return LU.T
 
-def naive_pvalues(target, observed, parameter):
-    pvalues = np.zeros(target.shape[0])
-    for j in range(target.shape[0]):
-        sigma = np.sqrt(target.target_cov[j, j])
-        pval = ndist.cdf((observed[j]-parameter[j])/sigma)
-        pvalues[j] = 2*min(pval, 1-pval)
+def naive_pvalues(diag_cov, observed, parameter):
+    diag_cov = np.asarray(diag_cov)
+    p = diag_cov.shape[0]
+    pvalues = np.zeros(p)
+    for j in range(p):
+        sigma = np.sqrt(diag_cov[j])
+        pval = ndist.cdf((observed[j] - parameter[j])/sigma)
+        pvalues[j] = 2 * min(pval, 1-pval)
     return pvalues
diff --git a/selection/randomized/tests/test_Mest.py b/selection/randomized/tests/test_Mest.py
index 62e3929f5..ae21c68f3 100644
--- a/selection/randomized/tests/test_Mest.py
+++ b/selection/randomized/tests/test_Mest.py
@@ -1,6 +1,5 @@
 """
-These tests exposes lower level functions than needed -- see tests_multiple_queries for simpler constructions
-using glm_target
+These tests exposes lower level functions than needed -- see test_convenience for simpler constructions
 """
 from __future__ import print_function
 import numpy as np, pandas as pd
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 9d8563247..97f740127 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -6,27 +6,29 @@
 
 from ...api import (randomization,
                     glm_group_lasso,
-                    multiple_queries,
-                    glm_target)
+                    multiple_queries)
 from ...tests.instance import (gaussian_instance,
                                logistic_instance)
 
-from ..query import naive_confidence_intervals, naive_pvalues
-
 import selection.tests.reports as reports
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from ...tests.decorators import (wait_for_return_value, 
                                  set_seed_iftrue, 
                                  set_sampling_params_iftrue, 
                                  register_report)
+
+from ..query import naive_confidence_intervals, naive_pvalues
+from ..M_estimator import restricted_Mest
 from ..cv_view import CV_view
+from ..glm import (glm_nonparametric_bootstrap,
+                   pairs_bootstrap_glm)
 
 if SMALL_SAMPLES:
     nboot = 10
 else: 
     nboot = -1
 
-@register_report(['truth', 'cover', 'ci_length_clt', 'naive_pvalues', 'naive_cover', 'ci_length_naive',
+@register_report(['pvalue', 'cover', 'ci_length_clt', 'naive_pvalues', 'naive_cover', 'ci_length_naive',
                   'active', 'BH_decisions', 'active_var'])
 @set_seed_iftrue(SET_SEED)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
@@ -106,15 +108,15 @@ def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
     W = lam_frac * np.ones(p) * lam
     penalty = rr.group_lasso(np.arange(p),
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
-    M_est1 = glm_group_lasso(glm_loss, epsilon, penalty, randomizer)
+    M_est = glm_group_lasso(glm_loss, epsilon, penalty, randomizer)
 
     if nboot > 0:
-        cv.nboot = M_est1.nboot = nboot
+        cv.nboot = M_est.nboot = nboot
 
-    mv = multiple_queries([cv, M_est1])
+    mv = multiple_queries([cv, M_est])
     mv.solve()
 
-    active_union = M_est1._overall
+    active_union = M_est._overall
     nactive = np.sum(active_union)
     print("nactive", nactive)
     if nactive==0:
@@ -128,35 +130,40 @@ def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
         true_vec = beta[active_union]
 
         if marginalize_subgrad == True:
-            M_est1.decompose_subgradient(conditioning_groups=np.zeros(p, bool),
+            M_est.decompose_subgradient(conditioning_groups=np.zeros(p, bool),
                                          marginalizing_groups=np.ones(p, bool))
 
-        target_sampler, target_observed = glm_target(glm_loss,
-                                                     active_union,
-                                                     mv,
-                                                     bootstrap=bootstrap)
-
-        target_sample = target_sampler.sample(ndraw=ndraw,
-                                              burnin=burnin)
-        LU = target_sampler.confidence_intervals(target_observed,
-                                                 sample=target_sample,
-                                                 level=0.9)
-
-        pivots_truth = target_sampler.coefficient_pvalues(target_observed,
-                                                          parameter=true_vec,
-                                                          sample=target_sample)
-        pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                     parameter=np.zeros_like(true_vec),
-                                                     sample=target_sample)
-
-        L, U = LU.T
+        selected_features = np.zeros(p, np.bool)
+        selected_features[active_set] = True
+
+        unpenalized_mle = restricted_Mest(M_est.loss, selected_features)
+
+        form_covariances = glm_nonparametric_bootstrap(n, n)
+        target_info, target_observed = pairs_bootstrap_glm(M_est.loss, selected_features, inactive=None)
+
+        cov_info = M_est.setup_sampler()
+        target_cov, score_cov = form_covariances(target_info,  
+                                                 cross_terms=[cov_info],
+                                                 nsample=M_est.nboot)
+
+        opt_sample = M_est.sampler.sample(ndraw,
+                                          burnin)
+
+        pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, 
+                                                    target_cov, 
+                                                    score_cov, 
+                                                    parameter=np.zeros(selected_features.sum()), 
+                                                    sample=opt_sample)
+        intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
+
+        L, U = intervals.T
         sel_covered = np.zeros(nactive, np.bool)
         sel_length = np.zeros(nactive)
 
-        LU_naive = naive_confidence_intervals(target_sampler, target_observed)
+        LU_naive = naive_confidence_intervals(np.diag(target_cov), target_observed)
         naive_covered = np.zeros(nactive, np.bool)
         naive_length = np.zeros(nactive)
-        naive_pvals = naive_pvalues(target_sampler, target_observed, true_vec)
+        naive_pvals = naive_pvalues(np.diag(target_cov), target_observed, true_vec)
 
         active_var = np.zeros(nactive, np.bool)
 
@@ -171,7 +178,7 @@ def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
 
         q = 0.2
         BH_desicions = multipletests(pvalues, alpha=q, method="fdr_bh")[0]
-        return pivots_truth, sel_covered, sel_length, naive_pvals, naive_covered, naive_length, active_var, BH_desicions, active_var
+        return sel_covered, sel_length, naive_pvals, naive_covered, naive_length, active_var, BH_desicions, active_var
 
 
 def report(niter=50, **kwargs):
diff --git a/selection/randomized/tests/test_fixedX.py b/selection/randomized/tests/test_fixedX.py
index 827dfe71a..941aa66c3 100644
--- a/selection/randomized/tests/test_fixedX.py
+++ b/selection/randomized/tests/test_fixedX.py
@@ -3,16 +3,18 @@
 
 import regreg.api as rr
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.instance import gaussian_instance
-from selection.tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.instance import gaussian_instance
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
 import selection.tests.reports as reports
 
-from selection.randomized.api import randomization, multiple_queries, glm_target, glm_nonparametric_bootstrap
-from selection.randomized.glm import resid_bootstrap, fixedX_group_lasso
+from ..api import randomization 
+from ..glm import (resid_bootstrap, 
+                   glm_nonparametric_bootstrap,
+                   fixedX_group_lasso)
 
 
-@register_report(['pvalue', 'active'])
+@register_report(['pvalue', 'cover', 'active'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @set_seed_iftrue(SET_SEED)
 @wait_for_return_value()
@@ -31,99 +33,51 @@ def test_fixedX(ndraw=10000, burnin=2000): # nsim needed for decorator
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
     M_est = fixedX_group_lasso(X, Y, epsilon, penalty, randomizer)
+    M_est.solve()
 
-    mv = multiple_queries([M_est])
-    mv.solve()
+    active_set = M_est.selection_variable['variables']
+    nactive = active_set.sum()
 
-    active = M_est.selection_variable['variables']
-    nactive = active.sum()
+    if set(nonzero).issubset(np.nonzero(active_set)[0]) and active_set.sum() > len(nonzero):
 
-    if set(nonzero).issubset(np.nonzero(active)[0]) and active.sum() > len(nonzero):
+        selected_features = np.zeros(p, np.bool)
+        selected_features[active_set] = True
 
-        pvalues = []
-        active_set = np.nonzero(active)[0]
-        inactive_selected = I = [i for i in np.arange(active_set.shape[0]) if active_set[i] not in nonzero]
-        active_selected = A = [i for i in np.arange(active_set.shape[0]) if active_set[i] in nonzero]
+        Xactive = X[:,active_set]
+        unpenalized_mle = np.linalg.pinv(Xactive).dot(Y)
 
-        if not I:
-            return None
-     
-        idx = I[0]
-        boot_target, target_observed = resid_bootstrap(M_est.loss, active)
-
-        X_active = X[:,active]
-        beta_hat = np.linalg.pinv(X_active).dot(Y)
-        resid_hat = Y - X_active.dot(beta_hat)
         form_covariances = glm_nonparametric_bootstrap(n, n)
-        mv.setup_sampler(form_covariances)
-
-        # null saturated
-
-        def null_target(Y_star):
-            result = boot_target(Y_star)
-            return result[idx]
-
-        null_observed = np.zeros(1)
-        null_observed[0] = target_observed[idx]
-
-        target_sampler = mv.setup_target(null_target, null_observed)
-
-        test_stat = lambda x: x[0]
-        pval = target_sampler.hypothesis_test(test_stat, null_observed, burnin=burnin, ndraw=ndraw) # twosided by default
-        pvalues.append(pval)
-
-        # null selected
-
-        def null_target(Y_star):
-            result = boot_target(Y_star)
-            return np.hstack([result[idx], result[nactive:]])
-
-        null_observed = np.zeros_like(null_target(np.random.standard_normal(n)))
-        null_observed[0] = target_observed[idx]
-        null_observed[1:] = target_observed[nactive:]
+        target_info, target_observed = resid_bootstrap(M_est.loss, active_set)
 
-        target_sampler = mv.setup_target(null_target, null_observed, target_set=[0])
+        cov_info = M_est.setup_sampler()
+        target_cov, score_cov = form_covariances(target_info,  
+                                                 cross_terms=[cov_info],
+                                                 nsample=M_est.nboot)
 
-        test_stat = lambda x: x[0]
-        pval = target_sampler.hypothesis_test(test_stat, null_observed, burnin=burnin, ndraw=ndraw) # twosided by default
-        pvalues.append(pval)
+        opt_sample = M_est.sampler.sample(ndraw,
+                                          burnin)
 
-        # true saturated
+        pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, 
+                                                    target_cov, 
+                                                    score_cov, 
+                                                    parameter=np.zeros(selected_features.sum()), 
+                                                    sample=opt_sample)
+        intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
 
-        idx = A[0]
+        true_vec = beta[M_est.selection_variable['variables']] 
 
-        def active_target(Y_star):
-            result = boot_target(Y_star)
-            return result[idx]
+        L, U = intervals.T
 
-        active_observed = np.zeros(1)
-        active_observed[0] = target_observed[idx]
+        covered = np.zeros(nactive, np.bool)
+        active_var = np.zeros(nactive, np.bool)
+        active_set = np.nonzero(active_set)[0]
 
-        sampler = lambda : np.random.choice(n, size=(n,), replace=True)
+        for j in range(nactive):
+            if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
+                covered[j] = 1
+            active_var[j] = active_set[j] in nonzero
 
-        target_sampler = mv.setup_target(active_target, active_observed)
-
-        test_stat = lambda x: x[0]
-        pval = target_sampler.hypothesis_test(test_stat, active_observed, burnin=burnin, ndraw=ndraw) # twosided by default
-        pvalues.append(pval)
-
-        # true selected
-
-        def active_target(Y_star):
-            result = boot_target(Y_star)
-            return np.hstack([result[idx], result[nactive:]])
-
-        active_observed = np.zeros_like(active_target(np.random.standard_normal(n)))
-        active_observed[0] = target_observed[idx]
-        active_observed[1:] = target_observed[nactive:]
-
-        target_sampler = mv.setup_target(active_target, active_observed, target_set=[0])
-
-        test_stat = lambda x: x[0]
-        pval = target_sampler.hypothesis_test(test_stat, active_observed, burnin=burnin, ndraw=ndraw) # twosided by default
-        pvalues.append(pval)
-
-        return pvalues, [False, False, True, True]
+        return pvalues, covered, active_var
 
 def report(niter=50, **kwargs):
 
@@ -136,4 +90,3 @@ def report(niter=50, **kwargs):
 
     fig = reports.pvalue_plot(runs)
     fig.savefig('fixedX_pivots.pdf') # will have both bootstrap and CLT on plot
-
diff --git a/selection/randomized/tests/test_intervals.py b/selection/randomized/tests/test_intervals.py
index 411d17395..60ffef313 100644
--- a/selection/randomized/tests/test_intervals.py
+++ b/selection/randomized/tests/test_intervals.py
@@ -3,19 +3,21 @@
 
 import regreg.api as rr
 
-from selection.tests.flags import SMALL_SAMPLES, SET_SEED
-from selection.tests.instance import (gaussian_instance, logistic_instance)
-from selection.tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
 import selection.tests.reports as reports
+from ...tests.flags import SMALL_SAMPLES, SET_SEED
+from ...tests.instance import (gaussian_instance, logistic_instance)
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
 
-from selection.api import (randomization, 
-                           glm_group_lasso, 
-                           multiple_queries, 
-                           glm_target)
-from selection.randomized.M_estimator import restricted_Mest
-from selection.randomized.query import (naive_pvalues, naive_confidence_intervals)
+from ..randomization import randomization
 
-@register_report(['mle', 'truth', 'pvalue', 'cover', 'ci_length_clt',
+from ..M_estimator import restricted_Mest
+from ..query import (naive_pvalues, naive_confidence_intervals)
+from ..glm import (glm_group_lasso,
+                   glm_nonparametric_bootstrap,
+                   glm_parametric_covariance,
+                   pairs_bootstrap_glm)
+
+@register_report(['pvalue', 'cover', 'ci_length_clt',
                   'naive_pvalues', 'naive_cover', 'ci_length_naive', 'active'])
 @set_seed_iftrue(SET_SEED, seed=20)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
@@ -53,23 +55,18 @@ def test_intervals(s=0,
     epsilon = 1./np.sqrt(n)
 
     W = lam_frac*np.ones(p)*lam
-    # W[0] = 0 # use at least some unpenalized
+    W[0] = 0 # use at least some unpenalized
     groups = np.concatenate([np.arange(10) for i in range(p//10)])
-    #print(groups)
-    #groups = np.arange(p)
+
     penalty = rr.group_lasso(groups,
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-    # first randomization
-    M_est1 = glm_group_lasso(loss, epsilon, penalty, randomizer)
-    mv = multiple_queries([M_est1])
-    # second randomization
-    #M_est2 = glm_group_lasso(loss, epsilon, penalty, randomizer)
-    #mv = multiple_queries([M_est1, M_est2])
 
-    mv.solve()
+    M_est = glm_group_lasso(loss, epsilon, penalty, randomizer)
+    M_est.solve()
+
 
-    active_union = M_est1.selection_variable['variables']
+    active_union = M_est.selection_variable['variables']
     print("active set", np.nonzero(active_union)[0])
     nactive = np.sum(active_union)
 
@@ -81,29 +78,33 @@ def test_intervals(s=0,
         active_set = np.nonzero(active_union)[0]
         true_vec = beta[active_union]
 
-        target_sampler, target_observed = glm_target(loss,
-                                                     active_union,
-                                                     mv,
-                                                     bootstrap=bootstrap)
-
-        target_sample = target_sampler.sample(ndraw=ndraw,
-                                              burnin=burnin)
-        LU = target_sampler.confidence_intervals(target_observed,
-                                                 sample=target_sample,
-                                                 level=0.9)
-        pivots_mle = target_sampler.coefficient_pvalues(target_observed,
-                                                        parameter=target_sampler.reference,
-                                                        sample=target_sample)
-        pivots_truth = target_sampler.coefficient_pvalues(target_observed,
-                                                      parameter=true_vec,
-                                                      sample=target_sample)
-        pvalues = target_sampler.coefficient_pvalues(target_observed,
-                                                 parameter=np.zeros_like(true_vec),
-                                                 sample=target_sample)
-
-        LU_naive = naive_confidence_intervals(target_sampler, target_observed)
-
-        L, U = LU.T
+        selected_features = np.zeros(p, np.bool)
+        selected_features[active_set] = True
+
+        unpenalized_mle = restricted_Mest(M_est.loss, selected_features)
+
+        form_covariances = glm_nonparametric_bootstrap(n, n)
+        target_info, target_observed = pairs_bootstrap_glm(M_est.loss, selected_features, inactive=None)
+
+        cov_info = M_est.setup_sampler()
+        target_cov, score_cov = form_covariances(target_info,  
+                                                 cross_terms=[cov_info],
+                                                 nsample=M_est.nboot)
+
+        opt_sample = M_est.sampler.sample(ndraw,
+                                          burnin)
+
+        pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, 
+                                                    target_cov, 
+                                                    score_cov, 
+                                                    parameter=np.zeros(selected_features.sum()), 
+                                                    sample=opt_sample)
+        intervals = M_est.sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_sample)
+
+        L, U = intervals.T
+
+        LU_naive = naive_confidence_intervals(np.diag(target_cov), target_observed)
+
         ci_length_sel = np.zeros(nactive)
         covered = np.zeros(nactive, np.bool)
         naive_covered = np.zeros(nactive, np.bool)
@@ -119,35 +120,15 @@ def test_intervals(s=0,
             ci_length_naive[j]= LU_naive[j,1]-LU_naive[j,0]
             active_var[j] = active_set[j] in nonzero
 
-        naive_pvals = naive_pvalues(target_sampler, target_observed, true_vec)
-
-        return pivots_mle, pivots_truth, pvalues, covered, ci_length_sel,\
-               naive_pvals, naive_covered, ci_length_naive, active_var
-
-
-def report_both(niter=10, **kwargs):
-
-    kwargs = {'s': 0, 'n': 500, 'p': 100, 'signal': 7, 'bootstrap': False, 'randomizer': 'gaussian'}
-    intervals_report = reports.reports['test_intervals']
-    CLT_runs = reports.collect_multiple_runs(intervals_report['test'],
-                                             intervals_report['columns'],
-                                             niter,
-                                             reports.summarize_all,
-                                             **kwargs)
-
-    #fig = reports.pivot_plot(CLT_runs, color='b', label='CLT')
-    fig = reports.pivot_plot_2in1(CLT_runs, color='b', label='CLT')
-
-    kwargs['bootstrap'] = True
-    bootstrap_runs = reports.collect_multiple_runs(intervals_report['test'],
-                                                   intervals_report['columns'],
-                                                   niter,
-                                                   reports.summarize_all,
-                                                   **kwargs)
+        naive_pvals = naive_pvalues(np.diag(target_cov), target_observed, true_vec)
 
-    #fig = reports.pivot_plot(bootstrap_runs, color='g', label='Bootstrap', fig=fig)
-    fig = reports.pivot_plot_2in1(bootstrap_runs, color='g', label='Bootstrap', fig=fig)
-    fig.savefig('intervals_pivots.pdf') # will have both bootstrap and CLT on plot
+        return (pvalues, 
+                covered, 
+                ci_length_sel,
+                naive_pvals, 
+                naive_covered, 
+                ci_length_naive, 
+                active_var)
 
 def report(niter=50, **kwargs):
     kwargs = {'s': 0, 'n': 600, 'p': 100, 'signal': 7, 'bootstrap': False, 'randomizer':'gaussian',
diff --git a/selection/randomized/tests/test_multiple_splits.py b/selection/randomized/tests/test_multiple_splits.py
index 2e5d9e7fc..71b0e82b8 100644
--- a/selection/randomized/tests/test_multiple_splits.py
+++ b/selection/randomized/tests/test_multiple_splits.py
@@ -9,10 +9,15 @@
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from selection.api import (randomization,
                            split_glm_group_lasso,
-                           multiple_queries,
-                           glm_target)
+                           multiple_queries)
 from ...tests.instance import logistic_instance
 from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+
+from ..glm import (standard_split_ci,
+                   glm_nonparametric_bootstrap,
+                   pairs_bootstrap_glm)
+
+from ..M_estimator import restricted_Mest
 from ..query import naive_confidence_intervals
 
 @register_report(['pivots_clt', 'pivots_boot',
@@ -68,66 +73,63 @@ def test_multiple_splits(s=3,
     if check_screen and not screen:
         return None
 
-    if True:
-        active_set = np.nonzero(active_union)[0]
-        true_vec = beta[active_union]
-
-        ## bootstrap
-        target_sampler_boot, target_observed = glm_target(loss,
-                                                          active_union,
-                                                          mv,
-                                                          bootstrap=True)
-
-        target_sample_boot = target_sampler_boot.sample(ndraw=ndraw,
-                                                        burnin=burnin)
-        LU_boot = target_sampler_boot.confidence_intervals(target_observed,
-                                                           sample=target_sample_boot,
-                                                           level=0.9)
-        pivots_boot = target_sampler_boot.coefficient_pvalues(target_observed,
-                                                              parameter=true_vec,
-                                                              sample=target_sample_boot)
-        ## CLT plugin
-        target_sampler, _ = glm_target(loss,
-                                       active_union,
-                                       mv,
-                                       bootstrap=False)
-
-        target_sample = target_sampler.sample(ndraw=ndraw,
-                                              burnin=burnin)
-        LU = target_sampler.confidence_intervals(target_observed,
-                                                 sample=target_sample,
-                                                 level=0.9)
-        pivots = target_sampler.coefficient_pvalues(target_observed,
-                                                    parameter=true_vec,
-                                                    sample=target_sample)
-
-        LU_naive = naive_confidence_intervals(target_sampler, target_observed)
-
-
-        def coverage(LU):
-            L, U = LU[:,0], LU[:,1]
-            covered = np.zeros(nactive)
-            ci_length = np.zeros(nactive)
-
-            for j in range(nactive):
-                if check_screen:
-                  if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
-                    covered[j] = 1
-                else:
-                    covered[j] = None
-                ci_length[j] = U[j]-L[j]
-            return covered, ci_length
-
-        covered, ci_length = coverage(LU)
-        covered_boot, ci_length_boot = coverage(LU_boot)
-        covered_naive, ci_length_naive = coverage(LU_naive)
-
-        active_var = np.zeros(nactive, np.bool)
-        for j in range(nactive):
-            active_var[j] = active_set[j] in nonzero
+    true_vec = beta[active_union]
+    selected_features = np.zeros(p, np.bool)
+    selected_features[active_union] = True
+
+    unpenalized_mle = restricted_Mest(loss, selected_features)
+
+    form_covariances = glm_nonparametric_bootstrap(n, n)
+    target_info, target_observed = pairs_bootstrap_glm(loss, selected_features, inactive=None)
+
+    cov_info = view[0].setup_sampler()
+    target_cov, score_cov = form_covariances(target_info,  
+                                             cross_terms=[cov_info],
+                                             nsample=view[0].nboot)
 
-        return pivots, pivots_boot, covered, ci_length, covered_boot, ci_length_boot, \
-                active_var, covered_naive, ci_length_naive
+    for v in view:
+        v.setup_sampler()
+    opt_samples = [v.sampler.sample(ndraw,
+                                    burnin) for v in view]
+
+    #### XXX TODO these only use one view!
+    pivots = view[0].sampler.coefficient_pvalues(unpenalized_mle, 
+                                                 target_cov, 
+                                                 score_cov, 
+                                                 parameter=true_vec,
+                                                 sample=opt_samples[0])
+    LU = view[0].sampler.confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_samples[0])
+
+    LU_naive = naive_confidence_intervals(np.diag(target_cov), target_observed)
+
+    def coverage(LU):
+        L, U = LU[:,0], LU[:,1]
+        covered = np.zeros(nactive)
+        ci_length = np.zeros(nactive)
+
+        for j in range(nactive):
+            if check_screen:
+              if (L[j] <= true_vec[j]) and (U[j] >= true_vec[j]):
+                covered[j] = 1
+            else:
+                covered[j] = None
+            ci_length[j] = U[j]-L[j]
+        return covered, ci_length
+
+    covered, ci_length = coverage(LU)
+    covered_naive, ci_length_naive = coverage(LU_naive)
+
+    active_set = np.where(active_union)[0]
+    active_var = np.zeros(nactive, np.bool)
+    for j in range(nactive):
+        active_var[j] = active_set[j] in nonzero
+
+    return (pivots, 
+            covered, 
+            ci_length, 
+            active_var, 
+            covered_naive, 
+            ci_length_naive)
 
 
 def report(niter=3, **kwargs):
diff --git a/selection/randomized/tests/test_split.py b/selection/randomized/tests/test_split.py
index 642bcfb87..49bbdb77e 100644
--- a/selection/randomized/tests/test_split.py
+++ b/selection/randomized/tests/test_split.py
@@ -71,7 +71,7 @@ def test_split(s=3,
                                                  nsample=M_est.nboot)
 
         opt_sample = M_est.sampler.sample(ndraw,
-                                           burnin)
+                                          burnin)
 
         pvalues = M_est.sampler.coefficient_pvalues(unpenalized_mle, 
                                                     target_cov, 
diff --git a/selection/randomized/tests/test_split_compare.py b/selection/randomized/tests/test_split_compare.py
index 9dc83a16f..875c99058 100644
--- a/selection/randomized/tests/test_split_compare.py
+++ b/selection/randomized/tests/test_split_compare.py
@@ -79,7 +79,7 @@ def test_split_compare(s=3,
         selected_features = np.zeros(p, np.bool)
         selected_features[active_set] = True
 
-        unpenalized_mle = restricted_Mest(M_est.loss, selected_features)
+        unpenalized_mle = restricted_Mest(loss, selected_features)
 
         form_covariances = glm_nonparametric_bootstrap(n, n)
         target_info, target_observed = pairs_bootstrap_glm(M_est.loss, selected_features, inactive=None)

From b4bd70304ec25cae5c53678d9ddb9a8386de59e3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Oct 2017 14:57:10 -0700
Subject: [PATCH 265/617] not using flag for small samples

---
 selection/constraints/tests/test_quadratic_tests.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/selection/constraints/tests/test_quadratic_tests.py b/selection/constraints/tests/test_quadratic_tests.py
index cea1d987f..1de8a7092 100644
--- a/selection/constraints/tests/test_quadratic_tests.py
+++ b/selection/constraints/tests/test_quadratic_tests.py
@@ -22,9 +22,8 @@
 except ImportError:
     R_available = False
 
-@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=20000)
 @set_seed_iftrue(SET_SEED)
-def test_chisq_central(nsim=None, burnin=8000, ndraw=2000):
+def test_chisq_central(nsim=None, burnin=5000, ndraw=20000):
 
     n, p = 4, 10
     A, b = np.random.standard_normal((n, p)), np.zeros(n)
@@ -48,7 +47,7 @@ def test_chisq_central(nsim=None, burnin=8000, ndraw=2000):
 @dec.skipif(not R_available, "needs rpy2")
 @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10, burnin=10, ndraw=10)
 @set_seed_iftrue(SET_SEED)
-def test_chisq_noncentral(nsim=1000, burnin=2000, ndraw=8000):
+def test_chisq_noncentral(nsim=1000, burnin=2000, ndraw=5000):
 
     mu = np.arange(6)
     ncp = np.linalg.norm(mu[:3])**2

From 91b749844d25afe7ce135efa20ec868e606cc144 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Oct 2017 17:54:51 -0700
Subject: [PATCH 266/617] new glmnet coef syntax

---
 selection/algorithms/tests/test_compareR.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 4d51c59b4..c9b58b611 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -42,7 +42,7 @@ def test_fixed_lambda():
         # extract coef for a given lambda; note the 1/n factor!
         # (and we don't save the intercept term)
         lam = %f
-        beta_hat = coef(gfit, s=lam/n, exact=TRUE)
+        beta_hat = coef(gfit, s=lam/n, exact=TRUE, x=x, y=y)
         beta_hat = beta_hat[-1]
 
         # compute fixed lambda p-values and selection intervals
@@ -211,7 +211,7 @@ def test_coxph():
     # extract coef for a given lambda; note the 1/n factor!
 
     lambda = 1.5
-    beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE))
+    beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE, x=x, y=Surv(tim, status)))
     # compute fixed lambda p-values and selection intervals
     out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox")
     pval = out$pv
@@ -269,7 +269,7 @@ def test_logistic():
     # extract coef for a given lambda; note the 1/n factor!
     # (and here  we DO  include the intercept term)
     lambda = .8
-    beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE))
+    beta_hat = as.numeric(coef(gfit, s=lambda/n, exact=TRUE, x=x, y=y))
 
     # compute fixed lambda p-values and selection intervals
     out = fixedLassoInf(x,y,beta_hat,lambda,family="binomial")

From 816fe25f73d025927d45d9b370a7632353b1dbca Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Thu, 5 Oct 2017 18:29:03 -0700
Subject: [PATCH 267/617] limits inactive marginal were bool

---
 doc/examples/figgaussian.pdf                        | Bin
 doc/examples/figlaplace.pdf                         | Bin
 selection/randomized/M_estimator.py                 |  12 ++++++++----
 .../randomized/tests/test_opt_weighted_intervals.py |   6 +++---
 4 files changed, 11 insertions(+), 7 deletions(-)
 create mode 100644 doc/examples/figgaussian.pdf
 create mode 100644 doc/examples/figlaplace.pdf

diff --git a/doc/examples/figgaussian.pdf b/doc/examples/figgaussian.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/doc/examples/figlaplace.pdf b/doc/examples/figlaplace.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index 987fce162..e70b282b7 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -432,7 +432,7 @@ def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=N
         _inactive_groups = ~(self._active_groups+self._unpenalized)
 
         inactive_marginal_groups = np.zeros_like(self._inactive, dtype=bool)
-        limits_marginal_groups = np.zeros_like(self._inactive)
+        limits_marginal_groups = np.zeros_like(self._inactive, np.float)
 
         for i, g in enumerate(groups):
             if (_inactive_groups[i]) and conditioning_groups[i]:
@@ -483,6 +483,9 @@ def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=N
 
         new_opt_transform = (new_linear, new_offset)
 
+        print("limits marginal groups", limits_marginal_groups)
+        print("inactive marginal groups", inactive_marginal_groups)
+
         def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, inactive_marginal_groups):
             return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
                               _cdf(full_state_plus) - _cdf(full_state_minus)))[inactive_marginal_groups]
@@ -534,14 +537,15 @@ def new_log_density(query,
                                                         opt_state)
             full_state = np.atleast_2d(full_state)
             p = query.penalty.shape[0]
-            logdens = 0
+            logdens = np.zeros(full_state.shape[0])
 
             if inactive_marginal_groups.sum()>0:
                 full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
                 full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
-                logdens += np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:,inactive_marginal_groups].sum()
+                logdens += np.sum(np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:,inactive_marginal_groups], axis=1)
 
             logdens += log_dens(full_state[:,~inactive_marginal_groups])
+
             return np.squeeze(logdens) # should this be negative to match the gradient log density?
 
         new_log_density = functools.partial(new_log_density,
@@ -746,4 +750,4 @@ def subsample_diff(m, n, indices):
         cov = second_moment - np.multiply.outer(first_moment,
                                                 first_moment)
 
-        self.randomization.set_covariance(cov)
+        self.randomization.set_covariance(cov)
\ No newline at end of file
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 114135d94..f01c2cbd3 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -25,13 +25,13 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
 
         inst, const = const_info
 
-        X, Y, beta = inst(n=100, p=20, s=2, signal=5., sigma=5.)[:3]
+        X, Y, beta = inst(n=100, p=20, s=0, signal=5., sigma=5.)[:3]
         n, p = X.shape
 
         W = np.ones(X.shape[1]) * 7
         conv = const(X, Y, W, randomizer=rand, parametric_cov_estimator=True)
         signs = conv.fit()
-        #print("signs", signs)
+        print("signs", signs)
 
         marginalizing_groups = np.ones(p, np.bool)
         #marginalizing_groups[:int(p/2)] = True
@@ -47,7 +47,7 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
                                           ndraw=ndraw,
                                           burnin=burnin,
                                           compute_intervals=True)
-
+        print(sel_pivots)
         results.append((rand, sel_pivots, sel_ci, beta[selected_features]))
 
     return results

From 3ce2c639061ddd923a5243185249d8ba0411c32d Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c103.SUNet>
Date: Fri, 6 Oct 2017 11:16:56 -0700
Subject: [PATCH 268/617] test_glm fixed

---
 selection/approx_ci/selection_map.py  |  3 +-
 selection/approx_ci/tests/test_glm.py | 57 ++++++++++-----------------
 2 files changed, 22 insertions(+), 38 deletions(-)

diff --git a/selection/approx_ci/selection_map.py b/selection/approx_ci/selection_map.py
index 750787380..abeb084da 100644
--- a/selection/approx_ci/selection_map.py
+++ b/selection/approx_ci/selection_map.py
@@ -42,7 +42,8 @@ def solve_approx(self):
 
         self.score_target_cov = score_cov[:, :nactive]
         self.target_cov = score_cov[:nactive, :nactive]
-        self.target_observed = self.observed_score_state[:nactive]
+        self.target_observed = self.observed_internal_state[:nactive]
+        self.observed_score_state = self.observed_internal_state
         self.nactive = nactive
 
         self.B_active = self._opt_linear_term[:nactive, :nactive]
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index 30aa93b58..b87d409d1 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -1,40 +1,28 @@
 from __future__ import print_function
-
 import numpy as np
 import sys
 import regreg.api as rr
-
-import selection.tests.reports as reports
-from ...randomized.api import randomization
-from ...tests.instance import logistic_instance, gaussian_instance
-from ...tests.flags import SMALL_SAMPLES, SET_SEED
-from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-
-from ..ci_approx_density import approximate_conditional_density
-from ..estimator_approx import M_estimator_approx
-
-from ...randomized.query import naive_confidence_intervals
-from ...randomized.query import naive_pvalues
-
-
-@register_report(['cover', 'ci_length', 'truth', 'naive_cover', 'naive_pvalues'])
-@wait_for_return_value()
-def test_approximate_ci(n=100,
-                        p=10,
-                        s=3,
-                        snr=5,
-                        rho=0.1,
-                        lam_frac = 1.,
-                        loss='gaussian',
-                        randomizer='gaussian'):
-
-
+from selection.tests.instance import logistic_instance, gaussian_instance
+from selection.approx_ci.selection_map import M_estimator_map
+from selection.approx_ci.ci_approx_density import approximate_conditional_density
+from selection.randomized.query import naive_confidence_intervals
+
+def test_approximate_inference(X,
+                               y,
+                               true_mean,
+                               sigma,
+                               seed_n = 0,
+                               lam_frac = 1.,
+                               loss='gaussian',
+                               randomization_scale = 1.):
+
+    from selection.api import randomization
+    n, p = X.shape
+    np.random.seed(seed_n)
     if loss == "gaussian":
-        X, y, beta = gaussian_instance(n=n, p=p, s=s, rho=rho, snr=snr)[:3]
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
         loss = rr.glm.gaussian(X, y)
     elif loss == "logistic":
-        X, y, beta = logistic_instance(n=n, p=p, s=s, rho=rho, snr=snr)[:]
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.binomial(1, 1. / 2, (n, 10000)))).max(0))
         loss = rr.glm.logistic(X, y)
 
@@ -63,14 +51,7 @@ def test_approximate_ci(n=100,
 
         sys.stderr.write("True target to be covered" + str(true_vec) + "\n")
 
-        class target_class(object):
-            def __init__(self, target_cov):
-                self.target_cov = target_cov
-                self.shape = target_cov.shape
-
-        target = target_class(M_est.target_cov)
-
-        ci_naive = naive_confidence_intervals(target, M_est.target_observed)
+        ci_naive = naive_confidence_intervals(np.diag(M_est.target_cov), M_est.target_observed)
         naive_covered = np.zeros(nactive)
         naive_risk = np.zeros(nactive)
 
@@ -90,6 +71,7 @@ def __init__(self, target_cov):
         sel_risk = np.zeros(nactive)
 
         for j in range(nactive):
+
             sel_risk[j] = (sel_MLE[j] - true_vec[j]) ** 2.
             naive_risk[j] = (M_est.target_observed[j]- true_vec[j]) ** 2.
 
@@ -130,3 +112,4 @@ def test_lasso(n, p, s, signal):
         return(lasso)
 
 
+

From edeceee2edd63a9a0c769624ef5bffcc94c463bf Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c103.SUNet>
Date: Fri, 6 Oct 2017 11:24:49 -0700
Subject: [PATCH 269/617] fixed greedy step test and sel map

---
 selection/approx_ci/selection_map.py          | 2 ++
 selection/approx_ci/tests/test_greedy_step.py | 9 +--------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/selection/approx_ci/selection_map.py b/selection/approx_ci/selection_map.py
index abeb084da..bd5ad50de 100644
--- a/selection/approx_ci/selection_map.py
+++ b/selection/approx_ci/selection_map.py
@@ -117,6 +117,8 @@ def solve_approx(self):
         self.B_active = self._opt_linear_term[:nactive, :nactive]
         self.B_inactive = self._opt_linear_term[nactive:, :nactive]
 
+        self.observed_score_state = self.observed_internal_state
+
     def setup_map(self, j):
         self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
         self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
diff --git a/selection/approx_ci/tests/test_greedy_step.py b/selection/approx_ci/tests/test_greedy_step.py
index 5688dd2d2..64957fc15 100644
--- a/selection/approx_ci/tests/test_greedy_step.py
+++ b/selection/approx_ci/tests/test_greedy_step.py
@@ -58,13 +58,7 @@ def approximate_inference(X,
             ci.solve_approx()
             sys.stderr.write("True target to be covered" + str(true_vec) + "\n")
 
-            class target_class(object):
-                def __init__(self, target_cov):
-                    self.target_cov = target_cov
-                    self.shape = target_cov.shape
-
-            target = target_class(GS.target_cov)
-            ci_naive = naive_confidence_intervals(target, GS.target_observed)
+            ci_naive = naive_confidence_intervals(GS.target_cov, GS.target_observed)
             naive_covered = np.zeros(nactive)
             naive_risk = np.zeros(nactive)
 
@@ -119,4 +113,3 @@ def test_greedy_step(n=50, p=100, s=5, signal=5):
     if greedy_step is not None:
         print("output of selection adjusted inference", greedy_step)
         return(greedy_step)
-

From c0db6a9dacdc0c2575a4c1b07219dd013f7670c5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c103.SUNet>
Date: Fri, 6 Oct 2017 11:52:14 -0700
Subject: [PATCH 270/617] changed feasible point to bypass observed_opt_state,
 an empty array now

---
 selection/approx_ci/selection_map.py              | 6 ++++--
 selection/approx_ci/tests/test_threshold_score.py | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/selection/approx_ci/selection_map.py b/selection/approx_ci/selection_map.py
index bd5ad50de..cf4a31bb9 100644
--- a/selection/approx_ci/selection_map.py
+++ b/selection/approx_ci/selection_map.py
@@ -142,9 +142,11 @@ def __init__(self, loss,
     def solve_approx(self):
         self.solve()
         self.setup_sampler()
-        print("boundary", self.observed_opt_state, self.boundary)
-        self.feasible_point = self.observed_opt_state[self.boundary]
+        #print("boundary", self.observed_opt_state, self.boundary)
+        #self.feasible_point = self.observed_opt_state[self.boundary]
+        self.feasible_point = np.ones(self.boundary.sum())
         (_opt_linear_term, _opt_offset) = self.opt_transform
+        print("shapes", _opt_linear_term[self.boundary, :].shape, _opt_linear_term[self.interior, :].shape)
         self._opt_linear_term = np.concatenate((_opt_linear_term[self.boundary, :], _opt_linear_term[self.interior, :]),
                                                0)
         self._opt_affine_term = np.concatenate((_opt_offset[self.boundary], _opt_offset[self.interior]), 0)
diff --git a/selection/approx_ci/tests/test_threshold_score.py b/selection/approx_ci/tests/test_threshold_score.py
index 89cf494b0..03eb68851 100644
--- a/selection/approx_ci/tests/test_threshold_score.py
+++ b/selection/approx_ci/tests/test_threshold_score.py
@@ -12,7 +12,7 @@ def test_approximate_inference(X,
                                y,
                                true_mean,
                                sigma,
-                               threshold = 3.,
+                               threshold = 2.,
                                seed_n = 0,
                                lam_frac = 1.,
                                loss='gaussian',
@@ -122,4 +122,4 @@ def test_threshold(n, p, s, signal):
         print("output of selection adjusted inference", threshold)
         return(threshold)
 
-test_threshold(n=100, p=50, s=0, signal=5.)
\ No newline at end of file
+test_threshold(n=50, p=100, s=0, signal=5.)
\ No newline at end of file

From b1c791540ff20c769ec72886160321f69e014429 Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Fri, 6 Oct 2017 12:03:23 -0700
Subject: [PATCH 271/617] testing sampling when marg

---
 doc/examples/compute_coverages.py              | 18 ++++++++++--------
 doc/examples/conditional_sampling.py           |  9 ++++++---
 .../tests/test_opt_weighted_intervals.py       | 18 +++++++++++-------
 selection/randomized/tests/test_sampling.py    | 16 +++++++++++-----
 4 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/doc/examples/compute_coverages.py b/doc/examples/compute_coverages.py
index eade5e6aa..bf2f51afd 100644
--- a/doc/examples/compute_coverages.py
+++ b/doc/examples/compute_coverages.py
@@ -21,14 +21,16 @@ def main(ndraw=20000, burnin=5000, nsim=50):
     sel_ci_all = list()
     rand_all = []
     for i in range(nsim):
-        for idx, (rand, sel_pivots, sel_ci, true_vec) in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
-            if i==0:
-                sel_pivots_all.append([])
-                rand_all.append(rand)
-                sel_ci_all.append([])
-            sel_pivots_all[idx].append(sel_pivots)
-            print(sel_ci)
-            sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
+        for idx, results in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
+            if results is not None:
+                (rand, sel_pivots, sel_ci, true_vec) = results
+                if i==0:
+                    sel_pivots_all.append([])
+                    rand_all.append(rand)
+                    sel_ci_all.append([])
+                sel_pivots_all[idx].append(sel_pivots)
+                print(sel_ci)
+                sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
 
     xval = np.linspace(0, 1, 200)
 
diff --git a/doc/examples/conditional_sampling.py b/doc/examples/conditional_sampling.py
index 2e9ddd8e5..c8ee0021c 100644
--- a/doc/examples/conditional_sampling.py
+++ b/doc/examples/conditional_sampling.py
@@ -20,6 +20,7 @@ def main(ndraw=50000, burnin=5000, remove_atom=False, unpenalized=True, stepsize
 
         fig_idx += 1
         fig = plt.figure(num=fig_idx, figsize=(8,8))
+
         plt.clf()
         idx = 0
         for i in range(mcmc_opt.shape[1]):
@@ -41,10 +42,11 @@ def main(ndraw=50000, burnin=5000, remove_atom=False, unpenalized=True, stepsize
             idx += 1
             if idx == 1:
                 plt.legend(loc='lower right')
-        
+
+        fig.suptitle(' '.join([rand, "opt"]))
+
         fig_idx += 1
         fig = plt.figure(num=fig_idx, figsize=(8,8))
-
         plt.clf()
         idx = 0
         for i in range(mcmc_opt.shape[1]):
@@ -65,7 +67,8 @@ def main(ndraw=50000, burnin=5000, remove_atom=False, unpenalized=True, stepsize
             idx += 1
             if idx == 1:
                 plt.legend(loc='lower right')
-        
+
+        fig.suptitle(' '.join([rand, "omega"]))
     plt.show()
 
             
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index f01c2cbd3..889cb6d8c 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -28,7 +28,7 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         X, Y, beta = inst(n=100, p=20, s=0, signal=5., sigma=5.)[:3]
         n, p = X.shape
 
-        W = np.ones(X.shape[1]) * 7
+        W = np.ones(X.shape[1]) * 8
         conv = const(X, Y, W, randomizer=rand, parametric_cov_estimator=True)
         signs = conv.fit()
         print("signs", signs)
@@ -37,18 +37,22 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         #marginalizing_groups[:int(p/2)] = True
         conditioning_groups = ~marginalizing_groups
         #conditioning_groups[-int(p/4):] = False
-        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
-                                   conditioning_groups=conditioning_groups)
+        #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+        #                           conditioning_groups=conditioning_groups)
 
         selected_features = conv._view.selection_variable['variables']
-        print("nactive", selected_features.sum())
-        sel_pivots, sel_ci = conv.summary(selected_features,
+        nactive=selected_features.sum()
+        print("nactive", nactive)
+        if nactive==0:
+            results.append(None)
+        else:
+            sel_pivots, sel_ci = conv.summary(selected_features,
                                           null_value=beta[selected_features],
                                           ndraw=ndraw,
                                           burnin=burnin,
                                           compute_intervals=True)
-        print(sel_pivots)
-        results.append((rand, sel_pivots, sel_ci, beta[selected_features]))
+            print(sel_pivots)
+            results.append((rand, sel_pivots, sel_ci, beta[selected_features]))
 
     return results
 
diff --git a/selection/randomized/tests/test_sampling.py b/selection/randomized/tests/test_sampling.py
index a1f44fdf1..0e6a203c8 100644
--- a/selection/randomized/tests/test_sampling.py
+++ b/selection/randomized/tests/test_sampling.py
@@ -75,8 +75,8 @@ def sample_opt_vars(X, y, active, signs, lam, epsilon, randomization, nsamples =
     lower[range(nactive + nunpen, p)] = -lam[inactive_set] - X[:, inactive_set].T.dot(y)
     upper[range(nactive + nunpen, p)] = lam[inactive_set] - X[:, inactive_set].T.dot(y)
 
-    print(lower, 'lower')
-    print(upper, 'upper')
+    #print(lower, 'lower')
+    #print(upper, 'upper')
     omega_samples = sampling_truncated_dist(lower, 
                                             upper, 
                                             randomization, 
@@ -157,7 +157,8 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
                      W, 
                      randomizer=rand, 
                      randomizer_scale=randomizer_scale,
-                     ridge_term=ridge_term)
+                     ridge_term=ridge_term,
+                     parametric_cov_estimator=True)
 
         print(rand)
         if rand == "laplace":
@@ -169,6 +170,8 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
 
         signs = conv.fit()
         print("signs", signs)
+        conv.decompose_subgradient(marginalizing_groups=np.ones(p,np.bool),
+                                   conditioning_groups=np.zeros(p,np.bool))
 
         selected_features = conv._view.selection_variable['variables']
         q = conv._view
@@ -185,11 +188,12 @@ def test_conditional_law(ndraw=20000, burnin=2000, ridge_term=0.5, stepsize=None
                                burnin,
                                stepsize=stepsize)
         print(S.shape)
-        print([np.mean(S[:,i]) for i in range(p)])
+        print([np.mean(S[:,i]) for i in range(S.shape[1])])
         print(selected_features, 'selected')
 
         # let's also reconstruct the omegas to compare
-
+        if (S.shape[1]<p):
+            S = np.concatenate((S, np.zeros((S.shape[0],p-S.shape[1]))), axis=1)
         S_omega = reconstruct_opt(conv._view, S)
 
         opt_samples = sample_opt_vars(X, 
@@ -235,3 +239,5 @@ def reconstruct_opt(query, state):
                                                    state)
 
     return np.squeeze(reconstructed)
+
+

From 4e045f75628634d82f4903d1666b12454bbe691c Mon Sep 17 00:00:00 2001
From: Jelena Markovic <jelenam@stanford.edu>
Date: Fri, 6 Oct 2017 13:05:41 -0700
Subject: [PATCH 272/617] subclassed the group lasso

---
 selection/randomized/M_estimator.py           | 112 ++++++++++--------
 .../tests/test_opt_weighted_intervals.py      |   4 +-
 2 files changed, 65 insertions(+), 51 deletions(-)

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index e70b282b7..adb738bcf 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -201,6 +201,7 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         X, y = loss.data
         W = self.loss.saturated_loss.hessian(beta_full)
         _Mest_hessian = np.dot(X.T, X[:, overall] * W[overall])
+        self._Mest_hessian = _Mest_hessian
         _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
 
         # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
@@ -284,9 +285,6 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         self.unpenalized_slice = unpenalized_slice
         self.ndim = loss.shape[0]
 
-        #self.Q = ((_hessian + epsilon * np.identity(p))[:,active])[active,:]
-        #self.Qinv = np.linalg.inv(self.Q)
-        #self.form_VQLambda()
         self.nboot = nboot
 
 
@@ -358,51 +356,6 @@ def log_density(query,
 
     sampler = property(get_sampler, query.set_sampler)
 
-    def form_VQLambda(self):
-        nactive_groups = len(self.active_directions_list)
-        nactive_vars = sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
-        V = np.zeros((nactive_vars, nactive_vars-nactive_groups))
-
-        Lambda = np.zeros((nactive_vars,nactive_vars))
-        temp_row, temp_col = 0, 0
-        for g in range(len(self.active_directions_list)):
-            size_curr_group = self.active_directions_list[g].shape[0]
-
-            Lambda[temp_row:(temp_row+size_curr_group),temp_row:(temp_row+size_curr_group)] \
-                = self.active_penalty[g]*np.identity(size_curr_group)
-
-            def null(A, eps=1e-12):
-                u, s, vh = np.linalg.svd(A)
-                padding = max(0, np.shape(A)[1] - np.shape(s)[0])
-                null_mask = np.concatenate(((s <= eps), np.ones((padding,), dtype=bool)), axis=0)
-                null_space = scipy.compress(null_mask, vh, axis=0)
-                return scipy.transpose(null_space)
-
-            V_g = null(matrix(self.active_directions_list[g]))
-            V[temp_row:(temp_row + V_g.shape[0]), temp_col:(temp_col + V_g.shape[1])] = V_g
-            temp_row += V_g.shape[0]
-            temp_col += V_g.shape[1]
-        self.VQLambda = np.dot(np.dot(V.T,self.Qinv), Lambda.dot(V))
-
-        return self.VQLambda
-
-    def derivative_logdet_jacobian(self, scalings):
-        nactive_groups = len(self.active_directions_list)
-        nactive_vars = np.sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
-        from scipy.linalg import block_diag
-        matrix_list = [scalings[i]*np.identity(self.active_directions_list[i].shape[0]-1) for i in range(scalings.shape[0])]
-        Gamma_minus = block_diag(*matrix_list)
-        jacobian_inv = np.linalg.inv(Gamma_minus+self.VQLambda)
-
-        group_sizes = [self._active_directions[i].shape[0] for i in range(nactive_groups)]
-        group_sizes_cumsum = np.concatenate(([0], np.array(group_sizes).cumsum()))
-
-        jacobian_inv_blocks = [jacobian_inv[group_sizes_cumsum[i]:group_sizes_cumsum[i+1],group_sizes_cumsum[i]:group_sizes_cumsum[i+1]]
-                                for i in range(nactive_groups)]
-
-        der = np.zeros(self.observed_opt_state.shape[0])
-        der[self.scaling_slice] = np.array([np.matrix.trace(jacobian_inv_blocks[i]) for i in range(scalings.shape[0])])
-        return der
 
     def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=None):
         """
@@ -750,4 +703,65 @@ def subsample_diff(m, n, indices):
         cov = second_moment - np.multiply.outer(first_moment,
                                                 first_moment)
 
-        self.randomization.set_covariance(cov)
\ No newline at end of file
+        self.randomization.set_covariance(cov)
+
+
+
+class M_estimator_group_lasso(M_estimator):
+
+    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
+
+        M_estimator.__init__(self, loss, epsilon, penalty, randomization, solve_args=solve_args)
+
+        self.Q = self._Mest_hessian[self._overall,:] + epsilon * np.identity(self._overall.sum())
+        self.Qinv = np.linalg.inv(self.Q)
+        self.form_VQLambda()
+
+    def form_VQLambda(self):
+        nactive_groups = len(self.active_directions_list)
+        nactive_vars = sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
+        V = np.zeros((nactive_vars, nactive_vars - nactive_groups))
+
+        Lambda = np.zeros((nactive_vars, nactive_vars))
+        temp_row, temp_col = 0, 0
+        for g in range(len(self.active_directions_list)):
+            size_curr_group = self.active_directions_list[g].shape[0]
+
+            Lambda[temp_row:(temp_row + size_curr_group), temp_row:(temp_row + size_curr_group)] \
+                = self.active_penalty[g] * np.identity(size_curr_group)
+
+            def null(A, eps=1e-12):
+                u, s, vh = np.linalg.svd(A)
+                padding = max(0, np.shape(A)[1] - np.shape(s)[0])
+                null_mask = np.concatenate(((s <= eps), np.ones((padding,), dtype=bool)), axis=0)
+                null_space = scipy.compress(null_mask, vh, axis=0)
+                return scipy.transpose(null_space)
+
+            V_g = null(matrix(self.active_directions_list[g]))
+            V[temp_row:(temp_row + V_g.shape[0]), temp_col:(temp_col + V_g.shape[1])] = V_g
+            temp_row += V_g.shape[0]
+            temp_col += V_g.shape[1]
+        self.VQLambda = np.dot(np.dot(V.T, self.Qinv), Lambda.dot(V))
+
+        return self.VQLambda
+
+    def derivative_logdet_jacobian(self, scalings):
+        nactive_groups = len(self.active_directions_list)
+        nactive_vars = np.sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
+        from scipy.linalg import block_diag
+        matrix_list = [scalings[i] * np.identity(self.active_directions_list[i].shape[0] - 1) for i in
+                       range(scalings.shape[0])]
+        Gamma_minus = block_diag(*matrix_list)
+        jacobian_inv = np.linalg.inv(Gamma_minus + self.VQLambda)
+
+        group_sizes = [self._active_directions[i].shape[0] for i in range(nactive_groups)]
+        group_sizes_cumsum = np.concatenate(([0], np.array(group_sizes).cumsum()))
+
+        jacobian_inv_blocks = [
+            jacobian_inv[group_sizes_cumsum[i]:group_sizes_cumsum[i + 1],
+            group_sizes_cumsum[i]:group_sizes_cumsum[i + 1]]
+            for i in range(nactive_groups)]
+
+        der = np.zeros(self.observed_opt_state.shape[0])
+        der[self.scaling_slice] = np.array([np.matrix.trace(jacobian_inv_blocks[i]) for i in range(scalings.shape[0])])
+        return der
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 889cb6d8c..a6945faeb 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -37,8 +37,8 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         #marginalizing_groups[:int(p/2)] = True
         conditioning_groups = ~marginalizing_groups
         #conditioning_groups[-int(p/4):] = False
-        #conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
-        #                           conditioning_groups=conditioning_groups)
+        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups,
+                                   conditioning_groups=conditioning_groups)
 
         selected_features = conv._view.selection_variable['variables']
         nactive=selected_features.sum()

From 56c87948943e1ca3d8590e292091605b250264f0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 12 Oct 2017 10:20:48 -0700
Subject: [PATCH 273/617] deleting unnecessary files

---
 doc/__init__.py              |   0
 doc/examples/__init__.py     |   0
 doc/examples/figgaussian.pdf | Bin
 doc/examples/figlaplace.pdf  | Bin
 4 files changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 doc/__init__.py
 delete mode 100644 doc/examples/__init__.py
 delete mode 100644 doc/examples/figgaussian.pdf
 delete mode 100644 doc/examples/figlaplace.pdf

diff --git a/doc/__init__.py b/doc/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/doc/examples/__init__.py b/doc/examples/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/doc/examples/figgaussian.pdf b/doc/examples/figgaussian.pdf
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/doc/examples/figlaplace.pdf b/doc/examples/figlaplace.pdf
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000

From a8e6838ad294fdd735eefda044ce72940217095e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 12 Oct 2017 10:34:46 -0700
Subject: [PATCH 274/617] coverage example as rst

---
 doc/examples/compute_coverages.py  |  49 --------------
 doc/examples/compute_coverages.rst | 100 +++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 49 deletions(-)
 delete mode 100644 doc/examples/compute_coverages.py
 create mode 100644 doc/examples/compute_coverages.rst

diff --git a/doc/examples/compute_coverages.py b/doc/examples/compute_coverages.py
deleted file mode 100644
index bf2f51afd..000000000
--- a/doc/examples/compute_coverages.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import numpy as np
-import matplotlib.pyplot as plt
-
-from statsmodels.distributions import ECDF
-from selection.randomized.tests.test_opt_weighted_intervals import test_opt_weighted_intervals
-
-
-def compute_coverage(sel_ci, true_vec):
-    nactive = true_vec.shape[0]
-    coverage = np.zeros(nactive)
-    for i in range(nactive):
-        if true_vec[i]>=sel_ci[i,0] and true_vec[i]<=sel_ci[i,1]:
-            coverage[i]=1
-    return coverage
-
-
-def main(ndraw=20000, burnin=5000, nsim=50):
-    np.random.seed(1)
-
-    sel_pivots_all = list()
-    sel_ci_all = list()
-    rand_all = []
-    for i in range(nsim):
-        for idx, results in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
-            if results is not None:
-                (rand, sel_pivots, sel_ci, true_vec) = results
-                if i==0:
-                    sel_pivots_all.append([])
-                    rand_all.append(rand)
-                    sel_ci_all.append([])
-                sel_pivots_all[idx].append(sel_pivots)
-                print(sel_ci)
-                sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
-
-    xval = np.linspace(0, 1, 200)
-
-    for idx in range(len(rand_all)):
-        fig = plt.figure(num=idx, figsize=(8,8))
-        plt.clf()
-        sel_pivots_all[idx] = [item for sublist in sel_pivots_all[idx] for item in sublist]
-        plt.plot(xval, ECDF(sel_pivots_all[idx])(xval), label='selective')
-        plt.plot(xval, xval, 'k-', lw=1)
-        plt.legend(loc='lower right')
-
-        sel_ci_all[idx] = [item for sublist in sel_ci_all[idx] for item in sublist]
-        print(sel_ci_all)
-        plt.title(''.join(["coverage ", str(np.mean(sel_ci_all[idx]))]))
-        plt.savefig(''.join(["fig", rand_all[idx], '.pdf']))
-
diff --git a/doc/examples/compute_coverages.rst b/doc/examples/compute_coverages.rst
new file mode 100644
index 000000000..ebf2d366d
--- /dev/null
+++ b/doc/examples/compute_coverages.rst
@@ -0,0 +1,100 @@
+Coverage of randomized LASSO intervals
+--------------------------------------
+
+In this example, we demonstrate how to compute confidence intervals
+for a randomized LASSO example, as well as demonstrating
+that the selective pivots are uniformly distributed.
+
+.. nbplot::
+
+    import numpy as np
+    import matplotlib.pyplot as plt
+    from statsmodels.distributions import ECDF
+
+.. mpl-interactive
+
+First, we define a function that will fit a randomized LASSO and
+return both the pivotal quantites and confidence intervals.
+
+.. nbplot::
+
+    from selection.tests.instance import gaussian_instance
+    from selection.randomized.convenience import lasso
+
+    def fit_randomized_LASSO(ndraw=20000, burnin=2000):
+
+    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']):
+
+        X, Y, beta, _, _ = gaussian_instance(n=100, p=20, s=3, sigma=5.)
+        n, p = X.shape
+        W = np.ones(X.shape[1]) * 8
+        L = lasso.gaussian(X, Y, W, randomizer='gaussian', parametric_cov_estimator=True)
+
+        # the active set and signs of the LASSO fit
+        signs = conv.fit()
+
+        # for computational efficiency, we marginalize over 
+        # inactive coordinates when possible
+
+        marginalizing_groups = np.ones(p, np.bool)
+        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups)
+
+        selected_features = conv._view.selection_variable['variables']
+        nactive = selected_features.sum()
+
+        if nactive==0:
+            return None
+        else:
+            sel_pivots, sel_ci = L.summary(selected_features,
+                                           null_value=beta[selected_features],
+                                           ndraw=10000,
+                                           burnin=2000,
+                                           compute_intervals=True)
+            return sel_pivots, sel_ci, beta[selected_features]
+
+Let's do a test run
+
+.. nbplot::
+
+    def compute_coverage(sel_ci, true_vec):
+        nactive = true_vec.shape[0]
+        coverage = np.zeros(nactive)
+        for i in range(nactive):
+            if true_vec[i]>=sel_ci[i,0] and true_vec[i]<=sel_ci[i,1]:
+                coverage[i]=1
+        return coverage
+
+
+    def main(ndraw=20000, burnin=5000, nsim=50):
+        np.random.seed(1)
+
+        sel_pivots_all = list()
+        sel_ci_all = list()
+        rand_all = []
+        for i in range(nsim):
+            for idx, results in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
+                if results is not None:
+                    (rand, sel_pivots, sel_ci, true_vec) = results
+                    if i==0:
+                        sel_pivots_all.append([])
+                        rand_all.append(rand)
+                        sel_ci_all.append([])
+                    sel_pivots_all[idx].append(sel_pivots)
+                    print(sel_ci)
+                    sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
+
+        xval = np.linspace(0, 1, 200)
+
+        for idx in range(len(rand_all)):
+            fig = plt.figure(num=idx, figsize=(8,8))
+            plt.clf()
+            sel_pivots_all[idx] = [item for sublist in sel_pivots_all[idx] for item in sublist]
+            plt.plot(xval, ECDF(sel_pivots_all[idx])(xval), label='selective')
+            plt.plot(xval, xval, 'k-', lw=1)
+            plt.legend(loc='lower right')
+
+            sel_ci_all[idx] = [item for sublist in sel_ci_all[idx] for item in sublist]
+            print(sel_ci_all)
+            plt.title(''.join(["coverage ", str(np.mean(sel_ci_all[idx]))]))
+            plt.savefig(''.join(["fig", rand_all[idx], '.pdf']))
+

From 9bc50a4110ad9ec556257f371091cdbdb1c7b5d2 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 12 Oct 2017 12:55:37 -0700
Subject: [PATCH 275/617] RsT file for coverage

---
 doc/examples/compute_coverages.rst | 179 +++++++++++++++++------------
 1 file changed, 103 insertions(+), 76 deletions(-)

diff --git a/doc/examples/compute_coverages.rst b/doc/examples/compute_coverages.rst
index ebf2d366d..6e60b60bb 100644
--- a/doc/examples/compute_coverages.rst
+++ b/doc/examples/compute_coverages.rst
@@ -1,100 +1,127 @@
+
 Coverage of randomized LASSO intervals
 --------------------------------------
 
-In this example, we demonstrate how to compute confidence intervals
-for a randomized LASSO example, as well as demonstrating
-that the selective pivots are uniformly distributed.
+In this example, we demonstrate how to compute confidence intervals for
+a randomized LASSO example, as well as demonstrating that the selective
+pivots are uniformly distributed.
+
+.. nbplot::
+
+    >>> import numpy as np
+    >>> import matplotlib.pyplot as plt
+    >>> from statsmodels.distributions import ECDF
+
+.. raw:: html
+
+   <!-- mpl-interactive -->
+
+First, we define a function that will fit a randomized LASSO and return
+both the pivotal quantities and confidence intervals. The design matrix
+is equicorrelated with parameter :math:`\rho=0.2`.
 
 .. nbplot::
 
-    import numpy as np
-    import matplotlib.pyplot as plt
-    from statsmodels.distributions import ECDF
+    >>> from selection.tests.instance import gaussian_instance
+    >>> from selection.randomized.convenience import lasso
+    >>>
+    >>> def fit_randomized_LASSO(ndraw=10000, burnin=2000, marginalize=False):
+    ...
+    ...     X, Y, beta, true_active, _ = gaussian_instance(n=100, p=20, s=3, sigma=5., signal=5)
+    ...     n, p = X.shape
+    ...     W = np.ones(X.shape[1]) * 30
+    ...     L = lasso.gaussian(X, Y, W, randomizer='gaussian', parametric_cov_estimator=True)
+    ...
+    ...     # the active set and signs of the LASSO fit
+    ...     signs = L.fit()
+    ...
+    ...     # for computational efficiency, we can 
+    ...     # marginalize over inactive coordinates 
+    ...
+    ...     if marginalize:
+    ...         marginalizing_groups = np.ones(p, np.bool)
+    ...         L.decompose_subgradient(marginalizing_groups=marginalizing_groups)
+    ...
+    ...     selected_features = signs != 0
+    ...     nactive = selected_features.sum()
+    ...
+    ...     if set(np.nonzero(selected_features)[0]).issuperset(true_active):
+    ...         sel_pivots, sel_pval, sel_ci = L.summary(selected_features,
+    ...                                                  parameter=beta[selected_features],
+    ...                                                  ndraw=ndraw,
+    ...                                                  burnin=burnin,
+    ...                                                  compute_intervals=True)
+    ...
+    ...         return sel_pivots, sel_pval, sel_ci, beta[selected_features]
+
+Let’s do a test run
 
-.. mpl-interactive
+.. nbplot::
 
-First, we define a function that will fit a randomized LASSO and
-return both the pivotal quantites and confidence intervals.
+    >>> fit_randomized_LASSO()
+    (array([ 0.43548428,  0.03278839,  0.00481199]),
+     array([ 0.        ,  0.        ,  0.97660498]),
+     array([[ 18.97524697,  40.49266138],
+            [ 28.08291483,  48.76959338],
+            [-12.15053136,  14.24711888]]),
+     array([ 25.,  25.,  25.]))
 
 .. nbplot::
 
-    from selection.tests.instance import gaussian_instance
-    from selection.randomized.convenience import lasso
+    >>> def compute_coverage(sel_ci, truth):
+    ...     coverage = (sel_ci[:,0] <= truth) * (sel_ci[:,1] >= truth)
+    ...     return coverage
 
-    def fit_randomized_LASSO(ndraw=20000, burnin=2000):
+.. nbplot::
 
-    for const_info, rand in product(zip([gaussian_instance], [cls.gaussian]), ['laplace', 'gaussian']):
+    >>> def main(ndraw=10000, burnin=2000, nsim=50):
+    ...     np.random.seed(1)
+    ...
+    ...     sel_pivots_all = []
+    ...     P0 = []
+    ...     PA = []
+    ...     sel_coverage = []
+    ...
+    ...     for i in range(nsim):
+    ...         results = fit_randomized_LASSO(ndraw=ndraw, burnin=burnin)
+    ...         if results is not None:
+    ...             sel_pivots, sel_pval, sel_ci, truth = results
+    ...             P0.extend(sel_pval[truth == 0])
+    ...             PA.extend(sel_pval[truth != 0])
+    ...             sel_pivots_all.extend(sel_pivots)
+    ...             sel_coverage.extend(compute_coverage(sel_ci, truth))
+    ...
+    ...     return sel_pivots_all, sel_coverage, P0, PA
+
+Make a plot
+~~~~~~~~~~~
 
-        X, Y, beta, _, _ = gaussian_instance(n=100, p=20, s=3, sigma=5.)
-        n, p = X.shape
-        W = np.ones(X.shape[1]) * 8
-        L = lasso.gaussian(X, Y, W, randomizer='gaussian', parametric_cov_estimator=True)
+.. nbplot::
 
-        # the active set and signs of the LASSO fit
-        signs = conv.fit()
+    >>> sel_pivots_all, sel_coverage, P0, PA = main(nsim=30)
+    >>> xval = np.linspace(0, 1, 200)
 
-        # for computational efficiency, we marginalize over 
-        # inactive coordinates when possible
+.. mpl-interactive::
 
-        marginalizing_groups = np.ones(p, np.bool)
-        conv.decompose_subgradient(marginalizing_groups=marginalizing_groups)
+.. nbplot::
 
-        selected_features = conv._view.selection_variable['variables']
-        nactive = selected_features.sum()
+    >>> fig = plt.figure(figsize=(8,8))
+    >>> plt.plot(xval, ECDF(sel_pivots_all)(xval), label='Pivot')
+    >>> plt.plot(xval, ECDF(P0)(xval), label='H0')
+    >>> plt.plot(xval, ECDF(PA)(xval), label='HA')
+    >>>
+    >>> plt.plot(xval, xval, 'k-', lw=1)
+    >>> plt.legend(loc='lower right')
+    <...>
 
-        if nactive==0:
-            return None
-        else:
-            sel_pivots, sel_ci = L.summary(selected_features,
-                                           null_value=beta[selected_features],
-                                           ndraw=10000,
-                                           burnin=2000,
-                                           compute_intervals=True)
-            return sel_pivots, sel_ci, beta[selected_features]
 
-Let's do a test run
+
+What does our coverage look like?
 
 .. nbplot::
 
-    def compute_coverage(sel_ci, true_vec):
-        nactive = true_vec.shape[0]
-        coverage = np.zeros(nactive)
-        for i in range(nactive):
-            if true_vec[i]>=sel_ci[i,0] and true_vec[i]<=sel_ci[i,1]:
-                coverage[i]=1
-        return coverage
-
-
-    def main(ndraw=20000, burnin=5000, nsim=50):
-        np.random.seed(1)
-
-        sel_pivots_all = list()
-        sel_ci_all = list()
-        rand_all = []
-        for i in range(nsim):
-            for idx, results in enumerate(test_opt_weighted_intervals(ndraw=ndraw, burnin=burnin)):
-                if results is not None:
-                    (rand, sel_pivots, sel_ci, true_vec) = results
-                    if i==0:
-                        sel_pivots_all.append([])
-                        rand_all.append(rand)
-                        sel_ci_all.append([])
-                    sel_pivots_all[idx].append(sel_pivots)
-                    print(sel_ci)
-                    sel_ci_all[idx].append(compute_coverage(sel_ci, true_vec))
-
-        xval = np.linspace(0, 1, 200)
-
-        for idx in range(len(rand_all)):
-            fig = plt.figure(num=idx, figsize=(8,8))
-            plt.clf()
-            sel_pivots_all[idx] = [item for sublist in sel_pivots_all[idx] for item in sublist]
-            plt.plot(xval, ECDF(sel_pivots_all[idx])(xval), label='selective')
-            plt.plot(xval, xval, 'k-', lw=1)
-            plt.legend(loc='lower right')
-
-            sel_ci_all[idx] = [item for sublist in sel_ci_all[idx] for item in sublist]
-            print(sel_ci_all)
-            plt.title(''.join(["coverage ", str(np.mean(sel_ci_all[idx]))]))
-            plt.savefig(''.join(["fig", rand_all[idx], '.pdf']))
+    >>> print(np.mean(sel_coverage))
+
+    0.876033057851
+
 

From a380ebf023fdb22da2d7197b9c730ff673bfff93 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 17 Oct 2017 11:27:59 -0700
Subject: [PATCH 276/617] argument rename

---
 selection/randomized/convenience.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index ec5e7690c..5b7fcd06d 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -123,6 +123,7 @@ def fit(self, solve_args={'tol':1.e-12, 'min_its':50},
         self._queries.solve()
    
         self.signs = np.sign(self._view.initial_soln)
+        self.selection_variable = self._view.selection_variable
         return self.signs
 
     def decompose_subgradient(self,
@@ -156,7 +157,7 @@ def decompose_subgradient(self,
 
     def summary(self,
                 selected_features,
-                null_value=None,
+                parameter=None,
                 level=0.9,
                 ndraw=10000, 
                 burnin=2000,
@@ -173,8 +174,8 @@ def summary(self,
             Binary encoding of which features to use in final
             model and targets.
 
-        null_value : np.array
-            Hypothesized value for null -- defaults to 0.
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
 
         level : float
             Confidence level.
@@ -192,8 +193,8 @@ def summary(self,
         if not hasattr(self, "_queries"):
             raise ValueError('run `fit` method before producing summary.')
 
-        if null_value is None:
-            null_value = np.zeros(self.loglike.shape[0])
+        if parameter is None:
+            parameter = np.zeros(self.loglike.shape[0])
 
         unpenalized_mle = restricted_Mest(self.loglike, selected_features)
 
@@ -224,12 +225,17 @@ def summary(self,
 
         ### TODO -- this only uses one view -- what about other queries?
 
-        pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=null_value, sample=opt_samples[0])
+        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter, sample=opt_samples[0])
+        if not np.all(parameter == 0):
+            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=np.zeros_like(parameter), sample=opt_samples[0])
+        else:
+            pvalues = pivots
+
         intervals = None
         if compute_intervals:
             intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_samples[0])
 
-        return pvalues, intervals
+        return pivots, pvalues, intervals
 
     @staticmethod
     def gaussian(X, 

From cb445a37b29a399de6ee78365d9dfdc7477eb6c5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 18 Oct 2017 17:48:08 -0700
Subject: [PATCH 277/617] fixing tests

---
 selection/approx_ci/ci_approx_density.py      |  8 +--
 selection/approx_ci/ci_approx_greedy_step.py  | 11 +---
 selection/approx_ci/tests/test_glm.py         | 60 +++++++++----------
 selection/randomized/M_estimator.py           |  4 +-
 .../tests/test_opt_weighted_intervals.py      |  2 +-
 5 files changed, 37 insertions(+), 48 deletions(-)

diff --git a/selection/approx_ci/ci_approx_density.py b/selection/approx_ci/ci_approx_density.py
index 14d467b7b..ab6818f95 100644
--- a/selection/approx_ci/ci_approx_density.py
+++ b/selection/approx_ci/ci_approx_density.py
@@ -1,6 +1,5 @@
 from __future__ import print_function
 from math import log
-import sys
 from scipy.stats import norm as normal
 
 import numpy as np
@@ -355,7 +354,6 @@ def solve_approx(self):
         #defining the grid on which marginal conditional densities will be evaluated
         self.grid_length = 241
 
-        #print("observed values", self.target_observed)
         self.ind_obs = np.zeros(self.nactive, int)
         self.norm = np.zeros(self.nactive)
         self.h_approx = np.zeros((self.nactive, self.grid_length))
@@ -374,7 +372,6 @@ def solve_approx(self):
             else:
                 self.ind_obs[j] = np.argmin(np.abs(self.grid[j,:]-obs))
 
-            sys.stderr.write("number of variable being computed: " + str(j) + "\n")
             self.h_approx[j, :] = self.approx_conditional_prob(j)
 
     def approx_conditional_prob(self, j):
@@ -393,9 +390,6 @@ def approx_conditional_prob(self, j):
             elif val == -float('Inf') and i > 0:
                 h_hat.append(h_hat[i - 1])
 
-            #sys.stderr.write("point on grid: " + str(i) + "\n")
-            #sys.stderr.write("value on grid: " + str(h_hat[i]) + "\n")
-
         return np.array(h_hat)
 
     def area_normalized_density(self, j, mean):
@@ -494,4 +488,4 @@ def approximate_pvalue(self, j, param):
         area_vec = self.area_normalized_density(j, param)[0]
         area = area_vec[self.ind_obs[j]]
 
-        return 2*min(area, 1.-area)
\ No newline at end of file
+        return 2*min(area, 1.-area)
diff --git a/selection/approx_ci/ci_approx_greedy_step.py b/selection/approx_ci/ci_approx_greedy_step.py
index d34fab7c0..96abc3184 100644
--- a/selection/approx_ci/ci_approx_greedy_step.py
+++ b/selection/approx_ci/ci_approx_greedy_step.py
@@ -1,6 +1,5 @@
-from math import log
 import numpy as np
-import sys
+
 import regreg.api as rr
 from scipy.stats import norm
 
@@ -356,7 +355,6 @@ def solve_approx(self):
             else:
                 self.ind_obs[j] = np.argmin(np.abs(self.grid[j, :] - obs))
 
-            sys.stderr.write("number of variable being computed: " + str(j) + "\n")
             self.h_approx[j, :] = self.approx_conditional_prob(j)
 
     def approx_conditional_prob(self, j):
@@ -375,9 +373,6 @@ def approx_conditional_prob(self, j):
             elif val == -float('Inf') and i > 0:
                 h_hat.append(h_hat[i - 1])
 
-            sys.stderr.write("point on grid: " + str(i) + "\n")
-            sys.stderr.write("value on grid: " + str(h_hat[i]) + "\n")
-
         return np.array(h_hat)
 
     def area_normalized_density(self, j, mean):
@@ -402,7 +397,7 @@ def smooth_objective_MLE(self, param, j, mode='both', check_feasibility=False):
         approx_normalizer = self.area_normalized_density(j, param)
 
         f = (param ** 2) / (2 * self.norm[j]) - (self.target_observed[j] * param) / self.norm[j] + \
-            log(approx_normalizer[1])
+            np.log(approx_normalizer[1])
 
         g = param / self.norm[j] - self.target_observed[j] / self.norm[j] + \
             approx_normalizer[2] / approx_normalizer[1]
@@ -476,4 +471,4 @@ def approximate_pvalue(self, j, param):
         area_vec = self.area_normalized_density(j, param)[0]
         area = area_vec[self.ind_obs[j]]
 
-        return 2*min(area, 1.-area)
\ No newline at end of file
+        return 2*min(area, 1.-area)
diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index b87d409d1..61d9800b5 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -1,22 +1,22 @@
 from __future__ import print_function
 import numpy as np
-import sys
+
 import regreg.api as rr
-from selection.tests.instance import logistic_instance, gaussian_instance
-from selection.approx_ci.selection_map import M_estimator_map
-from selection.approx_ci.ci_approx_density import approximate_conditional_density
-from selection.randomized.query import naive_confidence_intervals
-
-def test_approximate_inference(X,
-                               y,
-                               true_mean,
-                               sigma,
-                               seed_n = 0,
-                               lam_frac = 1.,
-                               loss='gaussian',
-                               randomization_scale = 1.):
-
-    from selection.api import randomization
+from ...tests.instance import logistic_instance, gaussian_instance
+from ..selection_map import M_estimator_map
+from ..ci_approx_density import approximate_conditional_density
+from ...randomized.query import naive_confidence_intervals
+from ...randomized.api import randomization
+
+def approximate_inference(X,
+                          y,
+                          true_mean,
+                          sigma,
+                          seed_n = 0,
+                          lam_frac = 1.,
+                          loss='gaussian',
+                          randomization_scale = 1.):
+
     n, p = X.shape
     np.random.seed(seed_n)
     if loss == "gaussian":
@@ -32,16 +32,16 @@ def test_approximate_inference(X,
     penalty = rr.group_lasso(np.arange(p),
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-    randomization = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-    M_est = M_estimator_map(loss, epsilon, penalty, randomization, randomization_scale = randomization_scale)
+    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale = randomization_scale)
 
     M_est.solve_approx()
     active = M_est._overall
     active_set = np.asarray([i for i in range(p) if active[i]])
     nactive = np.sum(active)
-    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
-    sys.stderr.write("Active set selected by lasso" + str(active_set) + "\n")
-    sys.stderr.write("Observed target" + str(M_est.target_observed) + "\n")
+    print("number of active selected by lasso" + str(nactive) + "\n")
+    print("Active set selected by lasso" + str(active_set) + "\n")
+    print("Observed target" + str(M_est.target_observed) + "\n")
 
     if nactive == 0:
         return None
@@ -49,7 +49,7 @@ def test_approximate_inference(X,
     else:
         true_vec = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
 
-        sys.stderr.write("True target to be covered" + str(true_vec) + "\n")
+        print("True target to be covered" + str(true_vec) + "\n")
 
         ci_naive = naive_confidence_intervals(np.diag(M_est.target_cov), M_est.target_observed)
         naive_covered = np.zeros(nactive)
@@ -96,16 +96,16 @@ def test_approximate_inference(X,
                                        naive_risk)))
 
 
-def test_lasso(n, p, s, signal):
+def test_lasso(n=200, p=5, s=1, signal=5):
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     true_mean = X.dot(beta)
-    lasso = test_approximate_inference(X,
-                                       y,
-                                       true_mean,
-                                       sigma,
-                                       seed_n=0,
-                                       lam_frac=1.,
-                                       loss='gaussian')
+    lasso = approximate_inference(X,
+                                  y,
+                                  true_mean,
+                                  sigma,
+                                  seed_n=0,
+                                  lam_frac=1.,
+                                  loss='gaussian')
 
     if lasso is not None:
         print("output of selection adjusted inference", lasso)
diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index e1fae0513..e4c3dba86 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -199,8 +199,8 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         Mest_slice = slice(0, overall.sum())
         # _Mest_hessian = _hessian[:,overall]
         X, y = loss.data
-        W = self.loss.saturated_loss.hessian(beta_full)
-        _Mest_hessian = np.dot(X.T, X[:, overall] * W[overall])
+        W = self.loss.saturated_loss.hessian(X.dot(beta_full))
+        _Mest_hessian = np.dot(X.T, X[:, overall] * W[:, None])
         self._Mest_hessian = _Mest_hessian
         _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
 
diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index a6945faeb..57a74e936 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -47,7 +47,7 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
             results.append(None)
         else:
             sel_pivots, sel_ci = conv.summary(selected_features,
-                                          null_value=beta[selected_features],
+                                          parameter=beta[selected_features],
                                           ndraw=ndraw,
                                           burnin=burnin,
                                           compute_intervals=True)

From 85983e9856438e21b5aa0544723b1683d792d1e6 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 20 Oct 2017 14:19:15 -0700
Subject: [PATCH 278/617] fix to threshold score opt map

---
 selection/approx_ci/selection_map.py    |  4 +++-
 selection/randomized/threshold_score.py | 13 ++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/selection/approx_ci/selection_map.py b/selection/approx_ci/selection_map.py
index cf4a31bb9..f0d0b1c63 100644
--- a/selection/approx_ci/selection_map.py
+++ b/selection/approx_ci/selection_map.py
@@ -144,6 +144,8 @@ def solve_approx(self):
         self.setup_sampler()
         #print("boundary", self.observed_opt_state, self.boundary)
         #self.feasible_point = self.observed_opt_state[self.boundary]
+        self.observed_score_state = self.observed_internal_state
+
         self.feasible_point = np.ones(self.boundary.sum())
         (_opt_linear_term, _opt_offset) = self.opt_transform
         print("shapes", _opt_linear_term[self.boundary, :].shape, _opt_linear_term[self.interior, :].shape)
@@ -184,4 +186,4 @@ def setup_map(self, j):
         self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
 
         self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
-        self.offset_inactive = self.null_statistic[self.nactive:]
\ No newline at end of file
+        self.offset_inactive = self.null_statistic[self.nactive:]
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index de6cac4f8..1ea3e09aa 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -120,7 +120,9 @@ def solve(self, nboot=2000):
 
         self.observed_internal_state = candidate_score
 
-        self.selection_variable = {'boundary_set': self.boundary}
+        active_signs = np.sign(randomized_score[self.boundary])
+        self.selection_variable = {'boundary_set': self.boundary,
+                                   'active_signs': active_signs}
 
         self._solved = True
 
@@ -131,8 +133,13 @@ def solve(self, nboot=2000):
 
         p = self.boundary.shape[0]  # shorthand
         self.num_opt_var = 0
-        self.opt_transform = (np.array([], np.float), np.zeros(p, np.float))
-        self.observed_opt_state = np.array([])
+        opt_transform = np.identity(p)
+        opt_transform = np.vstack([opt_transform[self.boundary], opt_transform[self.interior]])
+        opt_offset = np.hstack([active_signs * threshold[self.boundary], 
+                                np.zeros(self.interior.sum())])
+        self.opt_transform = (opt_transform, opt_offset)
+        self.observed_opt_state = np.hstack([active_signs * threshold[self.boundary], 
+                                             randomized_score[self.interior]])
         _score_linear_term = -np.identity(p)
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
 

From b3d26e4126ad53d012a3b4eac5f5b6042ad247f1 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Fri, 20 Oct 2017 19:32:41 -0700
Subject: [PATCH 279/617] corrected threshold score test

---
 selection/approx_ci/tests/test_threshold_score.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/selection/approx_ci/tests/test_threshold_score.py b/selection/approx_ci/tests/test_threshold_score.py
index 03eb68851..c10df8673 100644
--- a/selection/approx_ci/tests/test_threshold_score.py
+++ b/selection/approx_ci/tests/test_threshold_score.py
@@ -55,14 +55,7 @@ def test_approximate_inference(X,
 
         sys.stderr.write("True target to be covered" + str(true_vec) + "\n")
 
-        class target_class(object):
-            def __init__(self, target_cov):
-                self.target_cov = target_cov
-                self.shape = target_cov.shape
-
-        target = target_class(TS.target_cov)
-
-        ci_naive = naive_confidence_intervals(target, TS.target_observed)
+        ci_naive = naive_confidence_intervals(np.diag(TS.target_cov), TS.target_observed)
         naive_covered = np.zeros(nactive)
         naive_risk = np.zeros(nactive)
 
@@ -122,4 +115,4 @@ def test_threshold(n, p, s, signal):
         print("output of selection adjusted inference", threshold)
         return(threshold)
 
-test_threshold(n=50, p=100, s=0, signal=5.)
\ No newline at end of file
+test_threshold(n=50, p=50, s=0, signal=5.)
\ No newline at end of file

From 6ba11cd45a8ece4e0d94f7c9894bf11b756a1e20 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 08:20:44 -0700
Subject: [PATCH 280/617] smaller dimension for approx_ci test

---
 selection/approx_ci/tests/test_glm.py         |  3 +-
 .../approx_ci/tests/test_threshold_score.py   | 38 +++++++++----------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/selection/approx_ci/tests/test_glm.py b/selection/approx_ci/tests/test_glm.py
index 61d9800b5..a03dfed46 100644
--- a/selection/approx_ci/tests/test_glm.py
+++ b/selection/approx_ci/tests/test_glm.py
@@ -96,7 +96,8 @@ def approximate_inference(X,
                                        naive_risk)))
 
 
-def test_lasso(n=200, p=5, s=1, signal=5):
+def test_lasso(n=20, p=5, s=1, signal=5):
+
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     true_mean = X.dot(beta)
     lasso = approximate_inference(X,
diff --git a/selection/approx_ci/tests/test_threshold_score.py b/selection/approx_ci/tests/test_threshold_score.py
index c10df8673..02f58665a 100644
--- a/selection/approx_ci/tests/test_threshold_score.py
+++ b/selection/approx_ci/tests/test_threshold_score.py
@@ -8,16 +8,16 @@
 
 from selection.randomized.query import naive_confidence_intervals
 
-def test_approximate_inference(X,
-                               y,
-                               true_mean,
-                               sigma,
-                               threshold = 2.,
-                               seed_n = 0,
-                               lam_frac = 1.,
-                               loss='gaussian',
-                               randomization_scale = 1.):
-
+def approximate_inference(X,
+                          y,
+                          true_mean,
+                          sigma,
+                          threshold = 2.,
+                          seed_n = 0,
+                          lam_frac = 1.,
+                          loss='gaussian',
+                          randomization_scale = 1.):
+    
     from selection.api import randomization
     n, p = X.shape
     np.random.seed(seed_n)
@@ -100,19 +100,19 @@ def test_approximate_inference(X,
                                        naive_risk)))
 
 
-def test_threshold(n, p, s, signal):
+def test_threshold(n=30, p=10, s=0, signal=5.):
+
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     true_mean = X.dot(beta)
-    threshold = test_approximate_inference(X,
-                                           y,
-                                           true_mean,
-                                           sigma,
-                                           seed_n=0,
-                                           lam_frac=1.,
-                                           loss='gaussian')
+    threshold = approximate_inference(X,
+                                      y,
+                                      true_mean,
+                                      sigma,
+                                      seed_n=0,
+                                      lam_frac=1.,
+                                      loss='gaussian')
 
     if threshold is not None:
         print("output of selection adjusted inference", threshold)
         return(threshold)
 
-test_threshold(n=50, p=50, s=0, signal=5.)
\ No newline at end of file

From 50c7bb3c8aab3a3f46672234add4dcb72b16df98 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 08:28:15 -0700
Subject: [PATCH 281/617] setting observed_opt_state to empty array for
 threshold sampler

---
 selection/randomized/threshold_score.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index 1ea3e09aa..3c35ca3c9 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -185,7 +185,7 @@ def log_density(boundary,
             grad_log_density = None
             projection = None
 
-            self._sampler = optimization_sampler(self.observed_opt_state,
+            self._sampler = optimization_sampler(np.zeros(()), # nothing to sample
                                                  self.observed_internal_state.copy(),
                                                  self.score_transform,
                                                  self.opt_transform,

From 2b8aab224d46e2c2d28ff7d8ab7cb18fe1b86ea9 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 08:31:42 -0700
Subject: [PATCH 282/617] fixing return value of summary -- maybe we should
 revert

---
 .../randomized/tests/test_opt_weighted_intervals.py    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/selection/randomized/tests/test_opt_weighted_intervals.py b/selection/randomized/tests/test_opt_weighted_intervals.py
index 57a74e936..6e45cdaea 100644
--- a/selection/randomized/tests/test_opt_weighted_intervals.py
+++ b/selection/randomized/tests/test_opt_weighted_intervals.py
@@ -46,11 +46,11 @@ def test_opt_weighted_intervals(ndraw=20000, burnin=2000):
         if nactive==0:
             results.append(None)
         else:
-            sel_pivots, sel_ci = conv.summary(selected_features,
-                                          parameter=beta[selected_features],
-                                          ndraw=ndraw,
-                                          burnin=burnin,
-                                          compute_intervals=True)
+            sel_pivots, sel_pval, sel_ci = conv.summary(selected_features,
+                                                        parameter=beta[selected_features],
+                                                        ndraw=ndraw,
+                                                        burnin=burnin,
+                                                        compute_intervals=True)
             print(sel_pivots)
             results.append((rand, sel_pivots, sel_ci, beta[selected_features]))
 

From 0505cc92d9a45caa4161609a38526a0fd19e7b0e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 10:22:13 -0700
Subject: [PATCH 283/617] touched __init__.py

---
 selection/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/selection/__init__.py b/selection/__init__.py
index e69de29bb..8b1378917 100644
--- a/selection/__init__.py
+++ b/selection/__init__.py
@@ -0,0 +1 @@
+

From 6b18fd45945edf610526c66028c01eceb2841f36 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 10:25:15 -0700
Subject: [PATCH 284/617] updating R-software

---
 R-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-software b/R-software
index bbf7e19f4..85f706302 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit bbf7e19f45b6222519e85f08f9e2af02880b4421
+Subproject commit 85f7063020b99858790f0858896c8a4889f34742

From 19df5d596a1a11ab83d95ab5b72473a818c11948 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 12:00:13 -0700
Subject: [PATCH 285/617] doctest fix

---
 selection/constraints/affine.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/selection/constraints/affine.py b/selection/constraints/affine.py
index 549b0a645..2252df023 100644
--- a/selection/constraints/affine.py
+++ b/selection/constraints/affine.py
@@ -60,10 +60,10 @@ class constraints(object):
     >>> eta = np.array([1,1])
     >>> positive.interval(eta, Y)
     array([  4.6212814 ,  10.17180724])
-    >>> positive.pivot(eta, Y)
-    5.187823627350596e-07
-    >>> positive.bounds(eta, Y)
-    (1.3999999999999988, 7.4000000000000004, inf, 1.4142135623730951)
+    >>> positive.pivot(eta, Y) # doctest: +ELLIPSIS
+    5.187...-07
+    >>> positive.bounds(eta, Y) # doctest: +ELLIPSIS
+    (1.399..., 7.400..., inf, 1.414)
     >>> 
 
     """

From e9ef1120fbf1f7e639537a64a9a2d3bd3c832626 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 17:01:32 -0700
Subject: [PATCH 286/617] python3 problem with iteritems

---
 selection/approx_ci/selection_map.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/approx_ci/selection_map.py b/selection/approx_ci/selection_map.py
index f0d0b1c63..b1ca54db0 100644
--- a/selection/approx_ci/selection_map.py
+++ b/selection/approx_ci/selection_map.py
@@ -24,7 +24,7 @@ def solve_approx(self):
         self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
         self.feasible_point = np.abs(self.initial_soln[self._overall])
         lagrange = []
-        for key, value in self.penalty.weights.iteritems():
+        for key, value in self.penalty.weights.items():
             lagrange.append(value)
         lagrange = np.asarray(lagrange)
         self.inactive_lagrange = lagrange[~self._overall]

From 52aecc550ffc4b9015d313d8f552bb59ad2920de Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 17:05:38 -0700
Subject: [PATCH 287/617] SKLEARN version

---
 selection/info.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/selection/info.py b/selection/info.py
index cadca57b2..a6e3bf65f 100644
--- a/selection/info.py
+++ b/selection/info.py
@@ -46,6 +46,7 @@
 CYTHON_MIN_VERSION = '0.21'
 MPMATH_MIN_VERSION = "0.18"
 PYINTER_MIN_VERSION = "0.1.6"
+SKLEARN_MIN_VERSION = '0.19'
 
 NAME                = 'selection'
 MAINTAINER          = "Jonathan Taylor"

From 5a2b35a6d956ddc2a45d8559c7ae8b68afde3d3c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 18:19:11 -0700
Subject: [PATCH 288/617] no need for numpy version

---
 .travis.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 41e9b7394..12a91af79 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -53,7 +53,6 @@ before_install:
   - source travis-tools/utils.sh
   - travis_before_install
   # Install regreg 
-  - python -c "import numpy; print(numpy.version.version)"
   - git clone https://github.com/jonathan-taylor/regreg.git
   - cd regreg
   - pip install -r requirements.txt

From c17d6897120eaeaf4c5c15cb3f28b4af7a134fb8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 18:31:12 -0700
Subject: [PATCH 289/617] doctest fix

---
 selection/constraints/affine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/constraints/affine.py b/selection/constraints/affine.py
index 2252df023..94e7ceeff 100644
--- a/selection/constraints/affine.py
+++ b/selection/constraints/affine.py
@@ -62,8 +62,8 @@ class constraints(object):
     array([  4.6212814 ,  10.17180724])
     >>> positive.pivot(eta, Y) # doctest: +ELLIPSIS
     5.187...-07
-    >>> positive.bounds(eta, Y) # doctest: +ELLIPSIS
-    (1.399..., 7.400..., inf, 1.414)
+    >>> np.array(positive.bounds(eta, Y)) # doctest: +ELLIPSIS
+    array([ 1.4       ,  7.4       ,         inf,  1.41421356])
     >>> 
 
     """

From f76de82a0a870940cefb288c30dc9f1163fbe786 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 19:43:30 -0700
Subject: [PATCH 290/617] fixing setup to be like regreg

---
 setup.py         | 100 ++++---------
 setup_helpers.py | 383 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 343 insertions(+), 140 deletions(-)

diff --git a/setup.py b/setup.py
index 241b52f6b..4821119ba 100755
--- a/setup.py
+++ b/setup.py
@@ -3,28 +3,43 @@
 
 import os
 import sys
-from os.path import join as pjoin, dirname
-from setup_helpers import package_check
+from os.path import join as pjoin, dirname, exists
 
 # BEFORE importing distutils, remove MANIFEST. distutils doesn't properly
 # update it when the contents of directories change.
-if os.path.exists('MANIFEST'): os.remove('MANIFEST')
+if exists('MANIFEST'): os.remove('MANIFEST')
 
-import numpy as np
+# Unconditionally require setuptools
+import setuptools
 
-# Get version and release info, which is all stored in regreg/info.py
-ver_file = os.path.join('selection', 'info.py')
-# Use exec for compabibility with Python 3
-exec(open(ver_file).read())
+# Package for getting versions from git tags
+import versioneer
 
-from distutils.command import install
+# Import distutils _after_ setuptools import, and after removing
+# MANIFEST
 from distutils.core import setup
 from distutils.extension import Extension
 
 from cythexts import cyproc_exts, get_pyx_sdist
-from setup_helpers import package_check, read_vars_from
+from setup_helpers import (SetupDependency, read_vars_from,
+                           make_np_ext_builder)
+
+# Get various parameters for this version, stored in selection/info.py
 info = read_vars_from(pjoin('selection', 'info.py'))
 
+# Try to preempt setuptools monkeypatching of Extension handling when Pyrex
+# is missing.  Otherwise the monkeypatched Extension will change .pyx
+# filenames to .c filenames, and we probably don't have the .c files.
+sys.path.insert(0, pjoin(dirname(__file__), 'fake_pyrex'))
+# Set setuptools extra arguments
+extra_setuptools_args = dict(
+    tests_require=['nose'],
+    test_suite='nose.collector',
+    zip_safe=False,
+    extras_require = dict(
+        doc=['Sphinx>=1.0'],
+        test=['nose>=0.10.1']))
+
 # Define extensions
 EXTS = []
 for modulename, other_sources in (
@@ -34,70 +49,5 @@
     ):
     pyx_src = pjoin(*modulename.split('.')) + '.pyx'
     EXTS.append(Extension(modulename,[pyx_src] + other_sources,
-                          include_dirs = [np.get_include(),
-                                         "src"],
                           libraries=['m']),
                 )
-extbuilder = cyproc_exts(EXTS, CYTHON_MIN_VERSION, 'pyx-stamps')
-
-extra_setuptools_args = {}
-
-class installer(install.install):
-    def run(self):
-        package_check('numpy', info.NUMPY_MIN_VERSION)
-        package_check('scipy', info.SCIPY_MIN_VERSION)
-        package_check('sklearn', info.SKLEARN_MIN_VERSION)
-        package_check('mpmath', info.MPMATH_MIN_VERSION)
-        install.install.run(self)
-
-cmdclass = dict(
-    build_ext=extbuilder,
-    install=installer,
-    sdist=get_pyx_sdist()
-)
-
-
-def main(**extra_args):
-    setup(name=NAME,
-          maintainer=MAINTAINER,
-          maintainer_email=MAINTAINER_EMAIL,
-          description=DESCRIPTION,
-          long_description=LONG_DESCRIPTION,
-          url=URL,
-          download_url=DOWNLOAD_URL,
-          license=LICENSE,
-          classifiers=CLASSIFIERS,
-          author=AUTHOR,
-          author_email=AUTHOR_EMAIL,
-          platforms=PLATFORMS,
-          version=VERSION,
-          requires=REQUIRES,
-          provides=PROVIDES,
-          packages     = ['selection',
-                          'selection.utils',
-                          'selection.truncated',
-                          'selection.truncated.tests',
-                          'selection.constraints',
-                          'selection.constraints.tests',
-                          'selection.distributions',
-                          'selection.distributions.tests',
-                          'selection.algorithms',
-                          'selection.algorithms.tests',
-                          'selection.sampling',
-                          'selection.sampling.tests',
-                          'selection.randomized',
-                          'selection.randomized.tests',
-                          'selection.tests'
-                          ],
-          ext_modules = EXTS,
-          package_data = {},
-          data_files=[],
-          scripts= [],
-          cmdclass = cmdclass,
-          **extra_args
-         )
-
-#simple way to test what setup will do
-#python setup.py install --prefix=/tmp
-if __name__ == "__main__":
-    main(**extra_setuptools_args)
diff --git a/setup_helpers.py b/setup_helpers.py
index 5a69172ad..23f88e093 100644
--- a/setup_helpers.py
+++ b/setup_helpers.py
@@ -1,82 +1,302 @@
 ''' Distutils / setuptools helpers
 
-Copied from nibabel 'nisext.sexts'
 '''
+import os
+import sys
+from os.path import join as pjoin, split as psplit, splitext, dirname, exists
+import tempfile
+import shutil
 
 from distutils.version import LooseVersion
+from distutils.command.install_scripts import install_scripts
+from distutils.errors import CompileError, LinkError
+
 from distutils import log
 
-# Dependency checks
-def package_check(pkg_name, version=None,
-                  optional=False,
-                  checker=LooseVersion,
-                  version_getter=None,
-                  messages=None
-                  ):
-    ''' Check if package `pkg_name` is present, and correct version
+BAT_TEMPLATE = \
+r"""@echo off
+REM wrapper to use shebang first line of {FNAME}
+set mypath=%~dp0
+set pyscript="%mypath%{FNAME}"
+set /p line1=<%pyscript%
+if "%line1:~0,2%" == "#!" (goto :goodstart)
+echo First line of %pyscript% does not start with "#!"
+exit /b 1
+:goodstart
+set py_exe=%line1:~2%
+REM quote exe in case of spaces in path name
+set py_exe="%py_exe%"
+call %py_exe% %pyscript% %*
+"""
+
+# Path of file to which to write C conditional vars from build-time checks
+CONFIG_H = pjoin('build', 'config.h')
+# File name (no directory) to which to write Python vars from build-time checks
+CONFIG_PY = '__config__.py'
+# Directory to which to write libraries for building
+LIB_DIR_TMP = pjoin('build', 'extra_libs')
+
+
+class install_scripts_bat(install_scripts):
+    """ Make scripts executable on Windows
+
+    Scripts are bare file names without extension on Unix, fitting (for example)
+    Debian rules. They identify as python scripts with the usual ``#!`` first
+    line. Unix recognizes and uses this first "shebang" line, but Windows does
+    not. So, on Windows only we add a ``.bat`` wrapper of name
+    ``bare_script_name.bat`` to call ``bare_script_name`` using the python
+    interpreter from the #! first line of the script.
+
+    Notes
+    -----
+    See discussion at
+    http://matthew-brett.github.com/pydagogue/installing_scripts.html and
+    example at git://github.com/matthew-brett/myscripter.git for more
+    background.
+    """
+    def run(self):
+        install_scripts.run(self)
+        if not os.name == "nt":
+            return
+        for filepath in self.get_outputs():
+            # If we can find an executable name in the #! top line of the script
+            # file, make .bat wrapper for script.
+            with open(filepath, 'rt') as fobj:
+                first_line = fobj.readline()
+            if not (first_line.startswith('#!') and
+                    'python' in first_line.lower()):
+                log.info("No #!python executable found, skipping .bat "
+                            "wrapper")
+                continue
+            pth, fname = psplit(filepath)
+            froot, ext = splitext(fname)
+            bat_file = pjoin(pth, froot + '.bat')
+            bat_contents = BAT_TEMPLATE.replace('{FNAME}', fname)
+            log.info("Making %s wrapper for %s" % (bat_file, filepath))
+            if self.dry_run:
+                continue
+            with open(bat_file, 'wt') as fobj:
+                fobj.write(bat_contents)
+
+
+def add_flag_checking(build_ext_class, flag_defines, top_package_dir=''):
+    """ Override input `build_ext_class` to check compiler `flag_defines`
 
     Parameters
     ----------
-    pkg_name : str
-       name of package as imported into python
-    version : {None, str}, optional
-       minimum version of the package that we require. If None, we don't
-       check the version.  Default is None
-    optional : {False, True}, optional
-       If False, raise error for absent package or wrong version;
-       otherwise warn
-    checker : callable, optional
-       callable with which to return comparable thing from version
-       string.  Default is ``distutils.version.LooseVersion``
-    version_getter : {None, callable}:
-       Callable that takes `pkg_name` as argument, and returns the
-       package version string - as in::
-
-          ``version = version_getter(pkg_name)``
-
-       If None, equivalent to::
-
-          mod = __import__(pkg_name); version = mod.__version__``
-    messages : None or dict, optional
-       dictionary giving output messages
-    '''
-    if version_getter is None:
-        def version_getter(pkg_name):
-            mod = __import__(pkg_name)
-            return mod.__version__
-    if messages is None:
-        messages = {}
-    msgs = {
-         'missing': 'Cannot import package "%s" - is it installed?',
-         'missing opt': 'Missing optional package "%s"',
-         'opt suffix' : '; you may get run-time errors',
-         'version too old': 'You have version %s of package "%s"'
-                            ' but we need version >= %s', }
-    msgs.update(messages)
+    build_ext_class : class
+        Class implementing ``distutils.command.build_ext.build_ext`` interface,
+        with a ``build_extensions`` method.
+    flag_defines : sequence
+        A sequence of elements, where the elements are sequences of length 4
+        consisting of (``compile_flags``, ``link_flags``, ``code``,
+        ``defvar``). ``compile_flags`` is a sequence of compiler flags;
+        ``link_flags`` is a sequence of linker flags. We
+        check ``compile_flags`` to see whether a C source string ``code`` will
+        compile, and ``link_flags`` to see whether the resulting object file
+        will link.  If both compile and link works, we add ``compile_flags`` to
+        ``extra_compile_args`` and ``link_flags`` to ``extra_link_args`` of
+        each extension when we build the extensions.  If ``defvar`` is not
+        None, it is the name of C variable to be defined in ``build/config.h``
+        with 1 if the combination of (``compile_flags``, ``link_flags``,
+        ``code``) will compile and link, 0 otherwise. If None, do not write
+        variable.
+    top_package_dir : str
+        String giving name of top-level package, for writing Python file
+        containing configuration variables.  If empty, do not write this file.
+        Variables written are the same as the C variables generated via
+        the `flag_defines` setting.
+
+    Returns
+    -------
+    checker_class : class
+        A class with similar interface to
+        ``distutils.command.build_ext.build_ext``, that adds all working
+        ``compile_flags`` values to the ``extra_compile_args`` and working
+        ``link_flags`` to ``extra_link_args`` attributes of extensions, before
+        compiling.
+    """
+    class Checker(build_ext_class):
+        flag_defs = tuple(flag_defines)
+
+        def can_compile_link(self, compile_flags, link_flags, code):
+            cc = self.compiler
+            fname = 'test.c'
+            cwd = os.getcwd()
+            tmpdir = tempfile.mkdtemp()
+            try:
+                os.chdir(tmpdir)
+                with open(fname, 'wt') as fobj:
+                    fobj.write(code)
+                try:
+                    objects = cc.compile([fname],
+                                         extra_postargs=compile_flags)
+                except CompileError:
+                    return False
+                try:
+                    # Link shared lib rather than executable to avoid
+                    # http://bugs.python.org/issue4431 with MSVC 10+
+                    cc.link_shared_lib(objects, "testlib",
+                                       extra_postargs=link_flags)
+                except (LinkError, TypeError):
+                    return False
+            finally:
+                os.chdir(cwd)
+                shutil.rmtree(tmpdir)
+            return True
+
+        def build_extensions(self):
+            """ Hook into extension building to check compiler flags """
+            def_vars = []
+            good_compile_flags = []
+            good_link_flags = []
+            config_dir = dirname(CONFIG_H)
+            for compile_flags, link_flags, code, def_var in self.flag_defs:
+                compile_flags = list(compile_flags)
+                link_flags = list(link_flags)
+                flags_good = self.can_compile_link(compile_flags,
+                                                   link_flags,
+                                                   code)
+                if def_var:
+                    def_vars.append((def_var, flags_good))
+                if flags_good:
+                    good_compile_flags += compile_flags
+                    good_link_flags += link_flags
+                else:
+                    log.warn("Flags {0} omitted because of compile or link "
+                             "error".format(compile_flags + link_flags))
+            if def_vars:  # write config.h file
+                if not exists(config_dir):
+                    self.mkpath(config_dir)
+                with open(CONFIG_H, 'wt') as fobj:
+                    fobj.write('/* Automatically generated; do not edit\n')
+                    fobj.write('   C defines from build-time checks */\n')
+                    for v_name, v_value in def_vars:
+                        fobj.write('int {0} = {1};\n'.format(
+                            v_name, 1 if v_value else 0))
+            if def_vars and top_package_dir:  # write __config__.py file
+                config_py_dir = (top_package_dir if self.inplace else
+                                 pjoin(self.build_lib, top_package_dir))
+                if not exists(config_py_dir):
+                    self.mkpath(config_py_dir)
+                config_py = pjoin(config_py_dir, CONFIG_PY)
+                with open(config_py, 'wt') as fobj:
+                    fobj.write('# Automatically generated; do not edit\n')
+                    fobj.write('# Variables from compile checks\n')
+                    for v_name, v_value in def_vars:
+                        fobj.write('{0} = {1}\n'.format(v_name, v_value))
+            if def_vars or good_compile_flags or good_link_flags:
+                for ext in self.extensions:
+                    ext.extra_compile_args += good_compile_flags
+                    ext.extra_link_args += good_link_flags
+                    if def_vars:
+                        ext.include_dirs.append(config_dir)
+            build_ext_class.build_extensions(self)
+
+    return Checker
+
+
+def get_pkg_version(pkg_name):
+    """ Return package version for `pkg_name` if installed
+
+    Returns
+    -------
+    pkg_version : str or None
+        Return None if package not importable.  Return 'unknown' if standard
+        ``__version__`` string not present. Otherwise return version string.
+    """
     try:
-        __import__(pkg_name)
+        pkg = __import__(pkg_name)
     except ImportError:
-        if not optional:
-            raise RuntimeError(msgs['missing'] % pkg_name)
-        log.warn(msgs['missing opt'] % pkg_name +
-                 msgs['opt suffix'])
-        return
-    if not version:
-        return
+        return None
     try:
-        have_version = version_getter(pkg_name)
+        return pkg.__version__
     except AttributeError:
-        raise RuntimeError('Cannot find version for %s' % pkg_name)
-    if checker(have_version) < checker(version):
-        if optional:
-            log.warn(msgs['version too old'] % (have_version,
-                                                pkg_name,
-                                                version)
-                     + msgs['opt suffix'])
-        else:
-            raise RuntimeError(msgs['version too old'] % (have_version,
-                                                          pkg_name,
-                                                          version))
+        return 'unknown'
+
+
+def version_error_msg(pkg_name, found_ver, min_ver):
+    """ Return error message if `found_ver` does not satisfy `min_ver`, else None
+    """
+    if found_ver is None:
+        return 'We need package {0}, but not importable'.format(pkg_name)
+    if found_ver == 'unknown':
+        return 'We need {0} version {1}, but cannot get version'.format(
+            pkg_name, min_ver)
+    if LooseVersion(found_ver) >= LooseVersion(min_ver):
+        return None
+    return 'We need {0} version {1}, but found version {2}'.format(
+        pkg_name, min_ver, found_ver)  # required version first, then found
+
+
+class SetupDependency(object):
+    """ SetupDependency class
+
+    Parameters
+    ----------
+    import_name : str
+        Name with which required package should be ``import``ed.
+    min_ver : str
+        Distutils version string giving minimum version for package.
+    req_type : {'install_requires', 'setup_requires'}, optional
+        Setuptools dependency type.
+    heavy : {False, True}, optional
+        If True, and package is already installed (importable), then do not add
+        to the setuptools dependency lists.  This prevents setuptools
+        reinstalling big packages when the package was installed without using
+        setuptools, or this is an upgrade, and we want to avoid the pip default
+        behavior of upgrading all dependencies.
+    install_name : str, optional
+        Name identifying package to install from pypi etc, if different from
+        `import_name`.
+    """
+
+    def __init__(self, import_name,
+                 min_ver,
+                 req_type='install_requires',
+                 heavy=False,
+                 install_name=None):
+        self.import_name = import_name
+        self.min_ver = min_ver
+        self.req_type = req_type
+        self.heavy = heavy
+        self.install_name = (import_name if install_name is None
+                             else install_name)
+
+    def check_fill(self, setuptools_kwargs):
+        """ Process this dependency, maybe filling `setuptools_kwargs`
+
+        Run checks on this dependency.  If not using setuptools, then raise
+        error for unmet dependencies.  If using setuptools, add missing or
+        not-heavy dependencies to `setuptools_kwargs`, named by `install_name`.
+
+        A heavy dependency is one that is inconvenient to install
+        automatically, such as numpy or (particularly) scipy, matplotlib.
+
+        Parameters
+        ----------
+        setuptools_kwargs : dict
+            Dictionary of setuptools keyword arguments that may be modified
+            in-place while checking dependencies.
+        """
+        found_ver = get_pkg_version(self.import_name)
+        ver_err_msg = version_error_msg(self.import_name,
+                                        found_ver,
+                                        self.min_ver)
+        if 'setuptools' not in sys.modules:
+            # Not using setuptools; raise error for any unmet dependencies
+            if ver_err_msg is not None:
+                raise RuntimeError(ver_err_msg)
+            return
+        # Using setuptools; add the installable (pypi) name to given section
+        # of setup/install_requires, unless it's a heavy dependency for which
+        # we already have an acceptable importable version.
+        if self.heavy and ver_err_msg is None:
+            return
+        new_req = '{0}>={1}'.format(self.install_name, self.min_ver)
+        old_reqs = setuptools_kwargs.get(self.req_type, [])
+        setuptools_kwargs[self.req_type] = old_reqs + [new_req]
+
 
 class Bunch(object):
     def __init__(self, vars):
@@ -105,3 +325,36 @@ def read_vars_from(ver_file):
     with open(ver_file, 'rt') as fobj:
         exec(fobj.read(), ns)
     return Bunch(ns)
+
+
+def make_np_ext_builder(build_ext_class):
+    """ Override input `build_ext_class` to add numpy includes to extension
+
+    This is useful to delay call of ``np.get_include`` until the extension is
+    being built.
+
+    Parameters
+    ----------
+    build_ext_class : class
+        Class implementing ``distutils.command.build_ext.build_ext`` interface,
+        with a ``build_extensions`` method.
+
+    Returns
+    -------
+    np_build_ext_class : class
+        A class with similar interface to
+        ``distutils.command.build_ext.build_ext``, that adds the directory from
+        ``np.get_include()`` to the include directories of each extension.
+    """
+    class NpExtBuilder(build_ext_class):
+
+        def build_extensions(self):
+            """ Hook into extension building to add np include dirs
+            """
+            # Delay numpy import until last moment
+            import numpy as np
+            for ext in self.extensions:
+                ext.include_dirs.append(np.get_include())
+            build_ext_class.build_extensions(self)
+
+    return NpExtBuilder

From 9dbc577b8814a0aeedda7b811ceb24839f82c01a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Oct 2017 19:51:01 -0700
Subject: [PATCH 291/617] missing module

---
 versioneer.py | 1699 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1699 insertions(+)
 create mode 100644 versioneer.py

diff --git a/versioneer.py b/versioneer.py
new file mode 100644
index 000000000..c010f63e3
--- /dev/null
+++ b/versioneer.py
@@ -0,0 +1,1699 @@
+
+# Version: 0.15
+
+"""
+The Versioneer
+==============
+
+* like a rocketeer, but for versions!
+* https://github.com/warner/python-versioneer
+* Brian Warner
+* License: Public Domain
+* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy
+* [![Latest Version]
+(https://pypip.in/version/versioneer/badge.svg?style=flat)
+](https://pypi.python.org/pypi/versioneer/)
+* [![Build Status]
+(https://travis-ci.org/warner/python-versioneer.png?branch=master)
+](https://travis-ci.org/warner/python-versioneer)
+
+This is a tool for managing a recorded version number in distutils-based
+python projects. The goal is to remove the tedious and error-prone "update
+the embedded version string" step from your release process. Making a new
+release should be as easy as recording a new tag in your version-control
+system, and maybe making new tarballs.
+
+
+## Quick Install
+
+* `pip install versioneer` to somewhere on your $PATH
+* add a `[versioneer]` section to your setup.cfg (see below)
+* run `versioneer install` in your source tree, commit the results
+
+## Version Identifiers
+
+Source trees come from a variety of places:
+
+* a version-control system checkout (mostly used by developers)
+* a nightly tarball, produced by build automation
+* a snapshot tarball, produced by a web-based VCS browser, like github's
+  "tarball from tag" feature
+* a release tarball, produced by "setup.py sdist", distributed through PyPI
+
+Within each source tree, the version identifier (either a string or a number,
+this tool is format-agnostic) can come from a variety of places:
+
+* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
+  about recent "tags" and an absolute revision-id
+* the name of the directory into which the tarball was unpacked
+* an expanded VCS keyword ($Id$, etc)
+* a `_version.py` created by some earlier build step
+
+For released software, the version identifier is closely related to a VCS
+tag. Some projects use tag names that include more than just the version
+string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
+needs to strip the tag prefix to extract the version identifier. For
+unreleased software (between tags), the version identifier should provide
+enough information to help developers recreate the same tree, while also
+giving them an idea of roughly how old the tree is (after version 1.2, before
+version 1.3). Many VCS systems can report a description that captures this,
+for example `git describe --tags --dirty --always` reports things like
+"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
+0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
+uncommitted changes).
+
+The version identifier is used for multiple purposes:
+
+* to allow the module to self-identify its version: `myproject.__version__`
+* to choose a name and prefix for a 'setup.py sdist' tarball
+
+## Theory of Operation
+
+Versioneer works by adding a special `_version.py` file into your source
+tree, where your `__init__.py` can import it. This `_version.py` knows how to
+dynamically ask the VCS tool for version information at import time.
+
+`_version.py` also contains `$Revision$` markers, and the installation
+process marks `_version.py` to have this marker rewritten with a tag name
+during the `git archive` command. As a result, generated tarballs will
+contain enough information to get the proper version.
+
+To allow `setup.py` to compute a version too, a `versioneer.py` is added to
+the top level of your source tree, next to `setup.py` and the `setup.cfg`
+that configures it. This overrides several distutils/setuptools commands to
+compute the version when invoked, and changes `setup.py build` and `setup.py
+sdist` to replace `_version.py` with a small static file that contains just
+the generated version data.
+
+## Installation
+
+First, decide on values for the following configuration variables:
+
+* `VCS`: the version control system you use. Currently accepts "git".
+
+* `style`: the style of version string to be produced. See "Styles" below for
+  details. Defaults to "pep440", which looks like
+  `TAG[+DISTANCE.gSHORTHASH[.dirty]]`.
+
+* `versionfile_source`:
+
+  A project-relative pathname into which the generated version strings should
+  be written. This is usually a `_version.py` next to your project's main
+  `__init__.py` file, so it can be imported at runtime. If your project uses
+  `src/myproject/__init__.py`, this should be `src/myproject/_version.py`.
+  This file should be checked in to your VCS as usual: the copy created below
+  by `versioneer install` will include code that parses expanded VCS
+  keywords in generated tarballs. The 'build' and 'sdist' commands will
+  replace it with a copy that has just the calculated version string.
+
+  This must be set even if your project does not have any modules (and will
+  therefore never import `_version.py`), since "setup.py sdist" -based trees
+  still need somewhere to record the pre-calculated version strings. Anywhere
+  in the source tree should do. If there is a `__init__.py` next to your
+  `_version.py`, the `versioneer install` command (described below)
+  will append some `__version__`-setting assignments, if they aren't already
+  present.
+
+* `versionfile_build`:
+
+  Like `versionfile_source`, but relative to the build directory instead of
+  the source directory. These will differ when your setup.py uses
+  'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`,
+  then you will probably have `versionfile_build='myproject/_version.py'` and
+  `versionfile_source='src/myproject/_version.py'`.
+
+  If this is set to None, then `setup.py build` will not attempt to rewrite
+  any `_version.py` in the built tree. If your project does not have any
+  libraries (e.g. if it only builds a script), then you should use
+  `versionfile_build = None` and override `distutils.command.build_scripts`
+  to explicitly insert a copy of `versioneer.get_version()` into your
+  generated script.
+
+* `tag_prefix`:
+
+  a string, like 'PROJECTNAME-', which appears at the start of all VCS tags.
+  If your tags look like 'myproject-1.2.0', then you should use
+  tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this
+  should be an empty string.
+
+* `parentdir_prefix`:
+
+  an optional string, frequently the same as tag_prefix, which appears at the
+  start of all unpacked tarball filenames. If your tarball unpacks into
+  'myproject-1.2.0', this should be 'myproject-'. To disable this feature,
+  just omit the field from your `setup.cfg`.
+
+This tool provides one script, named `versioneer`. That script has one mode,
+"install", which writes a copy of `versioneer.py` into the current directory
+and runs `versioneer.py setup` to finish the installation.
+
+To versioneer-enable your project:
+
+* 1: Modify your `setup.cfg`, adding a section named `[versioneer]` and
+  populating it with the configuration values you decided earlier (note that
+  the option names are not case-sensitive):
+
+  ````
+  [versioneer]
+  VCS = git
+  style = pep440
+  versionfile_source = src/myproject/_version.py
+  versionfile_build = myproject/_version.py
+  tag_prefix =
+  parentdir_prefix = myproject-
+  ````
+
+* 2: Run `versioneer install`. This will do the following:
+
+  * copy `versioneer.py` into the top of your source tree
+  * create `_version.py` in the right place (`versionfile_source`)
+  * modify your `__init__.py` (if one exists next to `_version.py`) to define
+    `__version__` (by calling a function from `_version.py`)
+  * modify your `MANIFEST.in` to include both `versioneer.py` and the
+    generated `_version.py` in sdist tarballs
+
+  `versioneer install` will complain about any problems it finds with your
+  `setup.py` or `setup.cfg`. Run it multiple times until you have fixed all
+  the problems.
+
+* 3: add an `import versioneer` to your setup.py, and add the following
+  arguments to the setup() call:
+
+        version=versioneer.get_version(),
+        cmdclass=versioneer.get_cmdclass(),
+
+* 4: commit these changes to your VCS. To make sure you won't forget,
+  `versioneer install` will mark everything it touched for addition using
+  `git add`. Don't forget to add `setup.py` and `setup.cfg` too.
+
+## Post-Installation Usage
+
+Once established, all uses of your tree from a VCS checkout should get the
+current version string. All generated tarballs should include an embedded
+version string (so users who unpack them will not need a VCS tool installed).
+
+If you distribute your project through PyPI, then the release process should
+boil down to two steps:
+
+* 1: git tag 1.0
+* 2: python setup.py register sdist upload
+
+If you distribute it through github (i.e. users use github to generate
+tarballs with `git archive`), the process is:
+
+* 1: git tag 1.0
+* 2: git push; git push --tags
+
+Versioneer will report "0+untagged.NUMCOMMITS.gHASH" until your tree has at
+least one tag in its history.
+
+## Version-String Flavors
+
+Code which uses Versioneer can learn about its version string at runtime by
+importing `_version` from your main `__init__.py` file and running the
+`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
+import the top-level `versioneer.py` and run `get_versions()`.
+
+Both functions return a dictionary with different flavors of version
+information:
+
+* `['version']`: A condensed version string, rendered using the selected
+  style. This is the most commonly used value for the project's version
+  string. The default "pep440" style yields strings like `0.11`,
+  `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section
+  below for alternative styles.
+
+* `['full-revisionid']`: detailed revision identifier. For Git, this is the
+  full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac".
+
+* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that
+  this is only accurate if run in a VCS checkout, otherwise it is likely to
+  be False or None
+
+* `['error']`: if the version string could not be computed, this will be set
+  to a string describing the problem, otherwise it will be None. It may be
+  useful to throw an exception in setup.py if this is set, to avoid e.g.
+  creating tarballs with a version string of "unknown".
+
+Some variants are more useful than others. Including `full-revisionid` in a
+bug report should allow developers to reconstruct the exact code being tested
+(or indicate the presence of local changes that should be shared with the
+developers). `version` is suitable for display in an "about" box or a CLI
+`--version` output: it can be easily compared against release notes and lists
+of bugs fixed in various releases.
+
+The installer adds the following text to your `__init__.py` to place a basic
+version in `YOURPROJECT.__version__`:
+
+    from ._version import get_versions
+    __version__ = get_versions()['version']
+    del get_versions
+
+## Styles
+
+The setup.cfg `style=` configuration controls how the VCS information is
+rendered into a version string.
+
+The default style, "pep440", produces a PEP440-compliant string, equal to the
+un-prefixed tag name for actual releases, and containing an additional "local
+version" section with more detail for in-between builds. For Git, this is
+TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
+--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
+tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
+that this commit is two revisions ("+2") beyond the "0.11" tag. For released
+software (exactly equal to a known tag), the identifier will only contain the
+stripped tag, e.g. "0.11".
+
+Other styles are available. See details.md in the Versioneer source tree for
+descriptions.
+
+## Debugging
+
+Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
+to return a version of "0+unknown". To investigate the problem, run `setup.py
+version`, which will run the version-lookup code in a verbose mode, and will
+display the full contents of `get_versions()` (including the `error` string,
+which may help identify what went wrong).
+
+## Updating Versioneer
+
+To upgrade your project to a new release of Versioneer, do the following:
+
+* install the new Versioneer (`pip install -U versioneer` or equivalent)
+* edit `setup.cfg`, if necessary, to include any new configuration settings
+  indicated by the release notes
+* re-run `versioneer install` in your source tree, to replace
+  `SRC/_version.py`
+* commit any changed files
+
+### Upgrading to 0.15
+
+Starting with this version, Versioneer is configured with a `[versioneer]`
+section in your `setup.cfg` file. Earlier versions required the `setup.py` to
+set attributes on the `versioneer` module immediately after import. The new
+version will refuse to run (raising an exception during import) until you
+have provided the necessary `setup.cfg` section.
+
+In addition, the Versioneer package provides an executable named
+`versioneer`, and the installation process is driven by running `versioneer
+install`. In 0.14 and earlier, the executable was named
+`versioneer-installer` and was run without an argument.
+
+### Upgrading to 0.14
+
+0.14 changes the format of the version string. 0.13 and earlier used
+hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a
+plus-separated "local version" section string, with dot-separated
+components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old
+format, but should be ok with the new one.
+
+### Upgrading from 0.11 to 0.12
+
+Nothing special.
+
+### Upgrading from 0.10 to 0.11
+
+You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running
+`setup.py setup_versioneer`. This will enable the use of additional
+version-control systems (SVN, etc) in the future.
+
+## Future Directions
+
+This tool is designed to be easily extended to other version-control
+systems: all VCS-specific components are in separate directories like
+src/git/ . The top-level `versioneer.py` script is assembled from these
+components by running make-versioneer.py . In the future, make-versioneer.py
+will take a VCS name as an argument, and will construct a version of
+`versioneer.py` that is specific to the given VCS. It might also take the
+configuration arguments that are currently provided manually during
+installation by editing setup.py . Alternatively, it might go the other
+direction and include code from all supported VCS systems, reducing the
+number of intermediate scripts.
+
+
+## License
+
+To make Versioneer easier to embed, all its code is hereby released into the
+public domain. The `_version.py` that it creates is also in the public
+domain.
+
+"""
+
+from __future__ import print_function
+try:
+    import configparser
+except ImportError:
+    import ConfigParser as configparser
+import errno
+import json
+import os
+import re
+import subprocess
+import sys
+
+
+class VersioneerConfig:
+    pass
+
+
+def get_root():
+    # we require that all commands are run from the project root, i.e. the
+    # directory that contains setup.py, setup.cfg, and versioneer.py .
+    root = os.path.realpath(os.path.abspath(os.getcwd()))
+    setup_py = os.path.join(root, "setup.py")
+    versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        # allow 'python path/to/setup.py COMMAND'
+        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
+        setup_py = os.path.join(root, "setup.py")
+        versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        err = ("Versioneer was unable to run the project root directory. "
+               "Versioneer requires setup.py to be executed from "
+               "its immediate directory (like 'python setup.py COMMAND'), "
+               "or in a way that lets it use sys.argv[0] to find the root "
+               "(like 'python path/to/setup.py COMMAND').")
+        raise VersioneerBadRootError(err)
+    try:
+        # Certain runtime workflows (setup.py install/develop in a setuptools
+        # tree) execute all dependencies in a single python process, so
+        # "versioneer" may be imported multiple times, and python's shared
+        # module-import table will cache the first one. So we can't use
+        # os.path.dirname(__file__), as that will find whichever
+        # versioneer.py was first imported, even in later projects.
+        me = os.path.realpath(os.path.abspath(__file__))
+        if os.path.splitext(me)[0] != os.path.splitext(versioneer_py)[0]:
+            print("Warning: build in %s is using versioneer.py from %s"
+                  % (os.path.dirname(me), versioneer_py))
+    except NameError:
+        pass
+    return root
+
+
+def get_config_from_root(root):
+    # This might raise EnvironmentError (if setup.cfg is missing), or
+    # configparser.NoSectionError (if it lacks a [versioneer] section), or
+    # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
+    # the top of versioneer.py for instructions on writing your setup.cfg .
+    setup_cfg = os.path.join(root, "setup.cfg")
+    parser = configparser.SafeConfigParser()
+    with open(setup_cfg, "r") as f:
+        parser.readfp(f)
+    VCS = parser.get("versioneer", "VCS")  # mandatory
+
+    def get(parser, name):
+        if parser.has_option("versioneer", name):
+            return parser.get("versioneer", name)
+        return None
+    cfg = VersioneerConfig()
+    cfg.VCS = VCS
+    cfg.style = get(parser, "style") or ""
+    cfg.versionfile_source = get(parser, "versionfile_source")
+    cfg.versionfile_build = get(parser, "versionfile_build")
+    cfg.tag_prefix = get(parser, "tag_prefix")
+    cfg.parentdir_prefix = get(parser, "parentdir_prefix")
+    cfg.verbose = get(parser, "verbose")
+    return cfg
+
+
+class NotThisMethod(Exception):
+    pass
+
+# these dictionaries contain VCS-specific tools
+LONG_VERSION_PY = {}
+HANDLERS = {}
+
+
+def register_vcs_handler(vcs, method):  # decorator
+    def decorate(f):
+        if vcs not in HANDLERS:
+            HANDLERS[vcs] = {}
+        HANDLERS[vcs][method] = f
+        return f
+    return decorate
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
+    assert isinstance(commands, list)
+    p = None
+    for c in commands:
+        try:
+            dispcmd = str([c] + args)
+            # remember shell=False, so use git.cmd on windows, not just git
+            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
+                                 stderr=(subprocess.PIPE if hide_stderr
+                                         else None))
+            break
+        except EnvironmentError:
+            e = sys.exc_info()[1]
+            if e.errno == errno.ENOENT:
+                continue
+            if verbose:
+                print("unable to run %s" % dispcmd)
+                print(e)
+            return None
+    else:
+        if verbose:
+            print("unable to find command, tried %s" % (commands,))
+        return None
+    stdout = p.communicate()[0].strip()
+    if sys.version_info[0] >= 3:
+        stdout = stdout.decode()
+    if p.returncode != 0:
+        if verbose:
+            print("unable to run %s (error)" % dispcmd)
+        return None
+    return stdout
+LONG_VERSION_PY['git'] = '''
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by githubs download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.15 (https://github.com/warner/python-versioneer)
+
+import errno
+import os
+import re
+import subprocess
+import sys
+
+
+def get_keywords():
+    # these strings will be replaced by git during git-archive.
+    # setup.py/versioneer.py will grep for the variable names, so they must
+    # each be defined on a line of their own. _version.py will just call
+    # get_keywords().
+    git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
+    git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s"
+    keywords = {"refnames": git_refnames, "full": git_full}
+    return keywords
+
+
+class VersioneerConfig:
+    pass
+
+
+def get_config():
+    # these strings are filled in when 'setup.py versioneer' creates
+    # _version.py
+    cfg = VersioneerConfig()
+    cfg.VCS = "git"
+    cfg.style = "%(STYLE)s"
+    cfg.tag_prefix = "%(TAG_PREFIX)s"
+    cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s"
+    cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s"
+    cfg.verbose = False
+    return cfg
+
+
+class NotThisMethod(Exception):
+    pass
+
+
+LONG_VERSION_PY = {}
+HANDLERS = {}
+
+
+def register_vcs_handler(vcs, method):  # decorator
+    def decorate(f):
+        if vcs not in HANDLERS:
+            HANDLERS[vcs] = {}
+        HANDLERS[vcs][method] = f
+        return f
+    return decorate
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
+    assert isinstance(commands, list)
+    p = None
+    for c in commands:
+        try:
+            dispcmd = str([c] + args)
+            # remember shell=False, so use git.cmd on windows, not just git
+            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
+                                 stderr=(subprocess.PIPE if hide_stderr
+                                         else None))
+            break
+        except EnvironmentError:
+            e = sys.exc_info()[1]
+            if e.errno == errno.ENOENT:
+                continue
+            if verbose:
+                print("unable to run %%s" %% dispcmd)
+                print(e)
+            return None
+    else:
+        if verbose:
+            print("unable to find command, tried %%s" %% (commands,))
+        return None
+    stdout = p.communicate()[0].strip()
+    if sys.version_info[0] >= 3:
+        stdout = stdout.decode()
+    if p.returncode != 0:
+        if verbose:
+            print("unable to run %%s (error)" %% dispcmd)
+        return None
+    return stdout
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose):
+    # Source tarballs conventionally unpack into a directory that includes
+    # both the project name and a version string.
+    dirname = os.path.basename(root)
+    if not dirname.startswith(parentdir_prefix):
+        if verbose:
+            print("guessing rootdir is '%%s', but '%%s' doesn't start with "
+                  "prefix '%%s'" %% (root, dirname, parentdir_prefix))
+        raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+    return {"version": dirname[len(parentdir_prefix):],
+            "full-revisionid": None,
+            "dirty": False, "error": None}
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs):
+    # the code embedded in _version.py can just fetch the value of these
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    keywords = {}
+    try:
+        f = open(versionfile_abs, "r")
+        for line in f.readlines():
+            if line.strip().startswith("git_refnames ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["refnames"] = mo.group(1)
+            if line.strip().startswith("git_full ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["full"] = mo.group(1)
+        f.close()
+    except EnvironmentError:
+        pass
+    return keywords
+
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(keywords, tag_prefix, verbose):
+    if not keywords:
+        raise NotThisMethod("no keywords at all, weird")
+    refnames = keywords["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print("keywords are unexpanded, not using")
+        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %%d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = set([r for r in refs if re.search(r'\d', r)])
+        if verbose:
+            print("discarding '%%s', no digits" %% ",".join(refs-tags))
+    if verbose:
+        print("likely tags: %%s" %% ",".join(sorted(tags)))
+    for ref in sorted(tags):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            if verbose:
+                print("picking %%s" %% r)
+            return {"version": r,
+                    "full-revisionid": keywords["full"].strip(),
+                    "dirty": False, "error": None
+                    }
+    # no suitable tags, so version is "0+unknown", but full hex is still there
+    if verbose:
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full-revisionid": keywords["full"].strip(),
+            "dirty": False, "error": "no suitable tags"}
+
+
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+    # this runs 'git' from the root of the source tree. This only gets called
+    # if the git-archive 'subst' keywords were *not* expanded, and
+    # _version.py hasn't already been rewritten with a short version string,
+    # meaning we're inside a checked out source tree.
+
+    if not os.path.exists(os.path.join(root, ".git")):
+        if verbose:
+            print("no .git in %%s" %% root)
+        raise NotThisMethod("no .git directory")
+
+    GITS = ["git"]
+    if sys.platform == "win32":
+        GITS = ["git.cmd", "git.exe"]
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    describe_out = run_command(GITS, ["describe", "--tags", "--dirty",
+                                      "--always", "--long"],
+                               cwd=root)
+    # --long was added in git-1.5.5
+    if describe_out is None:
+        raise NotThisMethod("'git describe' failed")
+    describe_out = describe_out.strip()
+    full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+    if full_out is None:
+        raise NotThisMethod("'git rev-parse' failed")
+    full_out = full_out.strip()
+
+    pieces = {}
+    pieces["long"] = full_out
+    pieces["short"] = full_out[:7]  # maybe improved later
+    pieces["error"] = None
+
+    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+    # TAG might have hyphens.
+    git_describe = describe_out
+
+    # look for -dirty suffix
+    dirty = git_describe.endswith("-dirty")
+    pieces["dirty"] = dirty
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" in git_describe:
+        # TAG-NUM-gHEX
+        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+        if not mo:
+            # unparseable. Maybe git-describe is misbehaving?
+            pieces["error"] = ("unable to parse git-describe output: '%%s'"
+                               %% describe_out)
+            return pieces
+
+        # tag
+        full_tag = mo.group(1)
+        if not full_tag.startswith(tag_prefix):
+            if verbose:
+                fmt = "tag '%%s' doesn't start with prefix '%%s'"
+                print(fmt %% (full_tag, tag_prefix))
+            pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'"
+                               %% (full_tag, tag_prefix))
+            return pieces
+        pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+        # distance: number of commits since tag
+        pieces["distance"] = int(mo.group(2))
+
+        # commit: short hex revision ID
+        pieces["short"] = mo.group(3)
+
+    else:
+        # HEX: no tags
+        pieces["closest-tag"] = None
+        count_out = run_command(GITS, ["rev-list", "HEAD", "--count"],
+                                cwd=root)
+        pieces["distance"] = int(count_out)  # total number of commits
+
+    return pieces
+
+
+def plus_or_dot(pieces):
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces):
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    # exceptions:
+    # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += plus_or_dot(pieces)
+            rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_pre(pieces):
+    # TAG[.post.devDISTANCE] . No -dirty
+
+    # exceptions:
+    # 1: no tags. 0.post.devDISTANCE
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += ".post.dev%%d" %% pieces["distance"]
+    else:
+        # exception #1
+        rendered = "0.post.dev%%d" %% pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that
+    # .dev0 sorts backwards (a dirty tree will appear "older" than the
+    # corresponding clean one), but you shouldn't be releasing software with
+    # -dirty anyways.
+
+    # exceptions:
+    # 1: no tags. 0.postDISTANCE[.dev0]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%%s" %% pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%%s" %% pieces["short"]
+    return rendered
+
+
+def render_pep440_old(pieces):
+    # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty.
+
+    # exceptions:
+    # 1: no tags. 0.postDISTANCE[.dev0]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty
+    # --always'
+
+    # exceptions:
+    # 1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty
+    # --always -long'. The distance/hash is unconditional.
+
+    # exceptions:
+    # 1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces, style):
+    if pieces["error"]:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": pieces["error"]}
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%%s'" %% style)
+
+    return {"version": rendered, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None}
+
+
+def get_versions():
+    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
+    # __file__, we can work backwards from there to the root. Some
+    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
+    # case we can only use expanded keywords.
+
+    cfg = get_config()
+    verbose = cfg.verbose
+
+    try:
+        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
+                                          verbose)
+    except NotThisMethod:
+        pass
+
+    try:
+        root = os.path.realpath(__file__)
+        # versionfile_source is the relative path from the top of the source
+        # tree (where the .git directory might live) to this file. Invert
+        # this to find the root from __file__.
+        for i in cfg.versionfile_source.split('/'):
+            root = os.path.dirname(root)
+    except NameError:
+        return {"version": "0+unknown", "full-revisionid": None,
+                "dirty": None,
+                "error": "unable to find root of source tree"}
+
+    try:
+        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
+        return render(pieces, cfg.style)
+    except NotThisMethod:
+        pass
+
+    try:
+        if cfg.parentdir_prefix:
+            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+    except NotThisMethod:
+        pass
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None,
+            "error": "unable to compute version"}
+'''
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs):
+    # the code embedded in _version.py can just fetch the value of these
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    keywords = {}
+    try:
+        f = open(versionfile_abs, "r")
+        for line in f.readlines():
+            if line.strip().startswith("git_refnames ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["refnames"] = mo.group(1)
+            if line.strip().startswith("git_full ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["full"] = mo.group(1)
+        f.close()
+    except EnvironmentError:
+        pass
+    return keywords
+
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(keywords, tag_prefix, verbose):
+    if not keywords:
+        raise NotThisMethod("no keywords at all, weird")
+    refnames = keywords["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print("keywords are unexpanded, not using")
+        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = set([r for r in refs if re.search(r'\d', r)])
+        if verbose:
+            print("discarding '%s', no digits" % ",".join(refs-tags))
+    if verbose:
+        print("likely tags: %s" % ",".join(sorted(tags)))
+    for ref in sorted(tags):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            if verbose:
+                print("picking %s" % r)
+            return {"version": r,
+                    "full-revisionid": keywords["full"].strip(),
+                    "dirty": False, "error": None
+                    }
+    # no suitable tags, so version is "0+unknown", but full hex is still there
+    if verbose:
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full-revisionid": keywords["full"].strip(),
+            "dirty": False, "error": "no suitable tags"}
+
+
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+    # this runs 'git' from the root of the source tree. This only gets called
+    # if the git-archive 'subst' keywords were *not* expanded, and
+    # _version.py hasn't already been rewritten with a short version string,
+    # meaning we're inside a checked out source tree.
+
+    if not os.path.exists(os.path.join(root, ".git")):
+        if verbose:
+            print("no .git in %s" % root)
+        raise NotThisMethod("no .git directory")
+
+    GITS = ["git"]
+    if sys.platform == "win32":
+        GITS = ["git.cmd", "git.exe"]
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    describe_out = run_command(GITS, ["describe", "--tags", "--dirty",
+                                      "--always", "--long"],
+                               cwd=root)
+    # --long was added in git-1.5.5
+    if describe_out is None:
+        raise NotThisMethod("'git describe' failed")
+    describe_out = describe_out.strip()
+    full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+    if full_out is None:
+        raise NotThisMethod("'git rev-parse' failed")
+    full_out = full_out.strip()
+
+    pieces = {}
+    pieces["long"] = full_out
+    pieces["short"] = full_out[:7]  # maybe improved later
+    pieces["error"] = None
+
+    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+    # TAG might have hyphens.
+    git_describe = describe_out
+
+    # look for -dirty suffix
+    dirty = git_describe.endswith("-dirty")
+    pieces["dirty"] = dirty
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" in git_describe:
+        # TAG-NUM-gHEX
+        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+        if not mo:
+            # unparseable. Maybe git-describe is misbehaving?
+            pieces["error"] = ("unable to parse git-describe output: '%s'"
+                               % describe_out)
+            return pieces
+
+        # tag
+        full_tag = mo.group(1)
+        if not full_tag.startswith(tag_prefix):
+            if verbose:
+                fmt = "tag '%s' doesn't start with prefix '%s'"
+                print(fmt % (full_tag, tag_prefix))
+            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+                               % (full_tag, tag_prefix))
+            return pieces
+        pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+        # distance: number of commits since tag
+        pieces["distance"] = int(mo.group(2))
+
+        # commit: short hex revision ID
+        pieces["short"] = mo.group(3)
+
+    else:
+        # HEX: no tags
+        pieces["closest-tag"] = None
+        count_out = run_command(GITS, ["rev-list", "HEAD", "--count"],
+                                cwd=root)
+        pieces["distance"] = int(count_out)  # total number of commits
+
+    return pieces
+
+
+def do_vcs_install(manifest_in, versionfile_source, ipy):
+    GITS = ["git"]
+    if sys.platform == "win32":
+        GITS = ["git.cmd", "git.exe"]
+    files = [manifest_in, versionfile_source]
+    if ipy:
+        files.append(ipy)
+    try:
+        me = __file__
+        if me.endswith(".pyc") or me.endswith(".pyo"):
+            me = os.path.splitext(me)[0] + ".py"
+        versioneer_file = os.path.relpath(me)
+    except NameError:
+        versioneer_file = "versioneer.py"
+    files.append(versioneer_file)
+    present = False
+    try:
+        f = open(".gitattributes", "r")
+        for line in f.readlines():
+            if line.strip().startswith(versionfile_source):
+                if "export-subst" in line.strip().split()[1:]:
+                    present = True
+        f.close()
+    except EnvironmentError:
+        pass
+    if not present:
+        f = open(".gitattributes", "a+")
+        f.write("%s export-subst\n" % versionfile_source)
+        f.close()
+        files.append(".gitattributes")
+    run_command(GITS, ["add", "--"] + files)
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose):
+    # Source tarballs conventionally unpack into a directory that includes
+    # both the project name and a version string.
+    dirname = os.path.basename(root)
+    if not dirname.startswith(parentdir_prefix):
+        if verbose:
+            print("guessing rootdir is '%s', but '%s' doesn't start with "
+                  "prefix '%s'" % (root, dirname, parentdir_prefix))
+        raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+    return {"version": dirname[len(parentdir_prefix):],
+            "full-revisionid": None,
+            "dirty": False, "error": None}
+
+SHORT_VERSION_PY = """
+# This file was generated by 'versioneer.py' (0.15) from
+# revision-control system data, or from the parent directory name of an
+# unpacked source archive. Distribution tarballs contain a pre-generated copy
+# of this file.
+
+import json
+import sys
+
+version_json = '''
+%s
+'''  # END VERSION_JSON
+
+
+def get_versions():
+    return json.loads(version_json)
+"""
+
+
+def versions_from_file(filename):
+    try:
+        with open(filename) as f:
+            contents = f.read()
+    except EnvironmentError:
+        raise NotThisMethod("unable to read _version.py")
+    mo = re.search(r"version_json = '''\n(.*)'''  # END VERSION_JSON",
+                   contents, re.M | re.S)
+    if not mo:
+        raise NotThisMethod("no version_json in _version.py")
+    return json.loads(mo.group(1))
+
+
+def write_to_version_file(filename, versions):
+    os.unlink(filename)
+    contents = json.dumps(versions, sort_keys=True,
+                          indent=1, separators=(",", ": "))
+    with open(filename, "w") as f:
+        f.write(SHORT_VERSION_PY % contents)
+
+    print("set %s to '%s'" % (filename, versions["version"]))
+
+
+def plus_or_dot(pieces):
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces):
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    # exceptions:
+    # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_pre(pieces):
+    # TAG[.post.devDISTANCE] . No -dirty
+
+    # exceptions:
+    # 1: no tags. 0.post.devDISTANCE
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += ".post.dev%d" % pieces["distance"]
+    else:
+        # exception #1
+        rendered = "0.post.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that
+    # .dev0 sorts backwards (a dirty tree will appear "older" than the
+    # corresponding clean one), but you shouldn't be releasing software with
+    # -dirty anyways.
+
+    # exceptions:
+    # 1: no tags. 0.postDISTANCE[.dev0]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_old(pieces):
+    # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty.
+
+    # exceptions:
+    # 1: no tags. 0.postDISTANCE[.dev0]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty
+    # --always'
+
+    # exceptions:
+    # 1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty
+    # --always -long'. The distance/hash is unconditional.
+
+    # exceptions:
+    # 1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces, style):
+    if pieces["error"]:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": pieces["error"]}
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%s'" % style)
+
+    return {"version": rendered, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None}
+
+
+class VersioneerBadRootError(Exception):
+    pass
+
+
+def get_versions(verbose=False):
+    # returns dict with two keys: 'version' and 'full'
+
+    if "versioneer" in sys.modules:
+        # see the discussion in cmdclass.py:get_cmdclass()
+        del sys.modules["versioneer"]
+
+    root = get_root()
+    cfg = get_config_from_root(root)
+
+    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
+    handlers = HANDLERS.get(cfg.VCS)
+    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
+    verbose = verbose or cfg.verbose
+    assert cfg.versionfile_source is not None, \
+        "please set versioneer.versionfile_source"
+    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"
+
+    versionfile_abs = os.path.join(root, cfg.versionfile_source)
+
+    # extract version from first of: _version.py, VCS command (e.g. 'git
+    # describe'), parentdir. This is meant to work for developers using a
+    # source checkout, for users of a tarball created by 'setup.py sdist',
+    # and for users of a tarball/zipball created by 'git archive' or github's
+    # download-from-tag feature or the equivalent in other VCSes.
+
+    get_keywords_f = handlers.get("get_keywords")
+    from_keywords_f = handlers.get("keywords")
+    if get_keywords_f and from_keywords_f:
+        try:
+            keywords = get_keywords_f(versionfile_abs)
+            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        ver = versions_from_file(versionfile_abs)
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
+        return ver
+    except NotThisMethod:
+        pass
+
+    from_vcs_f = handlers.get("pieces_from_vcs")
+    if from_vcs_f:
+        try:
+            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+            ver = render(pieces, cfg.style)
+            if verbose:
+                print("got version from VCS %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        if cfg.parentdir_prefix:
+            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+            if verbose:
+                print("got version from parentdir %s" % ver)
+            return ver
+    except NotThisMethod:
+        pass
+
+    if verbose:
+        print("unable to compute version")
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None, "error": "unable to compute version"}
+
+
+def get_version():
+    return get_versions()["version"]
+
+
+def get_cmdclass():
+    if "versioneer" in sys.modules:
+        del sys.modules["versioneer"]
+        # this fixes the "python setup.py develop" case (also 'install' and
+        # 'easy_install .'), in which subdependencies of the main project are
+        # built (using setup.py bdist_egg) in the same python process. Assume
+        # a main project A and a dependency B, which use different versions
+        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+        # sys.modules by the time B's setup.py is executed, causing B to run
+        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+        # sandbox that restores sys.modules to it's pre-build state, so the
+        # parent is protected against the child's "import versioneer". By
+        # removing ourselves from sys.modules here, before the child build
+        # happens, we protect the child from the parent's versioneer too.
+        # Also see https://github.com/warner/python-versioneer/issues/52
+
+    cmds = {}
+
+    # we add "version" to both distutils and setuptools
+    from distutils.core import Command
+
+    class cmd_version(Command):
+        description = "report generated version string"
+        user_options = []
+        boolean_options = []
+
+        def initialize_options(self):
+            pass
+
+        def finalize_options(self):
+            pass
+
+        def run(self):
+            vers = get_versions(verbose=True)
+            print("Version: %s" % vers["version"])
+            print(" full-revisionid: %s" % vers.get("full-revisionid"))
+            print(" dirty: %s" % vers.get("dirty"))
+            if vers["error"]:
+                print(" error: %s" % vers["error"])
+    cmds["version"] = cmd_version
+
+    # we override "build_py" in both distutils and setuptools
+    #
+    # most invocation pathways end up running build_py:
+    #  distutils/build -> build_py
+    #  distutils/install -> distutils/build ->..
+    #  setuptools/bdist_wheel -> distutils/install ->..
+    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
+    #  setuptools/install -> bdist_egg ->..
+    #  setuptools/develop -> ?
+
+    from distutils.command.build_py import build_py as _build_py
+
+    class cmd_build_py(_build_py):
+        def run(self):
+            root = get_root()
+            cfg = get_config_from_root(root)
+            versions = get_versions()
+            _build_py.run(self)
+            # now locate _version.py in the new build/ directory and replace
+            # it with an updated value
+            if cfg.versionfile_build:
+                target_versionfile = os.path.join(self.build_lib,
+                                                  cfg.versionfile_build)
+                print("UPDATING %s" % target_versionfile)
+                write_to_version_file(target_versionfile, versions)
+    cmds["build_py"] = cmd_build_py
+
+    if "cx_Freeze" in sys.modules:  # cx_freeze enabled?
+        from cx_Freeze.dist import build_exe as _build_exe
+
+        class cmd_build_exe(_build_exe):
+            def run(self):
+                root = get_root()
+                cfg = get_config_from_root(root)
+                versions = get_versions()
+                target_versionfile = cfg.versionfile_source
+                print("UPDATING %s" % target_versionfile)
+                write_to_version_file(target_versionfile, versions)
+
+                _build_exe.run(self)
+                os.unlink(target_versionfile)
+                with open(cfg.versionfile_source, "w") as f:
+                    LONG = LONG_VERSION_PY[cfg.VCS]
+                    f.write(LONG %
+                            {"DOLLAR": "$",
+                             "STYLE": cfg.style,
+                             "TAG_PREFIX": cfg.tag_prefix,
+                             "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+                             "VERSIONFILE_SOURCE": cfg.versionfile_source,
+                             })
+        cmds["build_exe"] = cmd_build_exe
+        del cmds["build_py"]
+
+    # we override different "sdist" commands for both environments
+    if "setuptools" in sys.modules:
+        from setuptools.command.sdist import sdist as _sdist
+    else:
+        from distutils.command.sdist import sdist as _sdist
+
+    class cmd_sdist(_sdist):
+        def run(self):
+            versions = get_versions()
+            self._versioneer_generated_versions = versions
+            # unless we update this, the command will keep using the old
+            # version
+            self.distribution.metadata.version = versions["version"]
+            return _sdist.run(self)
+
+        def make_release_tree(self, base_dir, files):
+            root = get_root()
+            cfg = get_config_from_root(root)
+            _sdist.make_release_tree(self, base_dir, files)
+            # now locate _version.py in the new base_dir directory
+            # (remembering that it may be a hardlink) and replace it with an
+            # updated value
+            target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
+            print("UPDATING %s" % target_versionfile)
+            write_to_version_file(target_versionfile,
+                                  self._versioneer_generated_versions)
+    cmds["sdist"] = cmd_sdist
+
+    return cmds
+
+
+CONFIG_ERROR = """
+setup.cfg is missing the necessary Versioneer configuration. You need
+a section like:
+
+ [versioneer]
+ VCS = git
+ style = pep440
+ versionfile_source = src/myproject/_version.py
+ versionfile_build = myproject/_version.py
+ tag_prefix = ""
+ parentdir_prefix = myproject-
+
+You will also need to edit your setup.py to use the results:
+
+ import versioneer
+ setup(version=versioneer.get_version(),
+       cmdclass=versioneer.get_cmdclass(), ...)
+
+Please read the docstring in ./versioneer.py for configuration instructions,
+edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
+"""
+
+SAMPLE_CONFIG = """
+# See the docstring in versioneer.py for instructions. Note that you must
+# re-run 'versioneer.py setup' after changing this section, and commit the
+# resulting files.
+
+[versioneer]
+#VCS = git
+#style = pep440
+#versionfile_source =
+#versionfile_build =
+#tag_prefix =
+#parentdir_prefix =
+
+"""
+
+INIT_PY_SNIPPET = """
+from ._version import get_versions
+__version__ = get_versions()['version']
+del get_versions
+"""
+
+
+def do_setup():
+    root = get_root()
+    try:
+        cfg = get_config_from_root(root)
+    except (EnvironmentError, configparser.NoSectionError,
+            configparser.NoOptionError) as e:
+        if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
+            print("Adding sample versioneer config to setup.cfg",
+                  file=sys.stderr)
+            with open(os.path.join(root, "setup.cfg"), "a") as f:
+                f.write(SAMPLE_CONFIG)
+        print(CONFIG_ERROR, file=sys.stderr)
+        return 1
+
+    print(" creating %s" % cfg.versionfile_source)
+    with open(cfg.versionfile_source, "w") as f:
+        LONG = LONG_VERSION_PY[cfg.VCS]
+        f.write(LONG % {"DOLLAR": "$",
+                        "STYLE": cfg.style,
+                        "TAG_PREFIX": cfg.tag_prefix,
+                        "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+                        "VERSIONFILE_SOURCE": cfg.versionfile_source,
+                        })
+
+    ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
+                       "__init__.py")
+    if os.path.exists(ipy):
+        try:
+            with open(ipy, "r") as f:
+                old = f.read()
+        except EnvironmentError:
+            old = ""
+        if INIT_PY_SNIPPET not in old:
+            print(" appending to %s" % ipy)
+            with open(ipy, "a") as f:
+                f.write(INIT_PY_SNIPPET)
+        else:
+            print(" %s unmodified" % ipy)
+    else:
+        print(" %s doesn't exist, ok" % ipy)
+        ipy = None
+
+    # Make sure both the top-level "versioneer.py" and versionfile_source
+    # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
+    # they'll be copied into source distributions. Pip won't be able to
+    # install the package without this.
+    manifest_in = os.path.join(root, "MANIFEST.in")
+    simple_includes = set()
+    try:
+        with open(manifest_in, "r") as f:
+            for line in f:
+                if line.startswith("include "):
+                    for include in line.split()[1:]:
+                        simple_includes.add(include)
+    except EnvironmentError:
+        pass
+    # That doesn't cover everything MANIFEST.in can do
+    # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
+    # it might give some false negatives. Appending redundant 'include'
+    # lines is safe, though.
+    if "versioneer.py" not in simple_includes:
+        print(" appending 'versioneer.py' to MANIFEST.in")
+        with open(manifest_in, "a") as f:
+            f.write("include versioneer.py\n")
+    else:
+        print(" 'versioneer.py' already in MANIFEST.in")
+    if cfg.versionfile_source not in simple_includes:
+        print(" appending versionfile_source ('%s') to MANIFEST.in" %
+              cfg.versionfile_source)
+        with open(manifest_in, "a") as f:
+            f.write("include %s\n" % cfg.versionfile_source)
+    else:
+        print(" versionfile_source already in MANIFEST.in")
+
+    # Make VCS-specific changes. For git, this means creating/changing
+    # .gitattributes to mark _version.py for export-time keyword
+    # substitution.
+    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
+    return 0
+
+
+def scan_setup_py():
+    found = set()
+    setters = False
+    errors = 0
+    with open("setup.py", "r") as f:
+        for line in f.readlines():
+            if "import versioneer" in line:
+                found.add("import")
+            if "versioneer.get_cmdclass()" in line:
+                found.add("cmdclass")
+            if "versioneer.get_version()" in line:
+                found.add("get_version")
+            if "versioneer.VCS" in line:
+                setters = True
+            if "versioneer.versionfile_source" in line:
+                setters = True
+    if len(found) != 3:
+        print("")
+        print("Your setup.py appears to be missing some important items")
+        print("(but I might be wrong). Please make sure it has something")
+        print("roughly like the following:")
+        print("")
+        print(" import versioneer")
+        print(" setup( version=versioneer.get_version(),")
+        print("        cmdclass=versioneer.get_cmdclass(),  ...)")
+        print("")
+        errors += 1
+    if setters:
+        print("You should remove lines like 'versioneer.VCS = ' and")
+        print("'versioneer.versionfile_source = ' . This configuration")
+        print("now lives in setup.cfg, and should be removed from setup.py")
+        print("")
+        errors += 1
+    return errors
+
+if __name__ == "__main__":
+    cmd = sys.argv[1]
+    if cmd == "setup":
+        errors = do_setup()
+        errors += scan_setup_py()
+        if errors:
+            sys.exit(1)

From 82704327b7767419db2ab62abd75ebf80292286a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 25 Oct 2017 11:31:45 -0700
Subject: [PATCH 292/617] trying to fix travis script

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 12a91af79..763dfe5e6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -56,8 +56,9 @@ before_install:
   - git clone https://github.com/jonathan-taylor/regreg.git
   - cd regreg
   - pip install -r requirements.txt
-  - pip install -e .
+  - python setup.py install
   - cd ..
+  - rm -fr regreg
   - sudo apt-get install software-properties-common
   - sudo add-apt-repository -y ppa:marutter/c2d4u
   - sudo add-apt-repository -y ppa:marutter/rrutter
@@ -72,7 +73,6 @@ install:
     else
       pip install  -r requirements.txt; 
     fi
-  - pip install -e .
   - cd R-software
   - git submodule init
   - git submodule update

From f380bd2091c11c790764a2ca6f4b13fdcac50c7b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 25 Oct 2017 11:42:05 -0700
Subject: [PATCH 293/617] fixing setup.py

---
 .travis.yml |  3 +--
 setup.py    | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 763dfe5e6..34b7c2eef 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -56,9 +56,8 @@ before_install:
   - git clone https://github.com/jonathan-taylor/regreg.git
   - cd regreg
   - pip install -r requirements.txt
-  - python setup.py install
+  - pip install -e .
   - cd ..
-  - rm -fr regreg
   - sudo apt-get install software-properties-common
   - sudo add-apt-repository -y ppa:marutter/c2d4u
   - sudo add-apt-repository -y ppa:marutter/rrutter
diff --git a/setup.py b/setup.py
index 4821119ba..a5a9793be 100755
--- a/setup.py
+++ b/setup.py
@@ -51,3 +51,76 @@
     EXTS.append(Extension(modulename,[pyx_src] + other_sources,
                           libraries=['m']),
                 )
+
+# Cython is a dependency for building extensions, iff we don't have stamped
+# up pyx and c files.
+build_ext, need_cython = cyproc_exts(EXTS,
+                                     info.CYTHON_MIN_VERSION,
+                                     'pyx-stamps')
+
+# Add numpy includes when building extension.
+build_ext = make_np_ext_builder(build_ext)
+
+# Check dependencies, maybe add to setuptools lists
+if need_cython:
+    SetupDependency('Cython', info.CYTHON_MIN_VERSION,
+                    req_type='install_requires',
+                    heavy=False).check_fill(extra_setuptools_args)
+SetupDependency('numpy', info.NUMPY_MIN_VERSION,
+                req_type='install_requires',
+                heavy=True).check_fill(extra_setuptools_args)
+SetupDependency('scipy', info.SCIPY_MIN_VERSION,
+                req_type='install_requires',
+                heavy=True).check_fill(extra_setuptools_args)
+
+
+cmdclass=versioneer.get_cmdclass()
+cmdclass.update(dict(
+    build_ext=build_ext,
+    sdist=get_pyx_sdist()))
+
+
+def main(**extra_args):
+    setup(name=info.NAME,
+          maintainer=info.MAINTAINER,
+          maintainer_email=info.MAINTAINER_EMAIL,
+          description=info.DESCRIPTION,
+          url=info.URL,
+          download_url=info.DOWNLOAD_URL,
+          license=info.LICENSE,
+          classifiers=info.CLASSIFIERS,
+          author=info.AUTHOR,
+          author_email=info.AUTHOR_EMAIL,
+          platforms=info.PLATFORMS,
+          version=versioneer.get_version(),
+          requires=info.REQUIRES,
+          provides=info.PROVIDES,
+          packages     = ['selection',
+                          'selection.utils',
+                          'selection.truncated',
+                          'selection.truncated.tests',
+                          'selection.constraints',
+                          'selection.constraints.tests',
+                          'selection.distributions',
+                          'selection.distributions.tests',
+                          'selection.algorithms',
+                          'selection.algorithms.tests',
+                          'selection.sampling',
+                          'selection.sampling.tests',
+                          'selection.randomized',
+                          'selection.randomized.tests',
+                          'selection.tests'
+                          ],
+          ext_modules = EXTS,
+          package_data = {},
+          data_files=[],
+          scripts= [],
+          long_description = open('README.rst', 'rt').read(),
+          cmdclass = cmdclass,
+          **extra_args
+         )
+
+#simple way to test what setup will do
+#python setup.py install --prefix=/tmp
+if __name__ == "__main__":
+    main(**extra_setuptools_args)

From 12d61bbebdb618525b83233d10787729cee9ded0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 25 Oct 2017 11:50:57 -0700
Subject: [PATCH 294/617] RF: update cythexts / setup_helpers from regreg

---
 cythexts.py | 60 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/cythexts.py b/cythexts.py
index 7ff24526a..516aac884 100644
--- a/cythexts.py
+++ b/cythexts.py
@@ -1,7 +1,6 @@
 import os
 from os.path import splitext, sep as filesep, join as pjoin, relpath
 from hashlib import sha1
-from subprocess import check_call
 
 from distutils.command.build_ext import build_ext
 from distutils.command.sdist import sdist
@@ -95,6 +94,8 @@ def cyproc_exts(exts, cython_min_version,
         Can be ``build_ext`` input (if we have good c files) or cython
         ``build_ext`` if we have a good cython, or a class raising an informative
         error on ``run()``
+    need_cython : bool
+        True if we need Cython to build extensions, False otherwise.
     """
     if stamped_pyx_ok(exts, hash_stamps_fname):
         # Replace pyx with c files, use standard builder
@@ -107,29 +108,33 @@ def cyproc_exts(exts, cython_min_version,
                 else:
                     sources.append(source)
             mod.sources = sources
-        return build_ext
+        return build_ext, False
     # We need cython
     try:
         from Cython.Compiler.Version import version as cyversion
     except ImportError:
-        cython_ok = False
-    else:
-        cython_ok = LooseVersion(cyversion) >= cython_min_version
-    if cython_ok:
+        return derror_maker(build_ext,
+                            'Need cython>={0} to build extensions '
+                            'but cannot import "Cython"'.format(
+                            cython_min_version)), True
+    if LooseVersion(cyversion) >= cython_min_version:
         from Cython.Distutils import build_ext as extbuilder
-        return extbuilder
+        return extbuilder, True
     return derror_maker(build_ext,
-                        'Need cython>=%s to build extensions'
-                        % cython_min_version)
+                        'Need cython>={0} to build extensions'
+                        'but found cython version {1}'.format(
+                        cython_min_version, cyversion)), True
 
 
-def build_stamp(pyxes):
+def build_stamp(pyxes, include_dirs=()):
     """ Cythonize files in `pyxes`, return pyx, C filenames, hashes
 
     Parameters
     ----------
     pyxes : sequence
         sequence of filenames of files on which to run Cython
+    include_dirs : sequence
+        Any extra include directories in which to find Cython files.
 
     Returns
     -------
@@ -139,11 +144,17 @@ def build_stamp(pyxes):
         hash>; "c_filename", <c filemane>; "c_hash", <c file SHA1 hash>.
     """
     pyx_defs = {}
+    from Cython.Compiler.Main import compile
+    from Cython.Compiler.CmdLine import parse_command_line
+    includes = sum([['--include-dir', d] for d in include_dirs], [])
     for source in pyxes:
         base, ext = splitext(source)
         pyx_hash = sha1(open(source, 'rt').read()).hexdigest()
         c_filename = base + '.c'
-        check_call('cython ' + source, shell=True)
+        options, sources = parse_command_line(includes + [source])
+        result = compile(sources, options)
+        if result.num_errors > 0:
+            raise RuntimeError('Cython failed to compile ' + source)
         c_hash = sha1(open(c_filename, 'rt').read()).hexdigest()
         pyx_defs[source] = dict(pyx_hash=pyx_hash,
                                 c_filename=c_filename,
@@ -173,22 +184,19 @@ def write_stamps(pyx_defs, stamp_fname='pyx-stamps'):
                                            pyx_info['c_hash']))
 
 
-def find_pyx(root_dir=None):
+def find_pyx(root_dir):
     """ Recursively find files with extension '.pyx' starting at `root_dir`
 
     Parameters
     ----------
-    root_dir : None or str, optional
-        Directory from which to search for pyx files.  If None, use current
-        working directory.
+    root_dir : str
+        Directory from which to search for pyx files.
 
     Returns
     -------
     pyxes : list
         list of filenames relative to `root_dir`
     """
-    if root_dir is None:
-        root_dir = os.getcwd()
     pyxes = []
     for dirpath, dirnames, filenames in os.walk(root_dir):
         for filename in filenames:
@@ -199,7 +207,8 @@ def find_pyx(root_dir=None):
     return pyxes
 
 
-def get_pyx_sdist(sdist_like=sdist, hash_stamps_fname='pyx-stamps'):
+def get_pyx_sdist(sdist_like=sdist, hash_stamps_fname='pyx-stamps',
+                  include_dirs=()):
     """ Add pyx->c conversion, hash recording to sdist command `sdist_like`
 
     Parameters
@@ -210,6 +219,8 @@ def get_pyx_sdist(sdist_like=sdist, hash_stamps_fname='pyx-stamps'):
     hash_stamps_fname : str, optional
         filename to which to write hashes of pyx / py and c files.  Default is
         ``pyx-stamps``
+    include_dirs : sequence
+        Any extra include directories in which to find Cython files.
 
     Returns
     -------
@@ -240,7 +251,7 @@ def make_distribution(self):
                     base, ext = splitext(source)
                     if ext in ('.pyx', '.py'):
                         pyxes.append(source)
-            self.pyx_defs = build_stamp(pyxes)
+            self.pyx_defs = build_stamp(pyxes, include_dirs)
             for pyx_fname, pyx_info in self.pyx_defs.items():
                 self.filelist.append(pyx_info['c_filename'])
             sdist_like.make_distribution(self)
@@ -254,7 +265,8 @@ def make_release_tree(self, base_dir, files):
     return PyxSDist
 
 
-def build_stamp_source(root_dir=None, stamp_fname='pyx-stamps'):
+def build_stamp_source(root_dir=None, stamp_fname='pyx-stamps',
+                       include_dirs=None):
     """ Build cython c files, make stamp file in source tree `root_dir`
 
     Parameters
@@ -264,7 +276,13 @@ def build_stamp_source(root_dir=None, stamp_fname='pyx-stamps'):
         working directory.
     stamp_fname : str, optional
         Filename for stamp file we will write
+    include_dirs : None or sequence
+        Any extra Cython include directories
     """
+    if root_dir is None:
+        root_dir = os.getcwd()
+    if include_dirs is None:
+        include_dirs = [pjoin(root_dir, 'src')]
     pyxes = find_pyx(root_dir)
-    pyx_defs = build_stamp(pyxes)
+    pyx_defs = build_stamp(pyxes, include_dirs=include_dirs)
     write_stamps(pyx_defs, stamp_fname)

From 4762c822f52465c836015cd36fd04e3d51a230c0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 25 Oct 2017 11:53:07 -0700
Subject: [PATCH 295/617] fixing setup.py and setup.cfg

---
 setup.cfg | 6 ++++++
 setup.py  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 setup.cfg

diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 000000000..dd939ceb4
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[versioneer]
+VCS = git
+style = pep440
+versionfile_source = selection/_version.py
+tag_prefix =
+parentdir_prefix = selection-
diff --git a/setup.py b/setup.py
index a5a9793be..1263ef0f4 100755
--- a/setup.py
+++ b/setup.py
@@ -115,7 +115,7 @@ def main(**extra_args):
           package_data = {},
           data_files=[],
           scripts= [],
-          long_description = open('README.rst', 'rt').read(),
+          long_description = open('README.md', 'rt').read(),
           cmdclass = cmdclass,
           **extra_args
          )

From bac84fc098e351a1cc4279815419347dcbdffa4d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 25 Oct 2017 12:48:22 -0700
Subject: [PATCH 296/617] sdist is last failing -- fixing MANIFEST.in

---
 MANIFEST.in           |  10 +
 selection/_version.py | 460 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 470 insertions(+)
 create mode 100644 MANIFEST.in
 create mode 100644 selection/_version.py

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 000000000..c69c03809
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,10 @@
+include AUTHOR LICENSE Makefile* MANIFEST.in setup* README.*
+include Changelog TODO
+recursive-include doc *
+recursive-include tools *
+# setup utilities
+include setup_helpers.py
+include cythexts.py
+recursive-include fake_pyrex *
+include versioneer.py
+include selection/_version.py
diff --git a/selection/_version.py b/selection/_version.py
new file mode 100644
index 000000000..da70f7fc0
--- /dev/null
+++ b/selection/_version.py
@@ -0,0 +1,460 @@
+
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by githubs download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.15 (https://github.com/warner/python-versioneer)
+
+import errno
+import os
+import re
+import subprocess
+import sys
+
+
+def get_keywords():
+    # these strings will be replaced by git during git-archive.
+    # setup.py/versioneer.py will grep for the variable names, so they must
+    # each be defined on a line of their own. _version.py will just call
+    # get_keywords().
+    git_refnames = "$Format:%d$"
+    git_full = "$Format:%H$"
+    keywords = {"refnames": git_refnames, "full": git_full}
+    return keywords
+
+
+class VersioneerConfig:
+    pass
+
+
+def get_config():
+    # these strings are filled in when 'setup.py versioneer' creates
+    # _version.py
+    cfg = VersioneerConfig()
+    cfg.VCS = "git"
+    cfg.style = "pep440"
+    cfg.tag_prefix = ""
+    cfg.parentdir_prefix = "selection-"
+    cfg.versionfile_source = "selection/_version.py"
+    cfg.verbose = False
+    return cfg
+
+
+class NotThisMethod(Exception):
+    pass
+
+
+LONG_VERSION_PY = {}
+HANDLERS = {}
+
+
+def register_vcs_handler(vcs, method):  # decorator
+    def decorate(f):
+        if vcs not in HANDLERS:
+            HANDLERS[vcs] = {}
+        HANDLERS[vcs][method] = f
+        return f
+    return decorate
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
+    assert isinstance(commands, list)
+    p = None
+    for c in commands:
+        try:
+            dispcmd = str([c] + args)
+            # remember shell=False, so use git.cmd on windows, not just git
+            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
+                                 stderr=(subprocess.PIPE if hide_stderr
+                                         else None))
+            break
+        except EnvironmentError:
+            e = sys.exc_info()[1]
+            if e.errno == errno.ENOENT:
+                continue
+            if verbose:
+                print("unable to run %s" % dispcmd)
+                print(e)
+            return None
+    else:
+        if verbose:
+            print("unable to find command, tried %s" % (commands,))
+        return None
+    stdout = p.communicate()[0].strip()
+    if sys.version_info[0] >= 3:
+        stdout = stdout.decode()
+    if p.returncode != 0:
+        if verbose:
+            print("unable to run %s (error)" % dispcmd)
+        return None
+    return stdout
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose):
+    # Source tarballs conventionally unpack into a directory that includes
+    # both the project name and a version string.
+    dirname = os.path.basename(root)
+    if not dirname.startswith(parentdir_prefix):
+        if verbose:
+            print("guessing rootdir is '%s', but '%s' doesn't start with "
+                  "prefix '%s'" % (root, dirname, parentdir_prefix))
+        raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+    return {"version": dirname[len(parentdir_prefix):],
+            "full-revisionid": None,
+            "dirty": False, "error": None}
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs):
+    # the code embedded in _version.py can just fetch the value of these
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    keywords = {}
+    try:
+        f = open(versionfile_abs, "r")
+        for line in f.readlines():
+            if line.strip().startswith("git_refnames ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["refnames"] = mo.group(1)
+            if line.strip().startswith("git_full ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["full"] = mo.group(1)
+        f.close()
+    except EnvironmentError:
+        pass
+    return keywords
+
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(keywords, tag_prefix, verbose):
+    if not keywords:
+        raise NotThisMethod("no keywords at all, weird")
+    refnames = keywords["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print("keywords are unexpanded, not using")
+        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = set([r for r in refs if re.search(r'\d', r)])
+        if verbose:
+            print("discarding '%s', no digits" % ",".join(refs-tags))
+    if verbose:
+        print("likely tags: %s" % ",".join(sorted(tags)))
+    for ref in sorted(tags):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            if verbose:
+                print("picking %s" % r)
+            return {"version": r,
+                    "full-revisionid": keywords["full"].strip(),
+                    "dirty": False, "error": None
+                    }
+    # no suitable tags, so version is "0+unknown", but full hex is still there
+    if verbose:
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full-revisionid": keywords["full"].strip(),
+            "dirty": False, "error": "no suitable tags"}
+
+
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+    # this runs 'git' from the root of the source tree. This only gets called
+    # if the git-archive 'subst' keywords were *not* expanded, and
+    # _version.py hasn't already been rewritten with a short version string,
+    # meaning we're inside a checked out source tree.
+
+    if not os.path.exists(os.path.join(root, ".git")):
+        if verbose:
+            print("no .git in %s" % root)
+        raise NotThisMethod("no .git directory")
+
+    GITS = ["git"]
+    if sys.platform == "win32":
+        GITS = ["git.cmd", "git.exe"]
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    describe_out = run_command(GITS, ["describe", "--tags", "--dirty",
+                                      "--always", "--long"],
+                               cwd=root)
+    # --long was added in git-1.5.5
+    if describe_out is None:
+        raise NotThisMethod("'git describe' failed")
+    describe_out = describe_out.strip()
+    full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+    if full_out is None:
+        raise NotThisMethod("'git rev-parse' failed")
+    full_out = full_out.strip()
+
+    pieces = {}
+    pieces["long"] = full_out
+    pieces["short"] = full_out[:7]  # maybe improved later
+    pieces["error"] = None
+
+    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+    # TAG might have hyphens.
+    git_describe = describe_out
+
+    # look for -dirty suffix
+    dirty = git_describe.endswith("-dirty")
+    pieces["dirty"] = dirty
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" in git_describe:
+        # TAG-NUM-gHEX
+        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+        if not mo:
+            # unparseable. Maybe git-describe is misbehaving?
+            pieces["error"] = ("unable to parse git-describe output: '%s'"
+                               % describe_out)
+            return pieces
+
+        # tag
+        full_tag = mo.group(1)
+        if not full_tag.startswith(tag_prefix):
+            if verbose:
+                fmt = "tag '%s' doesn't start with prefix '%s'"
+                print(fmt % (full_tag, tag_prefix))
+            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+                               % (full_tag, tag_prefix))
+            return pieces
+        pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+        # distance: number of commits since tag
+        pieces["distance"] = int(mo.group(2))
+
+        # commit: short hex revision ID
+        pieces["short"] = mo.group(3)
+
+    else:
+        # HEX: no tags
+        pieces["closest-tag"] = None
+        count_out = run_command(GITS, ["rev-list", "HEAD", "--count"],
+                                cwd=root)
+        pieces["distance"] = int(count_out)  # total number of commits
+
+    return pieces
+
+
+def plus_or_dot(pieces):
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces):
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    # exceptions:
+    # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_pre(pieces):
+    # TAG[.post.devDISTANCE] . No -dirty
+
+    # exceptions:
+    # 1: no tags. 0.post.devDISTANCE
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += ".post.dev%d" % pieces["distance"]
+    else:
+        # exception #1
+        rendered = "0.post.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that
+    # .dev0 sorts backwards (a dirty tree will appear "older" than the
+    # corresponding clean one), but you shouldn't be releasing software with
+    # -dirty anyways.
+
+    # exceptions:
+    # 1: no tags. 0.postDISTANCE[.dev0]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_old(pieces):
+    # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty.
+
+    # exceptions:
+    # 1: no tags. 0.postDISTANCE[.dev0]
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty
+    # --always'
+
+    # exceptions:
+    # 1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty
+    # --always -long'. The distance/hash is unconditional.
+
+    # exceptions:
+    # 1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces, style):
+    if pieces["error"]:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": pieces["error"]}
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%s'" % style)
+
+    return {"version": rendered, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None}
+
+
+def get_versions():
+    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
+    # __file__, we can work backwards from there to the root. Some
+    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
+    # case we can only use expanded keywords.
+
+    cfg = get_config()
+    verbose = cfg.verbose
+
+    try:
+        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
+                                          verbose)
+    except NotThisMethod:
+        pass
+
+    try:
+        root = os.path.realpath(__file__)
+        # versionfile_source is the relative path from the top of the source
+        # tree (where the .git directory might live) to this file. Invert
+        # this to find the root from __file__.
+        for i in cfg.versionfile_source.split('/'):
+            root = os.path.dirname(root)
+    except NameError:
+        return {"version": "0+unknown", "full-revisionid": None,
+                "dirty": None,
+                "error": "unable to find root of source tree"}
+
+    try:
+        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
+        return render(pieces, cfg.style)
+    except NotThisMethod:
+        pass
+
+    try:
+        if cfg.parentdir_prefix:
+            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+    except NotThisMethod:
+        pass
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None,
+            "error": "unable to compute version"}

From 1aaadda6a1d548272ab773604caec9d81187dffa Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 25 Oct 2017 13:33:58 -0700
Subject: [PATCH 297/617] incorrect shape of active set for logistic and
 poisson

---
 selection/algorithms/tests/test_lasso.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/selection/algorithms/tests/test_lasso.py b/selection/algorithms/tests/test_lasso.py
index d5a3ae657..e29a6cc23 100644
--- a/selection/algorithms/tests/test_lasso.py
+++ b/selection/algorithms/tests/test_lasso.py
@@ -247,12 +247,12 @@ def test_data_carving_sqrt_lasso(n=200,
                                  return_only_screening=True):
     
     X, y, beta, true_active, sigma = instance(n=n, 
-                                         p=p, 
-                                         s=s, 
-                                         sigma=sigma, 
-                                         rho=rho, 
-                                         signal=signal, 
-                                         df=df)
+                                              p=p, 
+                                              s=s, 
+                                              sigma=sigma, 
+                                              rho=rho, 
+                                              signal=signal, 
+                                              df=df)
     mu = np.dot(X, beta)
 
     idx = np.arange(n)
@@ -365,7 +365,7 @@ def test_data_carving_logistic(n=700,
 
         Xa = X[:,DC.active]
 
-        active = np.zeros(p, np.bool)
+        active = np.zeros(p+1, np.bool)
         active[true_active] = 1
         v = (carve, split, active)
         return v
@@ -435,7 +435,7 @@ def test_data_carving_poisson(n=500,
 
         Xa = X[:,DC.active]
 
-        active = np.zeros(p, np.bool)
+        active = np.zeros(p+1, np.bool)
         active[true_active] = 1
         v = (carve, split, active)
         return v

From 4f0cbb87f171cd27a12ba3169bc9085cb040a584 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 26 Oct 2017 22:42:07 -0700
Subject: [PATCH 298/617] modified test_QP for R code

---
 selection/algorithms/tests/test_compareR.py | 38 +++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index c9b58b611..1d1145ebb 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -339,8 +339,23 @@ def test_solve_QP():
     nactive = as.integer(1)
     kkt_tol = 1.e-12
     objective_tol = 1.e-16
+    parameter_tol = 1.e-10
     maxiter = 500
-    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, lam, maxiter, soln_R, -t(X) %*% Y / n, grad, ever_active, nactive, kkt_tol, objective_tol, p)$soln
+    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, 
+                                           lam, 
+                                           maxiter, 
+                                           soln_R, 
+                                           -t(X) %*% Y / n, 
+                                           grad, 
+                                           ever_active, 
+                                           nactive, 
+                                           kkt_tol, 
+                                           objective_tol, 
+                                           parameter_tol,
+                                           p,
+                                           TRUE,
+                                           TRUE,
+                                           TRUE)$soln
 
     # test wide solver
     Xtheta = rep(0, n)
@@ -348,7 +363,23 @@ def test_solve_QP():
     ever_active = as.integer(c(1, rep(0, p-1)))
     soln_R_wide = rep(0, p)
     grad = - t(X) %*% Y / n
-    soln_R_wide = selectiveInference:::solve_QP_wide(X, lam, maxiter, soln_R_wide, -t(X) %*% Y / n, grad, Xtheta, ever_active, nactive, kkt_tol, objective_tol, p)$soln
+    soln_R_wide = selectiveInference:::solve_QP_wide(X, 
+                                                     rep(lam, p), 
+                                                     0,
+                                                     maxiter, 
+                                                     soln_R_wide, 
+                                                     -t(X) %*% Y / n, 
+                                                     grad, 
+                                                     Xtheta,
+                                                     ever_active, 
+                                                     nactive, 
+                                                     kkt_tol, 
+                                                     objective_tol, 
+                                                     parameter_tol,
+                                                     p,
+                                                     TRUE,
+                                                     TRUE,
+                                                     TRUE)$soln
 
     """
 
@@ -359,6 +390,9 @@ def test_solve_QP():
     rpy2.robjects.numpy2ri.deactivate()
 
     tol = 1.e-5
+    print(soln - soln_R)
+    print(soln_R - soln_R_wide)
+
     yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
     yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver'
 

From 958c48b7a4d9e5aee6f81c2600edc11f1c8f6fac Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 27 Oct 2017 07:53:31 -0700
Subject: [PATCH 299/617] a test of QP solver for not LASSO (but invertible)

---
 selection/algorithms/tests/test_compareR.py | 106 +++++++++++++++++++-
 1 file changed, 102 insertions(+), 4 deletions(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 1d1145ebb..58b73d66e 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -303,9 +303,8 @@ def test_logistic():
     yield np.testing.assert_allclose, L.summary('onesided')['pval'][1:], R_pvals, tol, tol, False, 'logistic pvalues'
 
 
-
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
-def test_solve_QP():
+def test_solve_QP_lasso():
     """
     Check the R coordinate descent LASSO solver
     """
@@ -345,7 +344,7 @@ def test_solve_QP():
                                            lam, 
                                            maxiter, 
                                            soln_R, 
-                                           -t(X) %*% Y / n, 
+                                           1. * grad,
                                            grad, 
                                            ever_active, 
                                            nactive, 
@@ -368,7 +367,101 @@ def test_solve_QP():
                                                      0,
                                                      maxiter, 
                                                      soln_R_wide, 
-                                                     -t(X) %*% Y / n, 
+                                                     1. * grad,
+                                                     grad, 
+                                                     Xtheta,
+                                                     ever_active, 
+                                                     nactive, 
+                                                     kkt_tol, 
+                                                     objective_tol, 
+                                                     parameter_tol,
+                                                     p,
+                                                     TRUE,
+                                                     TRUE,
+                                                     TRUE)$soln
+
+    """
+
+    rpy.r(R_code)
+
+    soln_R = np.asarray(rpy.r('soln_R'))
+    soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
+    rpy2.robjects.numpy2ri.deactivate()
+
+    tol = 1.e-5
+    print(soln - soln_R)
+    print(soln_R - soln_R_wide)
+
+    yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver for LASSO problem'
+    yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver for LASSO problem'
+
+@np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
+def test_solve_QP():
+    """
+    Check the R coordinate descent LASSO solver
+    """
+
+    n, p = 100, 50
+    lam = 0.08
+
+    X = np.random.standard_normal((n, p))
+
+    loss = rr.squared_error(X, np.zeros(n), coef=1./n)
+    pen = rr.l1norm(p, lagrange=lam)
+    E = np.zeros(p)
+    E[2] = 1
+    Q = rr.identity_quadratic(0, 0, E, 0)
+    problem = rr.simple_problem(loss, pen)
+    soln = problem.solve(Q, min_its=500, tol=1.e-12)
+
+    import rpy2.robjects.numpy2ri
+    rpy2.robjects.numpy2ri.activate()
+
+    rpy.r.assign('X', X)
+    rpy.r.assign('E', E)
+    rpy.r.assign('lam', lam)
+
+    R_code = """
+
+    library(selectiveInference)
+    p = ncol(X)
+    n = nrow(X)
+    soln_R = rep(0, p)
+    grad = 1. * E
+    ever_active = as.integer(c(1, rep(0, p-1)))
+    nactive = as.integer(1)
+    kkt_tol = 1.e-12
+    objective_tol = 1.e-16
+    parameter_tol = 1.e-10
+    maxiter = 500
+    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n, 
+                                           lam, 
+                                           maxiter, 
+                                           soln_R, 
+                                           E,
+                                           grad, 
+                                           ever_active, 
+                                           nactive, 
+                                           kkt_tol, 
+                                           objective_tol, 
+                                           parameter_tol,
+                                           p,
+                                           TRUE,
+                                           TRUE,
+                                           TRUE)$soln
+
+    # test wide solver
+    Xtheta = rep(0, n)
+    nactive = as.integer(1)
+    ever_active = as.integer(c(1, rep(0, p-1)))
+    soln_R_wide = rep(0, p)
+    grad = 1. * E
+    soln_R_wide = selectiveInference:::solve_QP_wide(X, 
+                                                     rep(lam, p), 
+                                                     0,
+                                                     maxiter, 
+                                                     soln_R_wide, 
+                                                     E,
                                                      grad, 
                                                      Xtheta,
                                                      ever_active, 
@@ -393,7 +486,12 @@ def test_solve_QP():
     print(soln - soln_R)
     print(soln_R - soln_R_wide)
 
+    G = X.T.dot(X).dot(soln) / n + E
+    
     yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
     yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver'
+    yield np.testing.assert_allclose, G[soln != 0], -np.sign(soln[soln != 0]) * lam, tol, tol, False, 'checking active coordinate KKT for QP solver'
+    yield nt.assert_true, np.fabs(G).max() < lam * (1. + 1.e-6), 'testing linfinity norm'
 
+    
 

From 43e6ae34b8f574020891e1020c90dd8d2768357c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 30 Oct 2017 21:42:00 -0700
Subject: [PATCH 300/617] adding C software repo

---
 .gitmodules | 3 +++
 C-software  | 1 +
 2 files changed, 4 insertions(+)
 create mode 160000 C-software

diff --git a/.gitmodules b/.gitmodules
index fb40dbf24..af9d5ba96 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "R-software"]
 	path = R-software
 	url = https://github.com/selective-inference/R-software
+[submodule "C-software"]
+	path = C-software
+	url = https://github.com/selective-inference/C-software
diff --git a/C-software b/C-software
new file mode 160000
index 000000000..a3d9a1723
--- /dev/null
+++ b/C-software
@@ -0,0 +1 @@
+Subproject commit a3d9a1723ce94cb430b5dfd3e058fd708a6bae7f

From 541ada4dc1876ca0f14ad4c34b5efb66b6335c9b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 30 Oct 2017 22:16:22 -0700
Subject: [PATCH 301/617] foo test of cython wrapper

---
 C-software                      |  2 +-
 selection/quadratic_program.pyx | 40 +++++++++++++++++++++++++++++++++
 setup.py                        |  6 +++++
 test_foo.py                     |  6 +++++
 4 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 selection/quadratic_program.pyx
 create mode 100644 test_foo.py

diff --git a/C-software b/C-software
index a3d9a1723..610903c02 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit a3d9a1723ce94cb430b5dfd3e058fd708a6bae7f
+Subproject commit 610903c022f5f2577a64185e4bf27ded005947a0
diff --git a/selection/quadratic_program.pyx b/selection/quadratic_program.pyx
new file mode 100644
index 000000000..3be670e46
--- /dev/null
+++ b/selection/quadratic_program.pyx
@@ -0,0 +1,40 @@
+import warnings
+import numpy as np, cython
+cimport numpy as np
+
+DTYPE_float = np.float
+ctypedef np.float_t DTYPE_float_t
+DTYPE_int = np.int
+ctypedef np.int_t DTYPE_int_t
+
+cdef extern from "debias.h":
+
+    void multiply_by_2(double *X, int nval)
+
+def foo(np.ndarray[DTYPE_float_t, ndim=1] A):
+    multiply_by_2(<double *>A.data, A.shape[0]) 
+    print('here')
+    return A
+
+#    int solve_wide(double *X_ptr,              # Sqrt of non-neg def matrix -- X^TX/ncase = nndef #
+#                   double *X_theta_ptr,        # Fitted values   #
+#                   double *linear_func_ptr,    # Linear term in objective #
+#                   double *nndef_diag_ptr,     # Diagonal entries of non-neg def matrix #
+#                   double *gradient_ptr,       # X^TX/ncase times theta + linear_func#
+#                   int *need_update_ptr,       # Keeps track of updated gradient coords #
+#                   int *ever_active_ptr,       # Ever active set: 1-based # 
+#                   int *nactive_ptr,           # Size of ever active set #
+#                   int ncase,                  # How many rows in X #
+#                   int nfeature,               # How many columns in X #
+#                   double *bound_ptr,          # Lagrange multipliers #
+#                   double ridge_term,          # Ridge / ENet term #
+#                   double *theta_ptr,          # current value #
+#                   double *theta_old_ptr,      # previous value #
+#                   int maxiter,                # max number of iterations #
+#                   double kkt_tol,             # precision for checking KKT conditions #
+#                   double objective_tol,       # precision for checking relative decrease in objective value #
+#                   double parameter_tol,       # precision for checking relative convergence of parameter #
+#                   int max_active,             # Upper limit for size of active set -- otherwise break # 
+#                   int objective_stop,         # Break based on convergence of objective value? #
+#                   int kkt_stop,               # Break based on KKT? #
+#                   int param_stop)             # Break based on parameter convergence? #
diff --git a/setup.py b/setup.py
index 1263ef0f4..ca3821287 100755
--- a/setup.py
+++ b/setup.py
@@ -52,6 +52,12 @@
                           libraries=['m']),
                 )
 
+EXTS.append(Extension('selection.quadratic_program',
+                      ['selection/quadratic_program.pyx', 
+                       'C-software/src/quadratic_program_wide.c'],
+                      libraries=['m'],
+                      include_dirs=['C-software/src']))
+
 # Cython is a dependency for building extensions, iff we don't have stamped
 # up pyx and c files.
 build_ext, need_cython = cyproc_exts(EXTS,
diff --git a/test_foo.py b/test_foo.py
new file mode 100644
index 000000000..66c5f5f31
--- /dev/null
+++ b/test_foo.py
@@ -0,0 +1,6 @@
+from selection.quadratic_program import foo
+import numpy as np
+
+A = np.arange(10) * 2.
+B = A.copy()
+print(B, foo(A))

From fdba8201607f14d9d3c7f9d0611fdade76aaccf3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 30 Oct 2017 22:58:07 -0700
Subject: [PATCH 302/617] C code for debiasing matrix running, test not passing

---
 C-software                                    |  2 +-
 selection/algorithms/debiased_lasso.py        | 51 ++++++++++++
 selection/algorithms/debiased_lasso_utils.pyx | 81 +++++++++++++++++++
 .../algorithms/tests/test_debiased_lasso.py   |  6 +-
 selection/quadratic_program.pyx               | 40 ---------
 setup.py                                      |  4 +-
 6 files changed, 140 insertions(+), 44 deletions(-)
 create mode 100644 selection/algorithms/debiased_lasso_utils.pyx
 delete mode 100644 selection/quadratic_program.pyx

diff --git a/C-software b/C-software
index 610903c02..626c889fe 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit 610903c022f5f2577a64185e4bf27ded005947a0
+Subproject commit 626c889fec185ee2b9d505dc379b0f2781288acd
diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index f26c085cd..3719d26e2 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -4,6 +4,7 @@
                         l1norm,
                         simple_problem)
 
+from .debiased_lasso_utils import solve_wide_
 from ..constraints.affine import constraints
 
 def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):
@@ -44,6 +45,56 @@ def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1
 
     return soln
 
+def _find_row_approx_inverse_X(X, j, delta, 
+                               maxiter=50,
+                               kkt_tol=1.e-4,
+                               objective_tol=1.e-4,
+                               parameter_tol=1.e-4,
+                               kkt_stop=True,
+                               objective_stop=True,
+                               parameter_stop=True,
+                               max_active=None,
+                               ):
+    n, p = X.shape
+    theta = np.zeros(p)
+    theta_old = np.zeros(p)
+    X_theta = np.zeros(n)
+    linear_func = np.zeros(p)
+    linear_func[j] = -1
+    gradient = linear_func.copy()
+    ever_active = -np.ones(p, np.int)
+    nactive = np.array([0], np.int)
+    bound = np.ones(p) * delta
+    ridge_term = 0
+
+    nndef_diag = (X**2).sum(0) / X.shape[0]
+    need_update = np.zeros(p, np.int)
+
+    if max_active is None:
+        max_active = max(50, 0.3 * n)
+
+    solve_wide_(X,
+                X_theta,
+                linear_func,
+                nndef_diag,
+                gradient,
+                need_update,
+                ever_active, 
+                nactive,
+                bound,
+                ridge_term,
+                theta,
+                theta_old,
+                maxiter,
+                kkt_tol,
+                objective_tol,
+                parameter_tol,
+                max_active,
+                kkt_stop,
+                objective_stop,
+                parameter_stop)
+
+    return theta
 
 def debiased_lasso_inference(lasso_obj, variables, delta):
 
diff --git a/selection/algorithms/debiased_lasso_utils.pyx b/selection/algorithms/debiased_lasso_utils.pyx
new file mode 100644
index 000000000..d0992cd09
--- /dev/null
+++ b/selection/algorithms/debiased_lasso_utils.pyx
@@ -0,0 +1,81 @@
+import warnings
+import numpy as np, cython
+cimport numpy as np
+
+DTYPE_float = np.float
+ctypedef np.float_t DTYPE_float_t
+DTYPE_int = np.int
+ctypedef np.int_t DTYPE_int_t
+
+cdef extern from "debias.h":
+
+   int solve_wide(double *X_ptr,              # Sqrt of non-neg def matrix -- X^TX/ncase = nndef #
+                  double *X_theta_ptr,        # Fitted values   #
+                  double *linear_func_ptr,    # Linear term in objective #
+                  double *nndef_diag_ptr,     # Diagonal entries of non-neg def matrix #
+                  double *gradient_ptr,       # X^TX/ncase times theta + linear_func#
+                  int *need_update_ptr,       # Keeps track of updated gradient coords #
+                  int *ever_active_ptr,       # Ever active set: 1-based # 
+                  int *nactive_ptr,           # Size of ever active set #
+                  int ncase,                  # How many rows in X #
+                  int nfeature,               # How many columns in X #
+                  double *bound_ptr,          # Lagrange multipliers #
+                  double ridge_term,          # Ridge / ENet term #
+                  double *theta_ptr,          # current value #
+                  double *theta_old_ptr,      # previous value #
+                  int maxiter,                # max number of iterations #
+                  double kkt_tol,             # precision for checking KKT conditions #
+                  double objective_tol,       # precision for checking relative decrease in objective value #
+                  double parameter_tol,       # precision for checking relative convergence of parameter #
+                  int max_active,             # Upper limit for size of active set -- otherwise break # 
+                  int kkt_stop,               # Break based on KKT? #
+                  int objective_stop,         # Break based on convergence of objective value? #
+                  int parameter_stop)         # Break based on parameter convergence? #
+
+
+def solve_wide_(np.ndarray[DTYPE_float_t, ndim=2] X,            # Sqrt of non-neg def matrix -- X^TX/ncase = nndef 
+                np.ndarray[DTYPE_float_t, ndim=1] X_theta,      # Fitted values   #
+                np.ndarray[DTYPE_float_t, ndim=1] linear_func,  # Linear term in objective #
+                np.ndarray[DTYPE_float_t, ndim=1] nndef_diag,   # Diagonal entries of non-neg def matrix #
+                np.ndarray[DTYPE_float_t, ndim=1] gradient,     # X^TX/ncase times theta + linear_func#
+                np.ndarray[DTYPE_int_t, ndim=1] need_update,    # Keeps track of updated gradient coords #
+                np.ndarray[DTYPE_int_t, ndim=1] ever_active,    # Ever active set: 1-based # 
+                np.ndarray[DTYPE_int_t, ndim=1] nactive,        # Size of ever active set #
+                np.ndarray[DTYPE_float_t, ndim=1] bound,        # Lagrange multipliers #
+                double ridge_term,                              # Ridge / ENet term #
+                np.ndarray[DTYPE_float_t, ndim=1] theta,        # current value #
+                np.ndarray[DTYPE_float_t, ndim=1] theta_old,    # previous value #
+                int maxiter,                                    # max number of iterations #
+                double kkt_tol,                                 # precision for checking KKT conditions #
+                double objective_tol,                           # precision for checking relative 
+                                                                #   decrease in objective value #
+                double parameter_tol,                           # precision for checking 
+                                                                #   relative convergence of parameter #
+                int max_active,                                 # Upper limit for size of active set #
+                int kkt_stop,                                   # Break based on KKT? #
+                int objective_stop,                             # Break based on convergence of objective value? #
+                int parameter_stop):                            # Break based on parameter convergence? #
+
+    solve_wide(<double *>X.data,
+               <double *>X_theta.data,
+               <double *>linear_func.data,
+               <double *>nndef_diag.data,
+               <double *>gradient.data,
+               <int *>need_update.data,
+               <int *>ever_active.data,
+               <int *>nactive.data,
+	       <int>X.shape[0],
+	       <int>X.shape[1],
+               <double *>bound.data,
+               ridge_term,
+               <double *>theta.data,
+               <double *>theta_old.data,
+               maxiter,
+               kkt_tol,
+               parameter_tol,
+               objective_tol,
+               max_active,
+               kkt_stop,
+               parameter_stop,
+               objective_stop)
+
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index 5dc036a73..188cce8aa 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -7,7 +7,8 @@
 
 from selection.algorithms.lasso import lasso 
 from selection.algorithms.debiased_lasso import (debiased_lasso_inference,
-                                                 _find_row_approx_inverse)
+                                                 _find_row_approx_inverse,
+                                                 _find_row_approx_inverse_X)
 import regreg.api as rr
 
 def test_gaussian(n=100, p=20):
@@ -36,6 +37,8 @@ def test_approx_inverse():
     
     soln = _find_row_approx_inverse(S, j, delta)
 
+    soln2_ = _find_row_approx_inverse_X(X, j, delta)
+
     basis_vector = np.zeros(p)
     basis_vector[j] = 1.
 
@@ -46,3 +49,4 @@ def test_approx_inverse():
     nt.assert_equal(np.argmax(np.fabs(U)), j)
     nt.assert_equal(np.sign(U[j]), -np.sign(soln[j]))
     nt.assert_raises(ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta)
+    np.testing.assert_allclose(soln, soln2_)
diff --git a/selection/quadratic_program.pyx b/selection/quadratic_program.pyx
deleted file mode 100644
index 3be670e46..000000000
--- a/selection/quadratic_program.pyx
+++ /dev/null
@@ -1,40 +0,0 @@
-import warnings
-import numpy as np, cython
-cimport numpy as np
-
-DTYPE_float = np.float
-ctypedef np.float_t DTYPE_float_t
-DTYPE_int = np.int
-ctypedef np.int_t DTYPE_int_t
-
-cdef extern from "debias.h":
-
-    void multiply_by_2(double *X, int nval)
-
-def foo(np.ndarray[DTYPE_float_t, ndim=1] A):
-    multiply_by_2(<double *>A.data, A.shape[0]) 
-    print('here')
-    return A
-
-#    int solve_wide(double *X_ptr,              # Sqrt of non-neg def matrix -- X^TX/ncase = nndef #
-#                   double *X_theta_ptr,        # Fitted values   #
-#                   double *linear_func_ptr,    # Linear term in objective #
-#                   double *nndef_diag_ptr,     # Diagonal entries of non-neg def matrix #
-#                   double *gradient_ptr,       # X^TX/ncase times theta + linear_func#
-#                   int *need_update_ptr,       # Keeps track of updated gradient coords #
-#                   int *ever_active_ptr,       # Ever active set: 1-based # 
-#                   int *nactive_ptr,           # Size of ever active set #
-#                   int ncase,                  # How many rows in X #
-#                   int nfeature,               # How many columns in X #
-#                   double *bound_ptr,          # Lagrange multipliers #
-#                   double ridge_term,          # Ridge / ENet term #
-#                   double *theta_ptr,          # current value #
-#                   double *theta_old_ptr,      # previous value #
-#                   int maxiter,                # max number of iterations #
-#                   double kkt_tol,             # precision for checking KKT conditions #
-#                   double objective_tol,       # precision for checking relative decrease in objective value #
-#                   double parameter_tol,       # precision for checking relative convergence of parameter #
-#                   int max_active,             # Upper limit for size of active set -- otherwise break # 
-#                   int objective_stop,         # Break based on convergence of objective value? #
-#                   int kkt_stop,               # Break based on KKT? #
-#                   int param_stop)             # Break based on parameter convergence? #
diff --git a/setup.py b/setup.py
index ca3821287..4b4a4cc53 100755
--- a/setup.py
+++ b/setup.py
@@ -52,8 +52,8 @@
                           libraries=['m']),
                 )
 
-EXTS.append(Extension('selection.quadratic_program',
-                      ['selection/quadratic_program.pyx', 
+EXTS.append(Extension('selection.algorithms.debiased_lasso_utils',
+                      ['selection/algorithms/debiased_lasso_utils.pyx',
                        'C-software/src/quadratic_program_wide.c'],
                       libraries=['m'],
                       include_dirs=['C-software/src']))

From 6b43abdca7ad559ec13f3d506b4463739113f89f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 12:29:13 -0700
Subject: [PATCH 303/617] BF: design has to be a fortranarray to use C code

---
 selection/algorithms/debiased_lasso.py        | 15 ++++++++----
 .../algorithms/tests/test_debiased_lasso.py   | 23 +++++++++++--------
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 3719d26e2..6a04d8634 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -55,6 +55,11 @@ def _find_row_approx_inverse_X(X, j, delta,
                                parameter_stop=True,
                                max_active=None,
                                ):
+
+    # need a copy as column major ordering for C code
+
+    X_F = np.asfortranarray(X)
+
     n, p = X.shape
     theta = np.zeros(p)
     theta_old = np.zeros(p)
@@ -62,18 +67,20 @@ def _find_row_approx_inverse_X(X, j, delta,
     linear_func = np.zeros(p)
     linear_func[j] = -1
     gradient = linear_func.copy()
-    ever_active = -np.ones(p, np.int)
-    nactive = np.array([0], np.int)
+    ever_active = np.zeros(p, np.int)
+    ever_active[0] = j+1 # C code has ever_active as 1-based
+    nactive = np.array([1], np.int)
     bound = np.ones(p) * delta
+
     ridge_term = 0
 
-    nndef_diag = (X**2).sum(0) / X.shape[0]
+    nndef_diag = (X**2).sum(0) / n
     need_update = np.zeros(p, np.int)
 
     if max_active is None:
         max_active = max(50, 0.3 * n)
 
-    solve_wide_(X,
+    solve_wide_(X_F,
                 X_theta,
                 linear_func,
                 nndef_diag,
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index 188cce8aa..1746594eb 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -27,17 +27,20 @@ def test_gaussian(n=100, p=20):
     print(debiased_lasso_inference(L, L.active, np.sqrt(2 * np.log(p) / n)))
     print(beta)
 
-def test_approx_inverse():
+def test_approx_inverse(n=50, p=100):
 
     n, p = 50, 100
     X = np.random.standard_normal((n, p))
-    S = X.T.dot(X) / n
     j = 5
-    delta = 0.60
+    delta = 0.30
+    
+    X[:,3] = X[:,3] + X[:,j]
+    X[:,10] = X[:,10] + X[:,j]
+    S = X.T.dot(X) / n
     
-    soln = _find_row_approx_inverse(S, j, delta)
+    soln = _find_row_approx_inverse(S, j, delta, solve_args={'min_its':500, 'tol':1.e-14, 'max_its':1000} )
 
-    soln2_ = _find_row_approx_inverse_X(X, j, delta)
+    soln_C = _find_row_approx_inverse_X(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, maxiter=1000, objective_tol=1.e-14)
 
     basis_vector = np.zeros(p)
     basis_vector[j] = 1.
@@ -45,8 +48,8 @@ def test_approx_inverse():
     nt.assert_true(np.fabs(S.dot(soln) - basis_vector).max() < delta * 1.001)
 
     U = - S.dot(-soln) - basis_vector
-    nt.assert_true(np.fabs(U).max() < delta * 1.001)
-    nt.assert_equal(np.argmax(np.fabs(U)), j)
-    nt.assert_equal(np.sign(U[j]), -np.sign(soln[j]))
-    nt.assert_raises(ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta)
-    np.testing.assert_allclose(soln, soln2_)
+
+    yield nt.assert_true, np.fabs(U).max() < delta * 1.001
+    yield nt.assert_equal, np.sign(U[j]), -np.sign(soln[j])
+    yield nt.assert_raises, ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta
+    yield np.testing.assert_allclose, soln, soln_C, 1.e-3

From c265eb6c9f4d3eb75a63a52f0e98100345197481 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 12:31:44 -0700
Subject: [PATCH 304/617] unnecessary file

---
 test_foo.py | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 test_foo.py

diff --git a/test_foo.py b/test_foo.py
deleted file mode 100644
index 66c5f5f31..000000000
--- a/test_foo.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from selection.quadratic_program import foo
-import numpy as np
-
-A = np.arange(10) * 2.
-B = A.copy()
-print(B, foo(A))

From c9a41bdfc989b57df98fc21e7b8ed251257f1934 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 12:43:54 -0700
Subject: [PATCH 305/617] updating R-software

---
 R-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-software b/R-software
index 85f706302..ff598d095 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit 85f7063020b99858790f0858896c8a4889f34742
+Subproject commit ff598d095f87c2a1e44f6349e3843f7dfe342feb

From ef6a300c681aadd7b556d8504a8f1673b0f15337 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 12:53:45 -0700
Subject: [PATCH 306/617] trying to see git log in travis

---
 .travis.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 34b7c2eef..52f69dfae 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -75,8 +75,7 @@ install:
   - cd R-software
   - git submodule init
   - git submodule update
-  - rm -f selectiveInference/src/RcppExports.cpp
-  - rm -f selectiveInference/R/RcppExports.R
+  - git log
   - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
   - sudo Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference

From f2ac637846cbf696174aa18a33a869629b563faf Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 13:46:57 -0700
Subject: [PATCH 307/617] trying to make stricter numpy requirement to resolve
 C extension problem

---
 selection/info.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/info.py b/selection/info.py
index a6e3bf65f..c3e1bdd9d 100644
--- a/selection/info.py
+++ b/selection/info.py
@@ -41,8 +41,8 @@
 """
 
 # versions
-NUMPY_MIN_VERSION='1.3'
-SCIPY_MIN_VERSION = '0.7'
+NUMPY_MIN_VERSION='1.7.1'
+SCIPY_MIN_VERSION = '0.9'
 CYTHON_MIN_VERSION = '0.21'
 MPMATH_MIN_VERSION = "0.18"
 PYINTER_MIN_VERSION = "0.1.6"

From 74ba421c8e87ff4c8ac352547b31dd102aad655d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 13:54:20 -0700
Subject: [PATCH 308/617] trying to import statsmodels once more

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 52f69dfae..7db91c7a0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -75,11 +75,11 @@ install:
   - cd R-software
   - git submodule init
   - git submodule update
-  - git log
   - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
   - sudo Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference
   - cd ..
+  - python -c "from statsmodels.api import PHReg"
   - travis_install $INSTALL_TYPE
 
 # command to run tests, e.g. python setup.py test

From 712bbd36aba5f52d9e96c0e902f2ff70280c5cae Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 13:55:39 -0700
Subject: [PATCH 309/617] trying to import numpy in travis

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 7db91c7a0..5986ed308 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -79,6 +79,7 @@ install:
   - sudo Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference
   - cd ..
+  - python -c "import numpy as np"
   - python -c "from statsmodels.api import PHReg"
   - travis_install $INSTALL_TYPE
 

From d78645956091ede24bf4fa5091630ea8a6858bdd Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 14:04:22 -0700
Subject: [PATCH 310/617] running right after installing requirements

---
 .travis.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 5986ed308..554a29bab 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -56,6 +56,7 @@ before_install:
   - git clone https://github.com/jonathan-taylor/regreg.git
   - cd regreg
   - pip install -r requirements.txt
+  - python -c "from statsmodels.api import PHReg"
   - pip install -e .
   - cd ..
   - sudo apt-get install software-properties-common
@@ -79,8 +80,6 @@ install:
   - sudo Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference
   - cd ..
-  - python -c "import numpy as np"
-  - python -c "from statsmodels.api import PHReg"
   - travis_install $INSTALL_TYPE
 
 # command to run tests, e.g. python setup.py test

From 46f2b5fe4fddc9db512edf14b6b2f5bd9d3f4acd Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 14:35:51 -0700
Subject: [PATCH 311/617] after we've install selection requirements

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 554a29bab..7db91c7a0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -56,7 +56,6 @@ before_install:
   - git clone https://github.com/jonathan-taylor/regreg.git
   - cd regreg
   - pip install -r requirements.txt
-  - python -c "from statsmodels.api import PHReg"
   - pip install -e .
   - cd ..
   - sudo apt-get install software-properties-common
@@ -80,6 +79,7 @@ install:
   - sudo Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference
   - cd ..
+  - python -c "from statsmodels.api import PHReg"
   - travis_install $INSTALL_TYPE
 
 # command to run tests, e.g. python setup.py test

From 08cb7ff510d6da43d11f0aee4bd1022d5e41bd72 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 14:53:06 -0700
Subject: [PATCH 312/617] trying newer constraint on numpy

---
 selection/info.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/info.py b/selection/info.py
index c3e1bdd9d..8d2ccb4ce 100644
--- a/selection/info.py
+++ b/selection/info.py
@@ -41,7 +41,7 @@
 """
 
 # versions
-NUMPY_MIN_VERSION='1.7.1'
+NUMPY_MIN_VERSION='1.13.3'
 SCIPY_MIN_VERSION = '0.9'
 CYTHON_MIN_VERSION = '0.21'
 MPMATH_MIN_VERSION = "0.18"

From 13e7b23bffd1d81f10ae146dbbe5fb75f6af28fe Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 14:55:10 -0700
Subject: [PATCH 313/617] reverting numpy version

---
 selection/info.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/info.py b/selection/info.py
index 8d2ccb4ce..c3e1bdd9d 100644
--- a/selection/info.py
+++ b/selection/info.py
@@ -41,7 +41,7 @@
 """
 
 # versions
-NUMPY_MIN_VERSION='1.13.3'
+NUMPY_MIN_VERSION='1.7.1'
 SCIPY_MIN_VERSION = '0.9'
 CYTHON_MIN_VERSION = '0.21'
 MPMATH_MIN_VERSION = "0.18"

From 9121b747df0a90e9eeeaaca714359ab83e2ce628 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 31 Oct 2017 19:13:49 -0700
Subject: [PATCH 314/617] updating R-software

---
 R-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-software b/R-software
index ff598d095..232760d6a 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit ff598d095f87c2a1e44f6349e3843f7dfe342feb
+Subproject commit 232760d6aef5182e040b82e30555f4af5ad6803c

From 56d902ef69d5416cbc9550750dc9a88755b45f98 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 2 Nov 2017 17:06:14 -0700
Subject: [PATCH 315/617] updating C-software

---
 C-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/C-software b/C-software
index 626c889fe..c94a73666 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit 626c889fec185ee2b9d505dc379b0f2781288acd
+Subproject commit c94a736665e48ef416ba1865a230c759b12e76b9

From 38ac75a081be8b367d99dbe13551c4ad418b49c2 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 2 Nov 2017 17:12:08 -0700
Subject: [PATCH 316/617] more update C software; using C-ordered X for
 debiased lasso

---
 C-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/C-software b/C-software
index c94a73666..158c64d8d 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit c94a736665e48ef416ba1865a230c759b12e76b9
+Subproject commit 158c64d8d81fbcf434869c0c68f5bb7a4a9cdf5a

From eab09b3d1361ff539cde7d456a57e81128b30ac7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 2 Nov 2017 17:12:22 -0700
Subject: [PATCH 317/617] more update C software; using C-ordered X for
 debiased lasso

---
 selection/algorithms/debiased_lasso.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 6a04d8634..c270b233a 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -56,10 +56,6 @@ def _find_row_approx_inverse_X(X, j, delta,
                                max_active=None,
                                ):
 
-    # need a copy as column major ordering for C code
-
-    X_F = np.asfortranarray(X)
-
     n, p = X.shape
     theta = np.zeros(p)
     theta_old = np.zeros(p)
@@ -80,7 +76,7 @@ def _find_row_approx_inverse_X(X, j, delta,
     if max_active is None:
         max_active = max(50, 0.3 * n)
 
-    solve_wide_(X_F,
+    solve_wide_(X,
                 X_theta,
                 linear_func,
                 nndef_diag,

From db6b3fd2d16eade8e6b4306cea48c365f092504c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 6 Nov 2017 18:10:41 -0800
Subject: [PATCH 318/617] updating C-software

---
 C-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/C-software b/C-software
index 158c64d8d..ec6a954d6 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit 158c64d8d81fbcf434869c0c68f5bb7a4a9cdf5a
+Subproject commit ec6a954d6b335439115e961abde91fa5a07a3669

From 51d4cec3709fadd2beac58b7d3e71752fc51376f Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j9t.SUNet>
Date: Tue, 7 Nov 2017 09:11:06 -0800
Subject: [PATCH 319/617] new directory

---
 selection/adjusted_MLE/__init__.py       | 0
 selection/adjusted_MLE/tests/__init__.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 selection/adjusted_MLE/__init__.py
 create mode 100644 selection/adjusted_MLE/tests/__init__.py

diff --git a/selection/adjusted_MLE/__init__.py b/selection/adjusted_MLE/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/adjusted_MLE/tests/__init__.py b/selection/adjusted_MLE/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb

From a9ba4708f5fb889ef3a7aac9f7945493ca701f8c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j9t.SUNet>
Date: Tue, 7 Nov 2017 10:45:42 -0800
Subject: [PATCH 320/617] added test for pivot based on exact MLE

---
 selection/adjusted_MLE/tests/exact_MLE.py | 46 +++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 selection/adjusted_MLE/tests/exact_MLE.py

diff --git a/selection/adjusted_MLE/tests/exact_MLE.py b/selection/adjusted_MLE/tests/exact_MLE.py
new file mode 100644
index 000000000..d2fb991a4
--- /dev/null
+++ b/selection/adjusted_MLE/tests/exact_MLE.py
@@ -0,0 +1,46 @@
+import numpy as np
+from scipy.stats import norm as ndist
+
+def grad_CGF(mu, randomization_scale = 0.5, threshold = 2):
+    grad = mu + (1. / np.sqrt(1. + randomization_scale ** 2.)) * (ndist.pdf((threshold -mu)
+                                                                          / (np.sqrt(1.+randomization_scale ** 2.)))
+                                                                / (1.-ndist.cdf(( threshold -mu) /(np.sqrt(1.+randomization_scale ** 2.)))))
+    return grad
+
+def fisher_info(mu, randomization_scale = 0.5, threshold = 2):
+    hessian = 1.- (1./(1.+ randomization_scale**2.))*(((mu-threshold)/(np.sqrt(1.+randomization_scale**2.)))
+                                                    *ndist.pdf((threshold-mu)/(np.sqrt(1.+randomization_scale**2.)))
+                                                    / (1.-ndist.cdf((threshold-mu)/(np.sqrt(1.+randomization_scale**2.)))))
+    - (1./(1.+randomization_scale**2.))*((ndist.pdf((threshold-mu)/(np.sqrt(1.+randomization_scale**2.)))
+                                                     / (1.-ndist.cdf((threshold-mu)/(np.sqrt(1.+randomization_scale**2.)))))**2)
+
+    return hessian
+
+
+def simulate_truncated(mu, randomization_scale = 0.5, threshold = 2):
+    while True:
+        Z = np.random.normal(mu, 1, 1)
+        W = np.random.normal(0, randomization_scale, 1)
+        if (Z + W > threshold):
+            return Z
+
+
+def test_pivot(mu, randomization_scale = 0.5, threshold = 2):
+    Z = np.array([simulate_truncated(mu, randomization_scale = randomization_scale, threshold=threshold) for _ in range(25000)])
+
+    mu_seq = np.linspace(-7., 6, num = 2600)
+    grad_partition = np.zeros(mu_seq.shape[0])
+    for i in range(mu_seq.shape[0]):
+        grad_partition[i] = grad_CGF(mu_seq[i])
+
+    pivot = []
+    exact_MLE = []
+    sd_MLE = 1/ np.sqrt(fisher_info(mu))
+    for k in range(Z.shape[0]):
+        MLE = mu_seq[np.argmin(np.abs(grad_partition - Z[k]))]
+        exact_MLE.append(MLE)
+        pivot.append((MLE-mu)/sd_MLE)
+
+    return np.asarray(pivot), np.asarray(exact_MLE)
+
+print(test_pivot(1))
\ No newline at end of file

From 1c538aa09fed2c5ccf37fc39135d4d1fd2cf85dd Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j9t.SUNet>
Date: Tue, 7 Nov 2017 10:55:28 -0800
Subject: [PATCH 321/617] debugged hessian

---
 selection/adjusted_MLE/tests/exact_MLE.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/selection/adjusted_MLE/tests/exact_MLE.py b/selection/adjusted_MLE/tests/exact_MLE.py
index d2fb991a4..b7561637e 100644
--- a/selection/adjusted_MLE/tests/exact_MLE.py
+++ b/selection/adjusted_MLE/tests/exact_MLE.py
@@ -8,11 +8,10 @@ def grad_CGF(mu, randomization_scale = 0.5, threshold = 2):
     return grad
 
 def fisher_info(mu, randomization_scale = 0.5, threshold = 2):
-    hessian = 1.- (1./(1.+ randomization_scale**2.))*(((mu-threshold)/(np.sqrt(1.+randomization_scale**2.)))
-                                                    *ndist.pdf((threshold-mu)/(np.sqrt(1.+randomization_scale**2.)))
-                                                    / (1.-ndist.cdf((threshold-mu)/(np.sqrt(1.+randomization_scale**2.)))))
-    - (1./(1.+randomization_scale**2.))*((ndist.pdf((threshold-mu)/(np.sqrt(1.+randomization_scale**2.)))
-                                                     / (1.-ndist.cdf((threshold-mu)/(np.sqrt(1.+randomization_scale**2.)))))**2)
+    variance = 1.+randomization_scale**2.
+    hessian = 1.- (1./variance)*((((mu-threshold)/(np.sqrt(variance)))*ndist.pdf((threshold-mu)/(np.sqrt(variance))))/(1.-ndist.cdf((threshold-mu)/(np.sqrt(variance)))))\
+              - (1./(variance))*((ndist.pdf((threshold-mu)/(np.sqrt(variance)))
+                                                     / (1.-ndist.cdf((threshold-mu)/(np.sqrt(variance)))))**2)
 
     return hessian
 
@@ -43,4 +42,6 @@ def test_pivot(mu, randomization_scale = 0.5, threshold = 2):
 
     return np.asarray(pivot), np.asarray(exact_MLE)
 
-print(test_pivot(1))
\ No newline at end of file
+#print("grad cgf check", grad_CGF(2))
+#print("hessian cgf check", fisher_info(0))
+#print(test_pivot(1))
\ No newline at end of file

From 7d55a57047b95a00cdef9930842fd0019c1409ec Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j9t.SUNet>
Date: Tue, 7 Nov 2017 13:24:24 -0800
Subject: [PATCH 322/617] added approx mle in simple case

---
 selection/adjusted_MLE/tests/approx_MLE.py | 77 ++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 selection/adjusted_MLE/tests/approx_MLE.py

diff --git a/selection/adjusted_MLE/tests/approx_MLE.py b/selection/adjusted_MLE/tests/approx_MLE.py
new file mode 100644
index 000000000..c88034ef0
--- /dev/null
+++ b/selection/adjusted_MLE/tests/approx_MLE.py
@@ -0,0 +1,77 @@
+import numpy as np
+from scipy.stats import norm as ndist
+from scipy.optimize import minimize
+
+def log_barrier(u, barrier_scale, threshold = 2.):
+
+    BIG = 10 ** 10
+    violation = u-threshold<0.
+    return np.log(1 + (np.sqrt(barrier_scale)/ (u-threshold))) + violation* BIG
+
+def grad_log_barrier(u, barrier_scale, threshold = 2.):
+    return 1./(u-threshold + np.sqrt(barrier_scale)) - 1./(u-threshold)
+
+def grad_log_hessian(u, barrier_scale, threshold = 2.):
+    return -1. / ((u - threshold + np.sqrt(barrier_scale))**2.) + 1. / ((u - threshold)** 2.)
+
+def approx_grad_cgf(mu, randomization_scale = 0.5, threshold = 2, nstep= 50, tol=1.e-10):
+
+    variance = 1 + randomization_scale ** 2.
+    objective = lambda u: -u*(mu/variance) + (u ** 2.)/(2.* variance)+ log_barrier(u, variance)
+    gradient = lambda u: -(mu/variance) + u/variance + grad_log_barrier(u, variance)
+    hessian = lambda u: 1/variance + grad_log_hessian(u, variance)
+
+    current_value = np.inf
+    initial = threshold +1.
+    current = initial
+    step = 1
+
+    for itercount in range(nstep):
+        newton_step = (gradient(current)/(hessian(current)))
+
+        # make sure proposal is feasible
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * newton_step
+            failing = (proposal < threshold)
+            if not failing.sum():
+                break
+            step *= 0.5 ** failing
+
+            if count >= 40:
+                raise ValueError('not finding a feasible point')
+
+        # make sure proposal is a descent
+
+        while True:
+            proposal = current - step * newton_step
+            proposed_value = objective(proposal)
+            if proposed_value <= current_value:
+                break
+            step *= 0.5
+
+        # stop if relative decrease is small
+
+        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+            current = proposal
+            current_value = proposed_value
+            break
+
+        current = proposal
+        current_value = proposed_value
+
+        if itercount % 4 == 0:
+            step *= 2
+
+    value = objective(current)
+    return current/variance + ((randomization_scale** 2.)/(1+randomization_scale**2.))*mu, value, current
+
+def approx_fisher_info(mu, randomization_scale=0.5, threshold=2):
+
+    variance = 1 + randomization_scale ** 2.
+    minimizer = approx_grad_cgf(mu)[2]
+    return (1./ variance**2.)* (1./((1./variance) + grad_log_hessian(minimizer, variance)))+ ((randomization_scale ** 2.)/variance)
+
+#print("grad cgf check", approx_grad_cgf(-1)[0])
+print("fisher info check", approx_fisher_info(-2))
\ No newline at end of file

From aefa2121e7db871080b7e416cca8468a921e2674 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j9t.SUNet>
Date: Tue, 7 Nov 2017 13:28:17 -0800
Subject: [PATCH 323/617] test for pivots based on approx MLE

---
 selection/adjusted_MLE/tests/approx_MLE.py | 32 ++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/approx_MLE.py b/selection/adjusted_MLE/tests/approx_MLE.py
index c88034ef0..078866c8c 100644
--- a/selection/adjusted_MLE/tests/approx_MLE.py
+++ b/selection/adjusted_MLE/tests/approx_MLE.py
@@ -73,5 +73,33 @@ def approx_fisher_info(mu, randomization_scale=0.5, threshold=2):
     minimizer = approx_grad_cgf(mu)[2]
     return (1./ variance**2.)* (1./((1./variance) + grad_log_hessian(minimizer, variance)))+ ((randomization_scale ** 2.)/variance)
 
-#print("grad cgf check", approx_grad_cgf(-1)[0])
-print("fisher info check", approx_fisher_info(-2))
\ No newline at end of file
+def simulate_truncated(mu, randomization_scale = 0.5, threshold = 2):
+    while True:
+        Z = np.random.normal(mu, 1, 1)
+        W = np.random.normal(0, randomization_scale, 1)
+        if (Z + W > threshold):
+            return Z
+
+def test_pivot(mu, randomization_scale=0.5, threshold=2):
+    Z = np.array([simulate_truncated(mu, randomization_scale=randomization_scale, threshold=threshold) for _ in
+                  range(25000)])
+
+    mu_seq = np.linspace(-7., 6, num=2600)
+    grad_partition = np.zeros(mu_seq.shape[0])
+    for i in range(mu_seq.shape[0]):
+        grad_partition[i] = approx_grad_cgf(mu_seq[i])[0]
+
+    pivot = []
+    approx_MLE = []
+    sd_MLE = 1 / np.sqrt(approx_fisher_info(mu))
+    for k in range(Z.shape[0]):
+        MLE = mu_seq[np.argmin(np.abs(grad_partition - Z[k]))]
+        approx_MLE.append(MLE)
+        pivot.append((MLE - mu) / sd_MLE)
+
+    return np.asarray(pivot), np.asarray(approx_MLE)
+
+print(test_pivot(1))
+
+    #print("grad cgf check", approx_grad_cgf(-1)[0])
+#print("fisher info check", approx_fisher_info(-2))
\ No newline at end of file

From a1f655073a43317fa5308afd2867ad62da3d9b85 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j9t.SUNet>
Date: Tue, 7 Nov 2017 15:02:45 -0800
Subject: [PATCH 324/617] added test for computing mle--marginalizes

---
 selection/adjusted_MLE/tests/mle_LASSO.py | 61 +++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 selection/adjusted_MLE/tests/mle_LASSO.py

diff --git a/selection/adjusted_MLE/tests/mle_LASSO.py b/selection/adjusted_MLE/tests/mle_LASSO.py
new file mode 100644
index 000000000..1722257e3
--- /dev/null
+++ b/selection/adjusted_MLE/tests/mle_LASSO.py
@@ -0,0 +1,61 @@
+from __future__ import print_function
+import sys
+
+import numpy as np
+import regreg.api as rr
+from selection.tests.instance import gaussian_instance
+from selection.approx_ci.ci_approx_density import approximate_conditional_density
+from selection.approx_ci.selection_map import M_estimator_map
+
+def test_approximate_MLE(X,
+                         y,
+                         true_mean,
+                         sigma,
+                         seed_n = 0,
+                         lam_frac = 1.,
+                         loss='gaussian',
+                         randomization_scale = 1.):
+    from selection.api import randomization
+
+    n, p = X.shape
+    np.random.seed(seed_n)
+    if loss == "gaussian":
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+        loss = rr.glm.gaussian(X, y)
+
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+    randomization = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+    M_est = M_estimator_map(loss, epsilon, penalty, randomization, randomization_scale=randomization_scale)
+
+    M_est.solve_approx()
+    active = M_est._overall
+    active_set = np.asarray([i for i in range(p) if active[i]])
+    nactive = np.sum(active)
+    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
+    sys.stderr.write("Active set selected by lasso" + str(active_set) + "\n")
+    sys.stderr.write("Observed target" + str(M_est.target_observed) + "\n")
+
+    ci = approximate_conditional_density(M_est)
+    ci.solve_approx()
+    sel_MLE = np.zeros(nactive)
+
+    for j in range(nactive):
+        sel_MLE[j] = ci.approx_MLE_solver(j, step=1, nstep=150)[0]
+
+    return sel_MLE
+
+X, y, beta, nonzero, sigma = gaussian_instance(n=100, p=200, s=5, rho=0., signal=3., sigma=1.)
+true_mean = X.dot(beta)
+test = test_approximate_MLE(X,
+                            y,
+                            true_mean,
+                            sigma,
+                            seed_n = 0,
+                            lam_frac = 1.,
+                            loss='gaussian')
+print(test)
\ No newline at end of file

From 29a41c18d76b14a3502ffdae7614136f0dce6abc Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Tue, 7 Nov 2017 22:31:00 -0800
Subject: [PATCH 325/617] commit before switch

---
 selection/adjusted_MLE/tests/mle_LASSO.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/adjusted_MLE/tests/mle_LASSO.py b/selection/adjusted_MLE/tests/mle_LASSO.py
index 1722257e3..0090b2f80 100644
--- a/selection/adjusted_MLE/tests/mle_LASSO.py
+++ b/selection/adjusted_MLE/tests/mle_LASSO.py
@@ -49,7 +49,7 @@ def test_approximate_MLE(X,
 
     return sel_MLE
 
-X, y, beta, nonzero, sigma = gaussian_instance(n=100, p=200, s=5, rho=0., signal=3., sigma=1.)
+X, y, beta, nonzero, sigma = gaussian_instance(n=100, p=100, s=2, rho=0., signal=3., sigma=1.)
 true_mean = X.dot(beta)
 test = test_approximate_MLE(X,
                             y,

From 665557a97dd1243afc9ff1fc1e74a8fa1b3f99b7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c06f.SUNet>
Date: Thu, 9 Nov 2017 09:16:19 -0800
Subject: [PATCH 326/617] started function for selective MLE/UMVU

---
 selection/adjusted_MLE/selective_MLE.py  | 94 ++++++++++++++++++++++++
 selection/adjusted_MLE/tests/test_MLE.py | 31 ++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 selection/adjusted_MLE/selective_MLE.py
 create mode 100644 selection/adjusted_MLE/tests/test_MLE.py

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
new file mode 100644
index 000000000..710113d3a
--- /dev/null
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -0,0 +1,94 @@
+import numpy as np
+import regreg.api as rr
+from selection.randomized.M_estimator import M_estimator
+
+class M_estimator_map(M_estimator):
+
+    def __init__(self, loss, epsilon, penalty, randomization, randomization_scale = 1.):
+        M_estimator.__init__(self, loss, epsilon, penalty, randomization)
+        self.randomizer = randomization
+        self.randomization_scale = randomization_scale
+
+    def solve_approx(self):
+        self.solve()
+        (_opt_linear_term, _opt_affine_term) = self.opt_transform
+        self._opt_linear_term = np.concatenate(
+            (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
+        self._opt_affine_term = np.concatenate((_opt_affine_term[self._overall], _opt_affine_term[~self._overall]), 0)
+        self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
+
+        (_score_linear_term, _) = self.score_transform
+        self._score_linear_term = np.concatenate(
+            (_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
+        self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
+        self.feasible_point = np.abs(self.initial_soln[self._overall])
+        nactive = self._overall.sum()
+        self.inactive_subgrad = self.observed_opt_state[nactive:]
+
+        lagrange = []
+        for key, value in self.penalty.weights.iteritems():
+            lagrange.append(value)
+        lagrange = np.asarray(lagrange)
+        self.inactive_lagrange = lagrange[~self._overall]
+
+        X, _ = self.loss.data
+        n, p = X.shape
+        self.p = p
+
+
+        score_cov = np.zeros((p, p))
+        X_active_inv = np.linalg.inv(X[:,self._overall].T.dot(X[:,self._overall]))
+        projection_perp = np.identity(n) - X[:,self._overall].dot(X_active_inv).dot( X[:,self._overall].T)
+        score_cov[:nactive, :nactive] = X_active_inv
+        score_cov[nactive:, nactive:] = X[:,~self._overall].T.dot(projection_perp).dot(X[:,~self._overall])
+
+        self.score_target_cov = score_cov[:, :nactive]
+        self.target_cov = score_cov[:nactive, :nactive]
+        self.target_observed = self.observed_internal_state[:nactive]
+        self.observed_score_state = self.observed_internal_state
+        self.nactive = nactive
+
+        self.B_active = self._opt_linear_term[:nactive, :nactive]
+        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
+        self.B = np.vstack([self.B_active, self.B_inactive])
+
+
+    def setup_map(self, j):
+
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
+        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
+
+        self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
+        self.offset_inactive = self.null_statistic[self.nactive:]
+
+class selective_MLE(rr.smooth_atom):
+    def __init__(self,
+                 map,
+                 coef=1.,
+                 offset=None,
+                 quadratic=None):
+
+        self.map = map
+        self.randomizer_cov = map.randomizer.precision
+        self.target_observed = self.map.target_observed
+        self.nactive = self.target_observed.shape[0]
+        self.target_cov = self.map.target_cov
+
+    def solve_Gaussian_density(self, j):
+
+        self.map.setup_map(j)
+        inverse_cov = np.zeros((1+self.nactive, 1+self.nactive))
+        inverse_cov[0,0] = self.map.A.T.dot(self.map.A)/ self.target_cov[j,j]
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
new file mode 100644
index 000000000..27354c36a
--- /dev/null
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -0,0 +1,31 @@
+from __future__ import print_function
+import numpy as np
+
+import regreg.api as rr
+from selection.tests.instance import gaussian_instance
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import M_estimator_map
+
+def test(n=100, p=50, s=2, signal=3., seed_n = 0, lam_frac=1., randomization_scale=1.):
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+    n, p = X.shape
+    np.random.seed(seed_n)
+
+    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    loss = rr.glm.gaussian(X, y)
+
+    epsilon = 1. / np.sqrt(n)
+
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
+
+    M_est.solve_approx()
+    active = M_est._overall
+    active_set = np.asarray([i for i in range(p) if active[i]])
+    nactive = np.sum(active)
+
+test()
\ No newline at end of file

From 8af6d358b50f8d867388769f09908926ae69e8f5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a222273.SUNet>
Date: Thu, 9 Nov 2017 10:22:29 -0800
Subject: [PATCH 327/617] added Gaussian parameters

---
 selection/adjusted_MLE/selective_MLE.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 710113d3a..93f9d6e26 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -69,7 +69,7 @@ def __init__(self,
                  quadratic=None):
 
         self.map = map
-        self.randomizer_cov = map.randomizer.precision
+        self.randomizer_precision = map.randomizer.precision
         self.target_observed = self.map.target_observed
         self.nactive = self.target_observed.shape[0]
         self.target_cov = self.map.target_cov
@@ -78,7 +78,25 @@ def solve_Gaussian_density(self, j):
 
         self.map.setup_map(j)
         inverse_cov = np.zeros((1+self.nactive, 1+self.nactive))
-        inverse_cov[0,0] = self.map.A.T.dot(self.map.A)/ self.target_cov[j,j]
+        inverse_cov[0,0] = self.map.A.T.dot(self.randomizer_precision).dot(self.map.A) + 1./self.target_cov[j,j]
+        inverse_cov[0,0:] = self.map.A.T.dot(self.randomizer_precision).dot(self.map.B)
+        inverse_cov[0:,0] = self.map.B.T.dot(self.randomizer_precision).self.map.A
+        inverse_cov[0:,0:] = self.map.B.T.dot(self.randomizer_precision).self.map.B
+        cov = np.linalg.inv(inverse_cov)
+
+        self.L = cov[0,0:].dot(np.linalg.inv(cov[0:,0:]))
+        self.M_1 = (1./inverse_cov[0,0])*(1./self.target_cov[j,j])
+        self.M_2 = (1./inverse_cov[0,0]).dot(self.map.A.T).dot(self.randomizer_precision)
+
+        self.conditional_par = inverse_cov[0:,0:].dot(cov[0:,0]).dot((1./cov[0,0])* self.target_observed[j]) + \
+                               self.B.T(self.randomizer_precision).dot(self.map.null_statistic + self.map.inactive_subgrad)
+
+    def solve_UMVU(self, j):
+
+
+
+
+
 
 
 

From f5fd8171316ddc875655e31960bd847cb9e76dd9 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a222273.SUNet>
Date: Thu, 9 Nov 2017 10:43:46 -0800
Subject: [PATCH 328/617] solver for UMVU

---
 selection/adjusted_MLE/selective_MLE.py | 70 ++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 93f9d6e26..990bbe173 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -25,6 +25,7 @@ def solve_approx(self):
         nactive = self._overall.sum()
         self.inactive_subgrad = self.observed_opt_state[nactive:]
 
+
         lagrange = []
         for key, value in self.penalty.weights.iteritems():
             lagrange.append(value)
@@ -61,12 +62,9 @@ def setup_map(self, j):
         self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
         self.offset_inactive = self.null_statistic[self.nactive:]
 
-class selective_MLE(rr.smooth_atom):
+class selective_MLE():
     def __init__(self,
-                 map,
-                 coef=1.,
-                 offset=None,
-                 quadratic=None):
+                 map):
 
         self.map = map
         self.randomizer_precision = map.randomizer.precision
@@ -74,6 +72,8 @@ def __init__(self,
         self.nactive = self.target_observed.shape[0]
         self.target_cov = self.map.target_cov
 
+        initial = self.map.feasible_point
+
     def solve_Gaussian_density(self, j):
 
         self.map.setup_map(j)
@@ -87,11 +87,67 @@ def solve_Gaussian_density(self, j):
         self.L = cov[0,0:].dot(np.linalg.inv(cov[0:,0:]))
         self.M_1 = (1./inverse_cov[0,0])*(1./self.target_cov[j,j])
         self.M_2 = (1./inverse_cov[0,0]).dot(self.map.A.T).dot(self.randomizer_precision)
+        self.inactive_subgrad = np.zeros(self.map.p)
+        self.inactive_subgrad[self.nactive:] = self.map.inactive_subgrad
+        self.conditioned_value = self.map.null_statistic + self.map.inactive_subgrad
 
         self.conditional_par = inverse_cov[0:,0:].dot(cov[0:,0]).dot((1./cov[0,0])* self.target_observed[j]) + \
-                               self.B.T(self.randomizer_precision).dot(self.map.null_statistic + self.map.inactive_subgrad)
+                               self.B.T(self.randomizer_precision).dot(self.conditioned_value)
+        self.conditional_var = inverse_cov[0:,0:]
+
+    def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
+
+        objective = lambda u: u.T.dot(self.conditional_par) - u.T.dot(self.conditional_var).dot(u)/2. - np.log(1.+ 1./u)
+        grad = lambda u: self.conditional_par - self.conditional_var.dot(u) - 1./(1.+ u) + 1./u
+
+        for itercount in range(nstep):
+            newton_step = grad(current)
+
+            # make sure proposal is feasible
+
+            count = 0
+            while True:
+                count += 1
+                proposal = current - step * newton_step
+                if np.all(proposal > 0):
+                    break
+                step *= 0.5
+                if count >= 40:
+                    raise ValueError('not finding a feasible point')
+
+            # make sure proposal is a descent
+
+            count = 0
+            while True:
+                proposal = current - step * newton_step
+                proposed_value = objective(proposal)
+                # print(current_value, proposed_value, 'minimize')
+                if proposed_value <= current_value:
+                    break
+                step *= 0.5
+
+            # stop if relative decrease is small
+
+            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+                current = proposal
+                current_value = proposed_value
+                break
+
+            current = proposal
+            current_value = proposed_value
+
+            if itercount % 4 == 0:
+                step *= 2
+
+                # print('iter', itercount)
+        value = objective(current)
+        return -(1./self.M_1)*self.L.dot(current)+ (1./self.M_1)*(self.target_observed[j]- self.M_2.dot(self.conditioned_value)), \
+               value
+
+
+
+
 
-    def solve_UMVU(self, j):
 
 
 

From 04f4fa7f3d1979ebfeb51f787fc4f35d9977b48f Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a222273.SUNet>
Date: Thu, 9 Nov 2017 10:48:42 -0800
Subject: [PATCH 329/617] removed solver

---
 selection/adjusted_MLE/selective_MLE.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 990bbe173..827c98ce1 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -74,7 +74,7 @@ def __init__(self,
 
         initial = self.map.feasible_point
 
-    def solve_Gaussian_density(self, j):
+    def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
 
         self.map.setup_map(j)
         inverse_cov = np.zeros((1+self.nactive, 1+self.nactive))
@@ -95,8 +95,6 @@ def solve_Gaussian_density(self, j):
                                self.B.T(self.randomizer_precision).dot(self.conditioned_value)
         self.conditional_var = inverse_cov[0:,0:]
 
-    def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
-
         objective = lambda u: u.T.dot(self.conditional_par) - u.T.dot(self.conditional_var).dot(u)/2. - np.log(1.+ 1./u)
         grad = lambda u: self.conditional_par - self.conditional_var.dot(u) - 1./(1.+ u) + 1./u
 

From fbf6998d0f3b1131a76162ea0b51a92100260c44 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN52eckv.SUNet>
Date: Thu, 9 Nov 2017 11:34:30 -0800
Subject: [PATCH 330/617] removed indexing bugs

---
 selection/adjusted_MLE/selective_MLE.py  | 20 ++++++++++----------
 selection/adjusted_MLE/tests/test_MLE.py |  9 +++++++--
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 827c98ce1..e47f57c9b 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -67,7 +67,7 @@ def __init__(self,
                  map):
 
         self.map = map
-        self.randomizer_precision = map.randomizer.precision
+        self.randomizer_precision = (1./map.randomization_scale)* np.identity(self.map.p)
         self.target_observed = self.map.target_observed
         self.nactive = self.target_observed.shape[0]
         self.target_cov = self.map.target_cov
@@ -79,21 +79,21 @@ def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
         self.map.setup_map(j)
         inverse_cov = np.zeros((1+self.nactive, 1+self.nactive))
         inverse_cov[0,0] = self.map.A.T.dot(self.randomizer_precision).dot(self.map.A) + 1./self.target_cov[j,j]
-        inverse_cov[0,0:] = self.map.A.T.dot(self.randomizer_precision).dot(self.map.B)
-        inverse_cov[0:,0] = self.map.B.T.dot(self.randomizer_precision).self.map.A
-        inverse_cov[0:,0:] = self.map.B.T.dot(self.randomizer_precision).self.map.B
+        inverse_cov[0,1:] = self.map.A.T.dot(self.randomizer_precision).dot(self.map.B)
+        inverse_cov[1:,0] = self.map.B.T.dot(self.randomizer_precision).dot(self.map.A)
+        inverse_cov[1:,1:] = self.map.B.T.dot(self.randomizer_precision).dot(self.map.B)
         cov = np.linalg.inv(inverse_cov)
 
-        self.L = cov[0,0:].dot(np.linalg.inv(cov[0:,0:]))
+        self.L = cov[0,1:].dot(np.linalg.inv(cov[1:,1:]))
         self.M_1 = (1./inverse_cov[0,0])*(1./self.target_cov[j,j])
-        self.M_2 = (1./inverse_cov[0,0]).dot(self.map.A.T).dot(self.randomizer_precision)
+        self.M_2 = (1./inverse_cov[0,0])*(self.map.A.T).dot(self.randomizer_precision)
         self.inactive_subgrad = np.zeros(self.map.p)
         self.inactive_subgrad[self.nactive:] = self.map.inactive_subgrad
-        self.conditioned_value = self.map.null_statistic + self.map.inactive_subgrad
 
-        self.conditional_par = inverse_cov[0:,0:].dot(cov[0:,0]).dot((1./cov[0,0])* self.target_observed[j]) + \
-                               self.B.T(self.randomizer_precision).dot(self.conditioned_value)
-        self.conditional_var = inverse_cov[0:,0:]
+        self.conditioned_value = self.map.null_statistic + self.inactive_subgrad
+        self.conditional_par = inverse_cov[1:,1:].dot(cov[1:,0]).dot((1./cov[0,0])* self.target_observed[j]) + \
+                               self.map.B.T.dot(self.randomizer_precision).dot(self.conditioned_value)
+        self.conditional_var = inverse_cov[1:,1:]
 
         objective = lambda u: u.T.dot(self.conditional_par) - u.T.dot(self.conditional_var).dot(u)/2. - np.log(1.+ 1./u)
         grad = lambda u: self.conditional_par - self.conditional_var.dot(u) - 1./(1.+ u) + 1./u
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 27354c36a..2a902a441 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -1,10 +1,10 @@
 from __future__ import print_function
-import numpy as np
+import numpy as np, sys
 
 import regreg.api as rr
 from selection.tests.instance import gaussian_instance
 from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, selective_MLE
 
 def test(n=100, p=50, s=2, signal=3., seed_n = 0, lam_frac=1., randomization_scale=1.):
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
@@ -21,11 +21,16 @@ def test(n=100, p=50, s=2, signal=3., seed_n = 0, lam_frac=1., randomization_sca
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
     randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+    #randomizer = randomization.gaussian(np.identity(p))
     M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
 
     M_est.solve_approx()
     active = M_est._overall
     active_set = np.asarray([i for i in range(p) if active[i]])
     nactive = np.sum(active)
+    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
+
+    solve_mle = selective_MLE(M_est)
+    mle = solve_mle.solve_UMVU(0)
 
 test()
\ No newline at end of file

From a397e847f5494c5fd3c32dc169f47b3b43e61db8 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN52eckv.SUNet>
Date: Thu, 9 Nov 2017 11:44:03 -0800
Subject: [PATCH 331/617] selective UMVU working

---
 selection/adjusted_MLE/selective_MLE.py  | 8 +++++---
 selection/adjusted_MLE/tests/test_MLE.py | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index e47f57c9b..a23620b92 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -72,8 +72,6 @@ def __init__(self,
         self.nactive = self.target_observed.shape[0]
         self.target_cov = self.map.target_cov
 
-        initial = self.map.feasible_point
-
     def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
 
         self.map.setup_map(j)
@@ -95,9 +93,12 @@ def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
                                self.map.B.T.dot(self.randomizer_precision).dot(self.conditioned_value)
         self.conditional_var = inverse_cov[1:,1:]
 
-        objective = lambda u: u.T.dot(self.conditional_par) - u.T.dot(self.conditional_var).dot(u)/2. - np.log(1.+ 1./u)
+        objective = lambda u: u.T.dot(self.conditional_par) - u.T.dot(self.conditional_var).dot(u)/2. - np.log(1.+ 1./u).sum()
         grad = lambda u: self.conditional_par - self.conditional_var.dot(u) - 1./(1.+ u) + 1./u
 
+        current = self.map.feasible_point
+        current_value = np.inf
+
         for itercount in range(nstep):
             newton_step = grad(current)
 
@@ -119,6 +120,7 @@ def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
             while True:
                 proposal = current - step * newton_step
                 proposed_value = objective(proposal)
+                #print("proposed value", proposed_value, proposal)
                 # print(current_value, proposed_value, 'minimize')
                 if proposed_value <= current_value:
                     break
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 2a902a441..cb9d24bf2 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -32,5 +32,6 @@ def test(n=100, p=50, s=2, signal=3., seed_n = 0, lam_frac=1., randomization_sca
 
     solve_mle = selective_MLE(M_est)
     mle = solve_mle.solve_UMVU(0)
+    print("mle", mle)
 
 test()
\ No newline at end of file

From 30819ad86f5b908d90536e0110946f361092977b Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN52eckv.SUNet>
Date: Thu, 9 Nov 2017 11:48:05 -0800
Subject: [PATCH 332/617] updated test

---
 selection/adjusted_MLE/tests/test_MLE.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index cb9d24bf2..a726ce84e 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -26,12 +26,11 @@ def test(n=100, p=50, s=2, signal=3., seed_n = 0, lam_frac=1., randomization_sca
 
     M_est.solve_approx()
     active = M_est._overall
-    active_set = np.asarray([i for i in range(p) if active[i]])
     nactive = np.sum(active)
     sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
 
     solve_mle = selective_MLE(M_est)
     mle = solve_mle.solve_UMVU(0)
-    print("mle", mle)
+    print("mle", mle, M_est.target_observed[0])
 
 test()
\ No newline at end of file

From c24441e2585c80e906194458eaad392d2b7fd9d8 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN52eckv.SUNet>
Date: Thu, 9 Nov 2017 11:59:19 -0800
Subject: [PATCH 333/617] corrected coefficient

---
 selection/adjusted_MLE/selective_MLE.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index a23620b92..e77342f05 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -141,7 +141,7 @@ def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
 
                 # print('iter', itercount)
         value = objective(current)
-        return -(1./self.M_1)*self.L.dot(current)+ (1./self.M_1)*(self.target_observed[j]- self.M_2.dot(self.conditioned_value)), \
+        return -(1./self.M_1)*self.L.dot(current)+ (1./self.M_1)*(self.target_observed[j]- -(1./self.M_1)*self.M_2.dot(self.conditioned_value)), \
                value
 
 

From c782473b8ed824aa44fbcadb26cbcd7880ec8089 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j2n.SUNet>
Date: Thu, 9 Nov 2017 13:44:21 -0800
Subject: [PATCH 334/617] added offset term in map

---
 selection/adjusted_MLE/selective_MLE.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index e77342f05..75ea52541 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -88,7 +88,7 @@ def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
         self.inactive_subgrad = np.zeros(self.map.p)
         self.inactive_subgrad[self.nactive:] = self.map.inactive_subgrad
 
-        self.conditioned_value = self.map.null_statistic + self.inactive_subgrad
+        self.conditioned_value = self.map.null_statistic + self.inactive_subgrad + self.map._opt_affine_term
         self.conditional_par = inverse_cov[1:,1:].dot(cov[1:,0]).dot((1./cov[0,0])* self.target_observed[j]) + \
                                self.map.B.T.dot(self.randomizer_precision).dot(self.conditioned_value)
         self.conditional_var = inverse_cov[1:,1:]

From 1508ba7d1f3da97b903bcd516a2fa4ef21c39a46 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j2n.SUNet>
Date: Thu, 9 Nov 2017 13:53:45 -0800
Subject: [PATCH 335/617] changed sign

---
 selection/adjusted_MLE/selective_MLE.py  | 2 +-
 selection/adjusted_MLE/tests/test_MLE.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 75ea52541..5f3c0e3ae 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -141,7 +141,7 @@ def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
 
                 # print('iter', itercount)
         value = objective(current)
-        return -(1./self.M_1)*self.L.dot(current)+ (1./self.M_1)*(self.target_observed[j]- -(1./self.M_1)*self.M_2.dot(self.conditioned_value)), \
+        return -(1./self.M_1)*self.L.dot(current)+ (1./self.M_1)*(self.target_observed[j]- (1./self.M_1)*self.M_2.dot(self.conditioned_value)), \
                value
 
 
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index a726ce84e..619b97dcd 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -6,7 +6,7 @@
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, selective_MLE
 
-def test(n=100, p=50, s=2, signal=3., seed_n = 0, lam_frac=1., randomization_scale=1.):
+def test(n=200, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
     np.random.seed(seed_n)
@@ -31,6 +31,6 @@ def test(n=100, p=50, s=2, signal=3., seed_n = 0, lam_frac=1., randomization_sca
 
     solve_mle = selective_MLE(M_est)
     mle = solve_mle.solve_UMVU(0)
-    print("mle", mle, M_est.target_observed[0])
+    print("mle", mle[0], M_est.target_observed[0])
 
 test()
\ No newline at end of file

From dda25124063d43ffb5bd6f2df6dd2453aab6392b Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j2n.SUNet>
Date: Thu, 9 Nov 2017 14:05:51 -0800
Subject: [PATCH 336/617] return sel MLE and unadjusted MLE

---
 selection/adjusted_MLE/tests/test_MLE.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 619b97dcd..06e7a9240 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -6,7 +6,7 @@
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, selective_MLE
 
-def test(n=200, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
+def test(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
     np.random.seed(seed_n)
@@ -30,7 +30,10 @@ def test(n=200, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_sca
     sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
 
     solve_mle = selective_MLE(M_est)
-    mle = solve_mle.solve_UMVU(0)
-    print("mle", mle[0], M_est.target_observed[0])
+    mle = np.zeros(nactive)
+    for j in range(nactive):
+        mle[j] = solve_mle.solve_UMVU(j)[0]
 
-test()
\ No newline at end of file
+    return np.transpose(np.vstack([mle, M_est.target_observed]))
+
+print(test())
\ No newline at end of file

From 4fca0c1952d9d3d10c080f915b777cdec5ad1fc1 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a1eadf8.SUNet>
Date: Thu, 9 Nov 2017 22:52:56 -0800
Subject: [PATCH 337/617] changed code for UMVU computation

---
 selection/adjusted_MLE/selective_MLE.py  | 158 ++++++++++++-----------
 selection/adjusted_MLE/tests/test_MLE.py |  37 ++++--
 2 files changed, 110 insertions(+), 85 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 5f3c0e3ae..aa3dffe7b 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -62,87 +62,95 @@ def setup_map(self, j):
         self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
         self.offset_inactive = self.null_statistic[self.nactive:]
 
-class selective_MLE():
-    def __init__(self,
-                 map):
-
-        self.map = map
-        self.randomizer_precision = (1./map.randomization_scale)* np.identity(self.map.p)
-        self.target_observed = self.map.target_observed
-        self.nactive = self.target_observed.shape[0]
-        self.target_cov = self.map.target_cov
-
-    def solve_UMVU(self, j, step=1, nstep=30, tol=1.e-8):
-
-        self.map.setup_map(j)
-        inverse_cov = np.zeros((1+self.nactive, 1+self.nactive))
-        inverse_cov[0,0] = self.map.A.T.dot(self.randomizer_precision).dot(self.map.A) + 1./self.target_cov[j,j]
-        inverse_cov[0,1:] = self.map.A.T.dot(self.randomizer_precision).dot(self.map.B)
-        inverse_cov[1:,0] = self.map.B.T.dot(self.randomizer_precision).dot(self.map.A)
-        inverse_cov[1:,1:] = self.map.B.T.dot(self.randomizer_precision).dot(self.map.B)
-        cov = np.linalg.inv(inverse_cov)
-
-        self.L = cov[0,1:].dot(np.linalg.inv(cov[1:,1:]))
-        self.M_1 = (1./inverse_cov[0,0])*(1./self.target_cov[j,j])
-        self.M_2 = (1./inverse_cov[0,0])*(self.map.A.T).dot(self.randomizer_precision)
-        self.inactive_subgrad = np.zeros(self.map.p)
-        self.inactive_subgrad[self.nactive:] = self.map.inactive_subgrad
-
-        self.conditioned_value = self.map.null_statistic + self.inactive_subgrad + self.map._opt_affine_term
-        self.conditional_par = inverse_cov[1:,1:].dot(cov[1:,0]).dot((1./cov[0,0])* self.target_observed[j]) + \
-                               self.map.B.T.dot(self.randomizer_precision).dot(self.conditioned_value)
-        self.conditional_var = inverse_cov[1:,1:]
-
-        objective = lambda u: u.T.dot(self.conditional_par) - u.T.dot(self.conditional_var).dot(u)/2. - np.log(1.+ 1./u).sum()
-        grad = lambda u: self.conditional_par - self.conditional_var.dot(u) - 1./(1.+ u) + 1./u
-
-        current = self.map.feasible_point
-        current_value = np.inf
-
-        for itercount in range(nstep):
-            newton_step = grad(current)
-
-            # make sure proposal is feasible
-
-            count = 0
-            while True:
-                count += 1
-                proposal = current - step * newton_step
-                if np.all(proposal > 0):
-                    break
-                step *= 0.5
-                if count >= 40:
-                    raise ValueError('not finding a feasible point')
-
-            # make sure proposal is a descent
-
-            count = 0
-            while True:
-                proposal = current - step * newton_step
-                proposed_value = objective(proposal)
-                #print("proposed value", proposed_value, proposal)
-                # print(current_value, proposed_value, 'minimize')
-                if proposed_value <= current_value:
-                    break
-                step *= 0.5
-
-            # stop if relative decrease is small
-
-            if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
-                current = proposal
-                current_value = proposed_value
+def solve_UMVU(target_transform,
+               opt_transform,
+               target_observed,
+               feasible_point,
+               target_cov,
+               randomizer_precision,
+               step=1,
+               nstep=30,
+               tol=1.e-8):
+
+    A, data_offset = target_transform # data_offset = N
+    B, opt_offset = opt_transform     # opt_offset = u
+
+    nfeature, nopt = B.shape[1]
+    ntarget = A.shape[1]
+
+    # XXX should be able to do vector version as well
+    # but for now code assumes 1dim
+    assert ntarget == 1
+
+    # setup joint implied covariance matrix
+
+    inverse_target_cov = np.linalg.inv(target_cov)
+    inverse_cov = np.zeros((ntarget + nopt, ntarget + nopt))
+    inverse_cov[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + inverse_target_cov
+    inverse_cov[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
+    inverse_cov[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
+    inverse_cov[nopt:,nopt:] = B.T.dot(randomizer_precision).dot(B)
+    cov = np.linalg.inv(inverse_cov)
+
+    cov_opt = cov[ntarget:,ntarget:]
+    implied_cov_target = cov[:ntarget,:ntarget]
+    cross_cov = cov[:ntarget,ntarget:]
+
+    L = cross_cov.dot(np.linalg.inv(cov_opt))
+    M_1 = np.linalg.inv(inverse_cov[:ntarget,:ntarget]).dot(inverse_target_cov)
+    M_2 = np.linalg.inv(inverse_cov[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
+
+    conditioned_value = data_offset + opt_offset
+    conditional_par = (inverse_cov[ntarget:,ntarget:].dot(cross_cov.T.dot(np.linalg.inv(implied_cov_target).dot(target_observed))) + \
+                           B.T.dot(randomizer_precision).dot(conditioned_value))
+    conditional_var_inv = inverse_cov[ntarget:,ntarget:]
+
+    objective = lambda u: u.T.dot(conditional_par) - u.T.dot(conditional_var_inv).dot(u)/2. - np.log(1.+ 1./u).sum()
+    grad = lambda u: conditional_par - conditional_var_inv.dot(u) - 1./(1.+ u) + 1./u
+
+    current = feasible_point
+    current_value = np.inf
+
+    for itercount in range(nstep):
+        newton_step = grad(current)
+
+        # make sure proposal is feasible
+
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * newton_step
+            if np.all(proposal > 0):
                 break
+            step *= 0.5
+            if count >= 40:
+                raise ValueError('not finding a feasible point')
 
+        # make sure proposal is a descent
+
+        count = 0
+        while True:
+            proposal = current - step * newton_step
+            proposed_value = objective(proposal)
+            if proposed_value <= current_value:
+                break
+            step *= 0.5
+
+        # stop if relative decrease is small
+
+        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
             current = proposal
             current_value = proposed_value
+            break
+
+        current = proposal
+        current_value = proposed_value
 
-            if itercount % 4 == 0:
-                step *= 2
+        if itercount % 4 == 0:
+            step *= 2
 
-                # print('iter', itercount)
-        value = objective(current)
-        return -(1./self.M_1)*self.L.dot(current)+ (1./self.M_1)*(self.target_observed[j]- (1./self.M_1)*self.M_2.dot(self.conditioned_value)), \
-               value
+    value = objective(current)
+    return -np.linalg.inv(M_1).dot(L.dot(current))+ np.linalg.inv(M_1).dot(target_observed- M_2.dot(conditioned_value)), value
 
 
 
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 06e7a9240..6bbd921c1 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -5,8 +5,9 @@
 from selection.tests.instance import gaussian_instance
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, selective_MLE
+import matplotlib.pyplot as plt
 
-def test(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
+def test(n=100, p=1, s=1, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
     np.random.seed(seed_n)
@@ -28,12 +29,28 @@ def test(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_sca
     active = M_est._overall
     nactive = np.sum(active)
     sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
-
-    solve_mle = selective_MLE(M_est)
-    mle = np.zeros(nactive)
-    for j in range(nactive):
-        mle[j] = solve_mle.solve_UMVU(j)[0]
-
-    return np.transpose(np.vstack([mle, M_est.target_observed]))
-
-print(test())
\ No newline at end of file
+    if nactive>0:
+        solve_mle = selective_MLE(M_est)
+        mle = np.zeros(nactive)
+        for j in range(nactive):
+            mle[j] = solve_mle.solve_UMVU(j)[0]
+
+        return mle, M_est.target_observed
+    else:
+        return None
+
+print(test())
+def simulate(ndraw = 100):
+    seed_seq = np.arange(ndraw)
+    sel_MLE = []
+    naive_MLE = []
+    for i in range(seed_seq.shape[0]):
+        draw = test(n=100, p=1, s=1, signal=0., seed_n = seed_seq[i])
+        if draw[0] is not None:
+            sel_MLE.append(draw[0])
+            naive_MLE.append(draw[1])
+
+    plt.plot(np.asarray(naive_MLE), np.asarray(sel_MLE), 'r--')
+    plt.show()
+
+#simulate()
\ No newline at end of file

From 6fb8396118b984b0da8b7f022b454df182c3c004 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a1eadf8.SUNet>
Date: Thu, 9 Nov 2017 23:17:23 -0800
Subject: [PATCH 338/617] fixing selection map to get appropriate returns

---
 selection/adjusted_MLE/selective_MLE.py   | 24 +++++------------------
 selection/adjusted_MLE/tests/mle_LASSO.py |  2 +-
 2 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index aa3dffe7b..bee9be158 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -1,5 +1,4 @@
 import numpy as np
-import regreg.api as rr
 from selection.randomized.M_estimator import M_estimator
 
 class M_estimator_map(M_estimator):
@@ -9,34 +8,27 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
         self.randomizer = randomization
         self.randomization_scale = randomization_scale
 
-    def solve_approx(self):
+    def solve_map(self):
         self.solve()
+        nactive = self._overall.sum()
         (_opt_linear_term, _opt_affine_term) = self.opt_transform
         self._opt_linear_term = np.concatenate(
             (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
-        self._opt_affine_term = np.concatenate((_opt_affine_term[self._overall], _opt_affine_term[~self._overall]), 0)
+        self._opt_affine_term = np.concatenate((_opt_affine_term[self._overall],
+                                                _opt_affine_term[~self._overall]+self.observed_opt_state[nactive:]), 0)
         self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
 
         (_score_linear_term, _) = self.score_transform
         self._score_linear_term = np.concatenate(
             (_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
         self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
-        self.feasible_point = np.abs(self.initial_soln[self._overall])
-        nactive = self._overall.sum()
-        self.inactive_subgrad = self.observed_opt_state[nactive:]
-
 
-        lagrange = []
-        for key, value in self.penalty.weights.iteritems():
-            lagrange.append(value)
-        lagrange = np.asarray(lagrange)
-        self.inactive_lagrange = lagrange[~self._overall]
+        self.feasible_point = np.abs(self.initial_soln[self._overall])
 
         X, _ = self.loss.data
         n, p = X.shape
         self.p = p
 
-
         score_cov = np.zeros((p, p))
         X_active_inv = np.linalg.inv(X[:,self._overall].T.dot(X[:,self._overall]))
         projection_perp = np.identity(n) - X[:,self._overall].dot(X_active_inv).dot( X[:,self._overall].T)
@@ -46,14 +38,8 @@ def solve_approx(self):
         self.score_target_cov = score_cov[:, :nactive]
         self.target_cov = score_cov[:nactive, :nactive]
         self.target_observed = self.observed_internal_state[:nactive]
-        self.observed_score_state = self.observed_internal_state
         self.nactive = nactive
 
-        self.B_active = self._opt_linear_term[:nactive, :nactive]
-        self.B_inactive = self._opt_linear_term[nactive:, :nactive]
-        self.B = np.vstack([self.B_active, self.B_inactive])
-
-
     def setup_map(self, j):
 
         self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
diff --git a/selection/adjusted_MLE/tests/mle_LASSO.py b/selection/adjusted_MLE/tests/mle_LASSO.py
index 0090b2f80..65ceabf60 100644
--- a/selection/adjusted_MLE/tests/mle_LASSO.py
+++ b/selection/adjusted_MLE/tests/mle_LASSO.py
@@ -32,7 +32,7 @@ def test_approximate_MLE(X,
     randomization = randomization.isotropic_gaussian((p,), scale=randomization_scale)
     M_est = M_estimator_map(loss, epsilon, penalty, randomization, randomization_scale=randomization_scale)
 
-    M_est.solve_approx()
+    M_est.map_solve()
     active = M_est._overall
     active_set = np.asarray([i for i in range(p) if active[i]])
     nactive = np.sum(active)

From 1c2d219cb9e5c2754b0c458887af54c421c54630 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 9 Nov 2017 23:22:47 -0800
Subject: [PATCH 339/617] updating C-software

---
 C-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/C-software b/C-software
index ec6a954d6..563bf1aa3 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit ec6a954d6b335439115e961abde91fa5a07a3669
+Subproject commit 563bf1aa370b55f8343693224717047f1df0d0c3

From 930da15bd891c7afc4d8013fd0df2954113583d4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 9 Nov 2017 23:23:36 -0800
Subject: [PATCH 340/617] adding requirements to sdist and wheel for travis

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 7db91c7a0..b48dc4fe6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -39,12 +39,12 @@ matrix:
       env:
         # Sdist install should collect all dependencies
         - INSTALL_TYPE=sdist
-        - DEPENDS=
+        - DEPENDS="cython numpy scipy"
     - python: 2.7
       env:
         # Wheel install should collect all dependencies
         - INSTALL_TYPE=wheel
-        - DEPENDS=
+        - DEPENDS="cython numpy scipy"
     - python: 2.7
       env:
         - INSTALL_TYPE=requirements

From 096fea520c0740f92d37280b764d282fd18b90a0 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a1eadf8.SUNet>
Date: Thu, 9 Nov 2017 23:24:10 -0800
Subject: [PATCH 341/617] changed selection map

---
 selection/adjusted_MLE/selective_MLE.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index bee9be158..0890e2c28 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -40,13 +40,17 @@ def solve_map(self):
         self.target_observed = self.observed_internal_state[:nactive]
         self.nactive = nactive
 
-    def setup_map(self, j):
-
-        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
-        self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
-
-        self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
-        self.offset_inactive = self.null_statistic[self.nactive:]
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:,:nactive]).dot(np.linalg.inv(self.target_cov))
+        self.data_offset = self._score_linear_term.dot(self.observed_score_state)- self.A.dot(self.target_observed)
+        self.target_transform = (self.A, self.data_offset )
+
+    # def setup_map(self, j):
+    #
+    #     self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
+    #     self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
+    #
+    #     self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
+    #     self.offset_inactive = self.null_statistic[self.nactive:]
 
 def solve_UMVU(target_transform,
                opt_transform,

From 05f801fd8fea81b548e3cdfa44562251590e272d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 9 Nov 2017 23:24:43 -0800
Subject: [PATCH 342/617] updating R-software

---
 R-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-software b/R-software
index 232760d6a..9e7a08192 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit 232760d6aef5182e040b82e30555f4af5ad6803c
+Subproject commit 9e7a081924179ed93469aac41f596ff1dd5b21bb

From 207578cc914b7e4caec5422caaeb80f034c9e548 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 9 Nov 2017 23:42:29 -0800
Subject: [PATCH 343/617] adding adaptMCMC

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index b48dc4fe6..47eaab5cc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -76,7 +76,7 @@ install:
   - git submodule init
   - git submodule update
   - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"
-  - sudo Rscript -e "install.packages(c('glmnet', 'intervals'), repos='http://cloud.r-project.org')"
+  - sudo Rscript -e "install.packages(c('glmnet', 'intervals', 'adaptMCMC'), repos='http://cloud.r-project.org')"
   - sudo R CMD INSTALL selectiveInference
   - cd ..
   - python -c "from statsmodels.api import PHReg"

From 0adcf6d5d0a8fd6efd8dd4c5e2d56fc10caa51f4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 10 Nov 2017 00:21:20 -0800
Subject: [PATCH 344/617] try to get R-software/C-software installed

---
 .travis.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 47eaab5cc..c051f1bf3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -72,6 +72,8 @@ install:
     else
       pip install  -r requirements.txt; 
     fi
+  - git submodule init
+  - git submodule update
   - cd R-software
   - git submodule init
   - git submodule update

From eecd1f319197359b937dd2cc9c8fae4b7aeb5a88 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a221635.SUNet>
Date: Fri, 10 Nov 2017 10:34:29 -0800
Subject: [PATCH 345/617] pushed changes

---
 selection/adjusted_MLE/selective_MLE.py  |  5 ++--
 selection/adjusted_MLE/tests/test_MLE.py | 36 ++++++++----------------
 2 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 0890e2c28..f87a97f4a 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -28,6 +28,7 @@ def solve_map(self):
         X, _ = self.loss.data
         n, p = X.shape
         self.p = p
+        self.randomizer_precision = (1./self.randomization_scale)* np.identity(p)
 
         score_cov = np.zeros((p, p))
         X_active_inv = np.linalg.inv(X[:,self._overall].T.dot(X[:,self._overall]))
@@ -38,7 +39,7 @@ def solve_map(self):
         self.score_target_cov = score_cov[:, :nactive]
         self.target_cov = score_cov[:nactive, :nactive]
         self.target_observed = self.observed_internal_state[:nactive]
-        self.nactive = nactive
+        self.observed_score_state = self.observed_internal_state
 
         self.A = np.dot(self._score_linear_term, self.score_target_cov[:,:nactive]).dot(np.linalg.inv(self.target_cov))
         self.data_offset = self._score_linear_term.dot(self.observed_score_state)- self.A.dot(self.target_observed)
@@ -65,7 +66,7 @@ def solve_UMVU(target_transform,
     A, data_offset = target_transform # data_offset = N
     B, opt_offset = opt_transform     # opt_offset = u
 
-    nfeature, nopt = B.shape[1]
+    nopt = B.shape[1]
     ntarget = A.shape[1]
 
     # XXX should be able to do vector version as well
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 6bbd921c1..790101533 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -4,7 +4,7 @@
 import regreg.api as rr
 from selection.tests.instance import gaussian_instance
 from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, selective_MLE
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 import matplotlib.pyplot as plt
 
 def test(n=100, p=1, s=1, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
@@ -25,32 +25,20 @@ def test(n=100, p=1, s=1, signal=5., seed_n = 0, lam_frac=1., randomization_scal
     #randomizer = randomization.gaussian(np.identity(p))
     M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
 
-    M_est.solve_approx()
+    M_est.solve_map()
     active = M_est._overall
     nactive = np.sum(active)
     sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
-    if nactive>0:
-        solve_mle = selective_MLE(M_est)
-        mle = np.zeros(nactive)
-        for j in range(nactive):
-            mle[j] = solve_mle.solve_UMVU(j)[0]
-
-        return mle, M_est.target_observed
+    if nactive > 0:
+        mle = solve_UMVU(M_est.target_transform,
+                         M_est.opt_transform,
+                         M_est.target_observed,
+                         M_est.feasible_point,
+                         M_est.target_cov,
+                         M_est.randomizer_precision)
+
+        return mle[0], M_est.target_observed, nactive
     else:
         return None
 
-print(test())
-def simulate(ndraw = 100):
-    seed_seq = np.arange(ndraw)
-    sel_MLE = []
-    naive_MLE = []
-    for i in range(seed_seq.shape[0]):
-        draw = test(n=100, p=1, s=1, signal=0., seed_n = seed_seq[i])
-        if draw[0] is not None:
-            sel_MLE.append(draw[0])
-            naive_MLE.append(draw[1])
-
-    plt.plot(np.asarray(naive_MLE), np.asarray(sel_MLE), 'r--')
-    plt.show()
-
-#simulate()
\ No newline at end of file
+#print(test())

From 1ef9e2dc7b5bc83663fb742f70fab78940b64484 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jj8.SUNet>
Date: Fri, 10 Nov 2017 11:46:35 -0800
Subject: [PATCH 346/617] added exact MLE

---
 selection/adjusted_MLE/selective_MLE.py  | 34 +++++++++++++++----
 selection/adjusted_MLE/tests/test_MLE.py | 42 ++++++++++++++++++++++--
 2 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index f87a97f4a..f7c658382 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -53,6 +53,8 @@ def solve_map(self):
     #     self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
     #     self.offset_inactive = self.null_statistic[self.nactive:]
 
+import numpy as np
+
 def solve_UMVU(target_transform,
                opt_transform,
                target_observed,
@@ -92,12 +94,31 @@ def solve_UMVU(target_transform,
     M_2 = np.linalg.inv(inverse_cov[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
     conditioned_value = data_offset + opt_offset
-    conditional_par = (inverse_cov[ntarget:,ntarget:].dot(cross_cov.T.dot(np.linalg.inv(implied_cov_target).dot(target_observed))) + \
-                           B.T.dot(randomizer_precision).dot(conditioned_value))
-    conditional_var_inv = inverse_cov[ntarget:,ntarget:]
+    conditional_mean = (cross_cov.T.dot(np.linalg.inv(implied_cov_target).dot(target_observed)) +
+                        B.T.dot(randomizer_precision).dot(conditioned_value))
+    conditional_precision = inverse_cov[ntarget:,ntarget:]
+
+    soln, value = solve_barrier_nonneg(conditional_mean,
+                                       conditional_precision,
+                                       feasible_point=feasible_point)
+    sel_MLE = -np.linalg.inv(M_1).dot(L.dot(soln))+ np.linalg.inv(M_1).dot(target_observed- M_2.dot(conditioned_value))
+    return np.squeeze(sel_MLE), value
+
+def solve_barrier_nonneg(mean_vec,
+                         precision,
+                         feasible_point=None,
+                         step=1,
+                         nstep=30,
+                         tol=1.e-8):
+
+    conjugate_arg = precision.dot(mean_vec)
+    scaling = np.sqrt(np.diag(precision))
+
+    if feasible_point is None:
+        feasible_point = 1. / scaling
 
-    objective = lambda u: u.T.dot(conditional_par) - u.T.dot(conditional_var_inv).dot(u)/2. - np.log(1.+ 1./u).sum()
-    grad = lambda u: conditional_par - conditional_var_inv.dot(u) - 1./(1.+ u) + 1./u
+    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
+    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(1.+ u) + 1./u) / scaling
 
     current = feasible_point
     current_value = np.inf
@@ -140,8 +161,7 @@ def solve_UMVU(target_transform,
         if itercount % 4 == 0:
             step *= 2
 
-    value = objective(current)
-    return -np.linalg.inv(M_1).dot(L.dot(current))+ np.linalg.inv(M_1).dot(target_observed- M_2.dot(conditioned_value)), value
+    return current, current_value
 
 
 
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 790101533..c38f0daf0 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -5,7 +5,7 @@
 from selection.tests.instance import gaussian_instance
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-import matplotlib.pyplot as plt
+from selection.adjusted_MLE.tests.exact_MLE import grad_CGF
 
 def test(n=100, p=1, s=1, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
@@ -41,4 +41,42 @@ def test(n=100, p=1, s=1, signal=5., seed_n = 0, lam_frac=1., randomization_scal
     else:
         return None
 
-#print(test())
+def test_selective_MLE(target_observed=2):
+
+    """
+    Simple problem thresholded at 2
+    """
+
+    target_transform = (np.identity(1), np.zeros(1))
+    opt_transform = (np.identity(1), np.ones(1) * 2.)
+    feasible_point = 1.
+    randomizer_precision = np.identity(1)
+    target_cov = np.identity(1)
+
+    return solve_UMVU(target_transform,
+                      opt_transform,
+                      target_observed,
+                      feasible_point,
+                      target_cov,
+                      randomizer_precision)
+
+if __name__ == "__main__":
+
+    import matplotlib.pyplot as plt
+
+    Zval = np.linspace(-1,3,51)
+
+    mu_seq = np.linspace(-7., 6, num=2600)
+    grad_partition = np.array([grad_CGF(mu) for mu in mu_seq])
+
+    exact_MLE = []
+    for k in range(Zval.shape[0]):
+        true = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
+        exact_MLE.append(true)
+
+    MLE = np.array([test_selective_MLE(z)[0] for z in Zval])
+    MLE = MLE * (np.fabs(MLE) < 200)
+
+    plt.plot(Zval, MLE)
+    plt.plot(Zval, np.asarray(exact_MLE), 'r--')
+    plt.show()

From 7b5f33a874cbb9bac6936807b46c1fb470975454 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jj8.SUNet>
Date: Fri, 10 Nov 2017 11:58:49 -0800
Subject: [PATCH 347/617] corrected scale of exact_MLE

---
 selection/adjusted_MLE/selective_MLE.py  | 2 +-
 selection/adjusted_MLE/tests/test_MLE.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index f7c658382..af543e907 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -118,7 +118,7 @@ def solve_barrier_nonneg(mean_vec,
         feasible_point = 1. / scaling
 
     objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
-    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(1.+ u) + 1./u) / scaling
+    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
 
     current = feasible_point
     current_value = np.inf
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index c38f0daf0..85709f138 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -48,7 +48,7 @@ def test_selective_MLE(target_observed=2):
     """
 
     target_transform = (np.identity(1), np.zeros(1))
-    opt_transform = (np.identity(1), np.ones(1) * 2.)
+    opt_transform = (np.identity(1), -np.ones(1) * 2.)
     feasible_point = 1.
     randomizer_precision = np.identity(1)
     target_cov = np.identity(1)
@@ -67,7 +67,7 @@ def test_selective_MLE(target_observed=2):
     Zval = np.linspace(-1,3,51)
 
     mu_seq = np.linspace(-7., 6, num=2600)
-    grad_partition = np.array([grad_CGF(mu) for mu in mu_seq])
+    grad_partition = np.array([grad_CGF(mu, randomization_scale = 1., threshold = 2) for mu in mu_seq])
 
     exact_MLE = []
     for k in range(Zval.shape[0]):

From 76dffc5e804d2137fa05c08dd6d30a7d198fbeb7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jj8.SUNet>
Date: Fri, 10 Nov 2017 12:07:06 -0800
Subject: [PATCH 348/617] changed offset to -2

---
 selection/adjusted_MLE/tests/test_MLE.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 85709f138..76c809d2e 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -64,7 +64,7 @@ def test_selective_MLE(target_observed=2):
 
     import matplotlib.pyplot as plt
 
-    Zval = np.linspace(-1,3,51)
+    Zval = np.linspace(-1,5,51)
 
     mu_seq = np.linspace(-7., 6, num=2600)
     grad_partition = np.array([grad_CGF(mu, randomization_scale = 1., threshold = 2) for mu in mu_seq])

From 2328df285a2803ca1e59d69ddf6bae5ac84e2471 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jj8.SUNet>
Date: Fri, 10 Nov 2017 14:24:21 -0800
Subject: [PATCH 349/617] commit changes

---
 selection/adjusted_MLE/selective_MLE.py  | 42 ++++++++--------
 selection/adjusted_MLE/tests/test_MLE.py | 61 +++++++++++++-----------
 2 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index af543e907..d82060e84 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -1,4 +1,4 @@
-import numpy as np
+
 from selection.randomized.M_estimator import M_estimator
 
 class M_estimator_map(M_estimator):
@@ -77,41 +77,41 @@ def solve_UMVU(target_transform,
 
     # setup joint implied covariance matrix
 
-    inverse_target_cov = np.linalg.inv(target_cov)
-    inverse_cov = np.zeros((ntarget + nopt, ntarget + nopt))
-    inverse_cov[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + inverse_target_cov
-    inverse_cov[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
-    inverse_cov[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
-    inverse_cov[nopt:,nopt:] = B.T.dot(randomizer_precision).dot(B)
-    cov = np.linalg.inv(inverse_cov)
+    target_precision = np.linalg.inv(target_cov)
+    implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
+    implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
+    implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
+    implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
+    implied_precision[nopt:,nopt:] = B.T.dot(randomizer_precision).dot(B)
+    implied_cov = np.linalg.inv(implied_precision)
 
-    cov_opt = cov[ntarget:,ntarget:]
-    implied_cov_target = cov[:ntarget,:ntarget]
-    cross_cov = cov[:ntarget,ntarget:]
+    implied_opt = implied_cov[ntarget:,ntarget:]
+    implied_target = implied_cov[:ntarget,:ntarget]
+    implied_cross = implied_cov[:ntarget,ntarget:]
 
-    L = cross_cov.dot(np.linalg.inv(cov_opt))
-    M_1 = np.linalg.inv(inverse_cov[:ntarget,:ntarget]).dot(inverse_target_cov)
-    M_2 = np.linalg.inv(inverse_cov[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
+    L = implied_cross.dot(np.linalg.inv(implied_opt))
+    M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
+    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
     conditioned_value = data_offset + opt_offset
-    conditional_mean = (cross_cov.T.dot(np.linalg.inv(implied_cov_target).dot(target_observed)) +
-                        B.T.dot(randomizer_precision).dot(conditioned_value))
-    conditional_precision = inverse_cov[ntarget:,ntarget:]
+    conditional_natural_parameter = (implied_cross.T.dot(np.linalg.inv(implied_target).dot(target_observed)) -
+                                     B.T.dot(randomizer_precision).dot(conditioned_value))
+    conditional_precision = implied_precision[ntarget:,ntarget:]
 
-    soln, value = solve_barrier_nonneg(conditional_mean,
+    soln, value = solve_barrier_nonneg(conditional_natural_parameter,
                                        conditional_precision,
                                        feasible_point=feasible_point)
-    sel_MLE = -np.linalg.inv(M_1).dot(L.dot(soln))+ np.linalg.inv(M_1).dot(target_observed- M_2.dot(conditioned_value))
+    sel_MLE = -np.linalg.inv(M_1).dot(L.dot(soln))+ np.linalg.inv(M_1).dot(target_observed - M_2.dot(conditioned_value))
     return np.squeeze(sel_MLE), value
 
-def solve_barrier_nonneg(mean_vec,
+def solve_barrier_nonneg(conjugate_arg,
                          precision,
                          feasible_point=None,
                          step=1,
                          nstep=30,
                          tol=1.e-8):
 
-    conjugate_arg = precision.dot(mean_vec)
+    #conjugate_arg = precision.dot(mean_vec)
     scaling = np.sqrt(np.diag(precision))
 
     if feasible_point is None:
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 76c809d2e..9b8cebffa 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -41,17 +41,16 @@ def test(n=100, p=1, s=1, signal=5., seed_n = 0, lam_frac=1., randomization_scal
     else:
         return None
 
-def test_selective_MLE(target_observed=2):
-
+def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
     """
-    Simple problem thresholded at 2
+    Simple problem: randomizaiton of sd 1 and thresholded at 2 (default args)
     """
-
-    target_transform = (np.identity(1), np.zeros(1))
-    opt_transform = (np.identity(1), -np.ones(1) * 2.)
-    feasible_point = 1.
-    randomizer_precision = np.identity(1)
-    target_cov = np.identity(1)
+    target_observed = np.atleast_1d(target_observed)
+    target_transform = (-np.identity(n), np.zeros(n))
+    opt_transform = (np.identity(n), np.ones(n) * threshold)
+    feasible_point = np.ones(n)
+    randomizer_precision = np.identity(n) / randomization_scale ** 2
+    target_cov = np.identity(n)
 
     return solve_UMVU(target_transform,
                       opt_transform,
@@ -60,23 +59,27 @@ def test_selective_MLE(target_observed=2):
                       target_cov,
                       randomizer_precision)
 
-if __name__ == "__main__":
-
-    import matplotlib.pyplot as plt
-
-    Zval = np.linspace(-1,5,51)
-
-    mu_seq = np.linspace(-7., 6, num=2600)
-    grad_partition = np.array([grad_CGF(mu, randomization_scale = 1., threshold = 2) for mu in mu_seq])
-
-    exact_MLE = []
-    for k in range(Zval.shape[0]):
-        true = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
-        exact_MLE.append(true)
-
-    MLE = np.array([test_selective_MLE(z)[0] for z in Zval])
-    MLE = MLE * (np.fabs(MLE) < 200)
-
-    plt.plot(Zval, MLE)
-    plt.plot(Zval, np.asarray(exact_MLE), 'r--')
-    plt.show()
+if __name__ == "main":
+
+    n = 100
+    Zval= np.random.normal(0, 1, n)
+    sys.stderr.write("observed Z" + str(Zval) + "\n")
+    MLE = simple_problem(Zval, n=n, threshold=2, randomization_scale=1.)[0]
+    print(MLE)
+
+
+
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     plt.clf()
+#     Zval = np.linspace(-5, 5, 51)
+#     MLE = np.array([simple_problem(z)[0] for z in Zval])
+#
+#     mu_seq = np.linspace(-6, 6, 200)
+#     grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
+#
+#     plt.plot(Zval, MLE, label='+2')
+#     plt.plot(grad_partition, mu_seq, 'r--', label='MLE')
+#     plt.legend()
+#     plt.show()

From e374359848e4b7374fd52528933bbe1eaa18e7fd Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jj8.SUNet>
Date: Fri, 10 Nov 2017 15:50:41 -0800
Subject: [PATCH 350/617] commit changes

---
 selection/adjusted_MLE/selective_MLE.py  |  2 +-
 selection/adjusted_MLE/tests/test_MLE.py | 34 +++++++++++++++++++++---
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index d82060e84..96d535ad7 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -73,7 +73,7 @@ def solve_UMVU(target_transform,
 
     # XXX should be able to do vector version as well
     # but for now code assumes 1dim
-    assert ntarget == 1
+    #assert ntarget == 1
 
     # setup joint implied covariance matrix
 
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 9b8cebffa..ec86182f3 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -59,15 +59,41 @@ def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
                       target_cov,
                       randomizer_precision)
 
-if __name__ == "main":
+def bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.):
+    Zval = np.random.normal(true_mean, 1, n)
+    omega = np.random.normal(0, 1)
 
-    n = 100
-    Zval= np.random.normal(0, 1, n)
+    target_Z =  (np.sum(Zval)/np.sqrt(n))
+
+    check = target_Z + omega - threshold
+    if check>0.:
+        approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
+
+        boot_sample = []
+        for b in range(B):
+            Zval_boot = np.sum(Zval[np.random.sample(n, n, replace=True)]) / np.sqrt(n)
+            boot_sample.append(mle_map(Zval_boot)[0])
+
+        return boot_sample, np.mean(boot_sample), np.std(boot_sample), \
+               np.sqrt(n)*(boot_sample-np.mean(boot_sample))/np.std(boot_sample)
+
+
+if __name__ == "__main__":
+    n = 1000
+    Zval = np.random.normal(0, 1, n)
     sys.stderr.write("observed Z" + str(Zval) + "\n")
     MLE = simple_problem(Zval, n=n, threshold=2, randomization_scale=1.)[0]
-    print(MLE)
+    #print(MLE)
+
+    mu_seq = np.linspace(-6, 6, 200)
+    grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
 
+    exact_MLE = []
+    for k in range(Zval.shape[0]):
+        mle = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
+        exact_MLE.append(mle)
 
+    np.testing.assert_allclose(MLE, exact_MLE, rtol=2.0)
 
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt

From df5d307492a69ce0b116b778da2aa113aac1e4d2 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jj8.SUNet>
Date: Fri, 10 Nov 2017 16:43:44 -0800
Subject: [PATCH 351/617] changed bootstrap function

---
 selection/adjusted_MLE/selective_MLE.py  | 31 ++++++++--
 selection/adjusted_MLE/tests/test_MLE.py | 73 ++++++++++++++----------
 2 files changed, 70 insertions(+), 34 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 96d535ad7..9beb07b77 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -54,6 +54,7 @@ def solve_map(self):
     #     self.offset_inactive = self.null_statistic[self.nactive:]
 
 import numpy as np
+import functools
 
 def solve_UMVU(target_transform,
                opt_transform,
@@ -79,10 +80,11 @@ def solve_UMVU(target_transform,
 
     target_precision = np.linalg.inv(target_cov)
     implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
+
     implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
     implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
     implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
-    implied_precision[nopt:,nopt:] = B.T.dot(randomizer_precision).dot(B)
+    implied_precision[ntarget:,ntarget:] = B.T.dot(randomizer_precision).dot(B)
     implied_cov = np.linalg.inv(implied_precision)
 
     implied_opt = implied_cov[ntarget:,ntarget:]
@@ -94,15 +96,34 @@ def solve_UMVU(target_transform,
     M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
     conditioned_value = data_offset + opt_offset
-    conditional_natural_parameter = (implied_cross.T.dot(np.linalg.inv(implied_target).dot(target_observed)) -
-                                     B.T.dot(randomizer_precision).dot(conditioned_value))
+
+    linear_term = implied_cross.T.dot(np.linalg.inv(implied_target))
+    offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
+    natparam_transform = (linear_term, offset_term)
+    conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
+
     conditional_precision = implied_precision[ntarget:,ntarget:]
 
     soln, value = solve_barrier_nonneg(conditional_natural_parameter,
                                        conditional_precision,
                                        feasible_point=feasible_point)
-    sel_MLE = -np.linalg.inv(M_1).dot(L.dot(soln))+ np.linalg.inv(M_1).dot(target_observed - M_2.dot(conditioned_value))
-    return np.squeeze(sel_MLE), value
+    M_1_inv = np.linalg.inv(M_1)
+    offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
+    linear_term = np.vstack([M_1_inv, -M_1_inv.dot(L)])
+    mle_transform = (M_1_inv, -M_1_inv.dot(L), offset_term)
+
+    def mle_map(natparam_transform, mle_transform, feasible_point, conditional_precision, target_observed):
+        param_lin, param_offset = natparam_transform
+        mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+        soln, value = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
+                                           conditional_precision,
+                                           feasible_point=feasible_point)
+        return mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset, value
+
+    mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, feasible_point, conditional_precision)
+    sel_MLE, value = mle_partial(target_observed)
+    return np.squeeze(sel_MLE), value, mle_partial
+
 
 def solve_barrier_nonneg(conjugate_arg,
                          precision,
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index ec86182f3..95ecfeba7 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -6,6 +6,7 @@
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from selection.adjusted_MLE.tests.exact_MLE import grad_CGF
+from statsmodels.distributions.empirical_distribution import ECDF
 
 def test(n=100, p=1, s=1, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
@@ -60,40 +61,41 @@ def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
                       randomizer_precision)
 
 def bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.):
-    Zval = np.random.normal(true_mean, 1, n)
-    omega = np.random.normal(0, 1)
 
-    target_Z =  (np.sum(Zval)/np.sqrt(n))
+    while True:
+        Zval = np.random.normal(true_mean, 1, n)
+        omega = np.random.normal(0, 1)
+        target_Z = (np.sum(Zval) / np.sqrt(n))
+        check = target_Z + omega - threshold
+        if check>0.:
+            break
 
-    check = target_Z + omega - threshold
-    if check>0.:
-        approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
+    approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
 
-        boot_sample = []
-        for b in range(B):
-            Zval_boot = np.sum(Zval[np.random.sample(n, n, replace=True)]) / np.sqrt(n)
-            boot_sample.append(mle_map(Zval_boot)[0])
+    boot_sample = []
+    for b in range(B):
+        Zval_boot = np.sum(Zval[np.random.choice(n, n, replace=True)]) / np.sqrt(n)
+        boot_sample.append(mle_map(Zval_boot)[0])
 
-        return boot_sample, np.mean(boot_sample), np.std(boot_sample), \
-               np.sqrt(n)*(boot_sample-np.mean(boot_sample))/np.std(boot_sample)
+    return boot_sample, np.mean(boot_sample), np.std(boot_sample), \
+           np.sqrt(n) * np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample))
 
-
-if __name__ == "__main__":
-    n = 1000
-    Zval = np.random.normal(0, 1, n)
-    sys.stderr.write("observed Z" + str(Zval) + "\n")
-    MLE = simple_problem(Zval, n=n, threshold=2, randomization_scale=1.)[0]
-    #print(MLE)
-
-    mu_seq = np.linspace(-6, 6, 200)
-    grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
-
-    exact_MLE = []
-    for k in range(Zval.shape[0]):
-        mle = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
-        exact_MLE.append(mle)
-
-    np.testing.assert_allclose(MLE, exact_MLE, rtol=2.0)
+# if __name__ == "__main__":
+#     n = 1000
+#     Zval = np.random.normal(0, 1, n)
+#     sys.stderr.write("observed Z" + str(Zval) + "\n")
+#     MLE = simple_problem(Zval, n=n, threshold=2, randomization_scale=1.)[0]
+#     #print(MLE)
+#
+#     mu_seq = np.linspace(-6, 6, 200)
+#     grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
+#
+#     exact_MLE = []
+#     for k in range(Zval.shape[0]):
+#         mle = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
+#         exact_MLE.append(mle)
+#
+#     np.testing.assert_allclose(MLE, exact_MLE, rtol=2.0)
 
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt
@@ -109,3 +111,16 @@ def bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.):
 #     plt.plot(grad_partition, mu_seq, 'r--', label='MLE')
 #     plt.legend()
 #     plt.show()
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    plt.clf()
+    boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
+    boot_pivot = boot_result[3]
+    #print("boot pivot", boot_pivot)
+    print("boot sample", boot_pivot.shape)
+    ecdf = ECDF(boot_pivot)
+    print("ecdf", ecdf(boot_pivot))
+    plt.plot(np.arange(1000), ecdf(np.sort(boot_pivot)), 'r--')
+    plt.show()

From cf17df8802101a21cea3a4868604062551f84d20 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 11 Nov 2017 14:54:16 -0800
Subject: [PATCH 352/617] commit changes so far

---
 selection/adjusted_MLE/tests/test_MLE.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 95ecfeba7..99642fa08 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -3,6 +3,7 @@
 
 import regreg.api as rr
 from selection.tests.instance import gaussian_instance
+from scipy.stats import norm as ndist
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from selection.adjusted_MLE.tests.exact_MLE import grad_CGF
@@ -60,7 +61,7 @@ def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
                       target_cov,
                       randomizer_precision)
 
-def bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.):
+def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 
     while True:
         Zval = np.random.normal(true_mean, 1, n)
@@ -77,8 +78,7 @@ def bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.):
         Zval_boot = np.sum(Zval[np.random.choice(n, n, replace=True)]) / np.sqrt(n)
         boot_sample.append(mle_map(Zval_boot)[0])
 
-    return boot_sample, np.mean(boot_sample), np.std(boot_sample), \
-           np.sqrt(n) * np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample))
+    return boot_sample, np.mean(boot_sample), np.std(boot_sample), np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample))
 
 # if __name__ == "__main__":
 #     n = 1000
@@ -118,9 +118,9 @@ def bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.):
     plt.clf()
     boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
     boot_pivot = boot_result[3]
-    #print("boot pivot", boot_pivot)
     print("boot sample", boot_pivot.shape)
-    ecdf = ECDF(boot_pivot)
-    print("ecdf", ecdf(boot_pivot))
-    plt.plot(np.arange(1000), ecdf(np.sort(boot_pivot)), 'r--')
+    ecdf = ECDF(ndist.cdf(boot_pivot))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.show()

From bbb60a48e0a9fba5d2ff0e6cfa1d13ff15c9f7b9 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 11 Nov 2017 16:09:16 -0800
Subject: [PATCH 353/617] checked bias for LASSO, n=100, p=50

---
 selection/adjusted_MLE/selective_MLE.py  |  3 +-
 selection/adjusted_MLE/tests/test_MLE.py | 60 ++++++++++++++----------
 2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 9beb07b77..b85dca52a 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -16,6 +16,8 @@ def solve_map(self):
             (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
         self._opt_affine_term = np.concatenate((_opt_affine_term[self._overall],
                                                 _opt_affine_term[~self._overall]+self.observed_opt_state[nactive:]), 0)
+        self._opt_linear_term = self._opt_linear_term[:,:self._overall.sum()]
+        #print("shape", self._opt_linear_term[:,:self._overall.sum()] .shape)
         self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
 
         (_score_linear_term, _) = self.score_transform
@@ -203,6 +205,5 @@ def solve_barrier_nonneg(conjugate_arg,
 
 
 
-
 
 
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 99642fa08..c90b7382f 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -9,40 +9,50 @@
 from selection.adjusted_MLE.tests.exact_MLE import grad_CGF
 from statsmodels.distributions.empirical_distribution import ECDF
 
-def test(n=100, p=1, s=1, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
+def test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
+
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
-    np.random.seed(seed_n)
 
     lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
     loss = rr.glm.gaussian(X, y)
-
     epsilon = 1. / np.sqrt(n)
-
     W = np.ones(p) * lam
     penalty = rr.group_lasso(np.arange(p),
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
     randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-    #randomizer = randomization.gaussian(np.identity(p))
     M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
 
     M_est.solve_map()
     active = M_est._overall
+
+    true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+    #true_target = beta[active]
     nactive = np.sum(active)
     sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
     if nactive > 0:
-        mle = solve_UMVU(M_est.target_transform,
-                         M_est.opt_transform,
-                         M_est.target_observed,
-                         M_est.feasible_point,
-                         M_est.target_cov,
-                         M_est.randomizer_precision)
-
-        return mle[0], M_est.target_observed, nactive
+        approx_MLE, value, mle_map = solve_UMVU(M_est.target_transform,
+                                                M_est.opt_transform,
+                                                M_est.target_observed,
+                                                M_est.feasible_point,
+                                                M_est.target_cov,
+                                                M_est.randomizer_precision)
+
+        return np.mean(approx_MLE- true_target), approx_MLE, M_est.target_observed, nactive
     else:
         return None
 
+def test_bias_lasso(nsim = 500):
+
+    bias = 0
+    for _ in range(nsim):
+        bias += test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.)[0]
+
+    print(bias/nsim)
+
+test_bias_lasso()
+
 def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
     """
     Simple problem: randomizaiton of sd 1 and thresholded at 2 (default args)
@@ -112,15 +122,15 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 #     plt.legend()
 #     plt.show()
 
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    plt.clf()
-    boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
-    boot_pivot = boot_result[3]
-    print("boot sample", boot_pivot.shape)
-    ecdf = ECDF(ndist.cdf(boot_pivot))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.show()
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     plt.clf()
+#     boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
+#     boot_pivot = boot_result[3]
+#     print("boot sample", boot_pivot.shape)
+#     ecdf = ECDF(ndist.cdf(boot_pivot))
+#     grid = np.linspace(0, 1, 101)
+#     print("ecdf", ecdf(grid))
+#     plt.plot(grid, ecdf(grid), c='red', marker='^')
+#     plt.show()

From e95613fcc6c9f8de9923248590e4c671321f6242 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 11 Nov 2017 16:55:07 -0800
Subject: [PATCH 354/617] separated simple and LASSO problem

---
 selection/adjusted_MLE/tests/test_MLE.py      | 79 +++++++---------
 .../adjusted_MLE/tests/test_simple_problem.py | 90 +++++++++++++++++++
 2 files changed, 123 insertions(+), 46 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/test_simple_problem.py

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index c90b7382f..32a50c66b 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -39,7 +39,8 @@ def test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomizati
                                                 M_est.target_cov,
                                                 M_est.randomizer_precision)
 
-        return np.mean(approx_MLE- true_target), approx_MLE, M_est.target_observed, nactive
+        return np.mean(approx_MLE- true_target), approx_MLE, M_est.target_observed, active, X.T.dot(y), \
+               np.linalg.inv(X[:, active].T.dot(X[:, active])), mle_map
     else:
         return None
 
@@ -51,7 +52,25 @@ def test_bias_lasso(nsim = 500):
 
     print(bias/nsim)
 
-test_bias_lasso()
+#test_bias_lasso()
+
+def bootstrap_lasso(B=500):
+    p = 50
+    run_lasso = test_lasso(n=100, p=p, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.)
+
+    boot_sample = np.zeros((B,run_lasso[3].sum()))
+    for b in range(B):
+        boot_vector = (run_lasso[4])[np.random.choice(p, p, replace=True)]
+        #print("shape", boot_vector.shape)
+        active = run_lasso[3]
+        target_boot = (run_lasso[5]).dot(boot_vector[active])
+        boot_sample[b, :] = (run_lasso[6](target_boot))[0]
+
+    centered_boot_sample = boot_sample - boot_sample.mean(0)[None, :]
+    std_boot_sample = centered_boot_sample/(boot_sample.std(0)[None,:])
+
+    return std_boot_sample.reshape((B * run_lasso[3].sum(),))
+
 
 def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
     """
@@ -90,47 +109,15 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 
     return boot_sample, np.mean(boot_sample), np.std(boot_sample), np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample))
 
-# if __name__ == "__main__":
-#     n = 1000
-#     Zval = np.random.normal(0, 1, n)
-#     sys.stderr.write("observed Z" + str(Zval) + "\n")
-#     MLE = simple_problem(Zval, n=n, threshold=2, randomization_scale=1.)[0]
-#     #print(MLE)
-#
-#     mu_seq = np.linspace(-6, 6, 200)
-#     grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
-#
-#     exact_MLE = []
-#     for k in range(Zval.shape[0]):
-#         mle = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
-#         exact_MLE.append(mle)
-#
-#     np.testing.assert_allclose(MLE, exact_MLE, rtol=2.0)
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     plt.clf()
-#     Zval = np.linspace(-5, 5, 51)
-#     MLE = np.array([simple_problem(z)[0] for z in Zval])
-#
-#     mu_seq = np.linspace(-6, 6, 200)
-#     grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
-#
-#     plt.plot(Zval, MLE, label='+2')
-#     plt.plot(grad_partition, mu_seq, 'r--', label='MLE')
-#     plt.legend()
-#     plt.show()
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     plt.clf()
-#     boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
-#     boot_pivot = boot_result[3]
-#     print("boot sample", boot_pivot.shape)
-#     ecdf = ECDF(ndist.cdf(boot_pivot))
-#     grid = np.linspace(0, 1, 101)
-#     print("ecdf", ecdf(grid))
-#     plt.plot(grid, ecdf(grid), c='red', marker='^')
-#     plt.show()
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    plt.clf()
+    boot_pivot = bootstrap_lasso(B=10000)
+    ecdf = ECDF(ndist.cdf(boot_pivot))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='blue', marker='^')
+    plt.plot(grid, grid, c='red', marker='^')
+    plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/boot_selective_MLE_lasso.png")
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
new file mode 100644
index 000000000..5549ff0be
--- /dev/null
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -0,0 +1,90 @@
+from __future__ import print_function
+import numpy as np, sys
+
+from scipy.stats import norm as ndist
+from selection.adjusted_MLE.selective_MLE import solve_UMVU
+from selection.adjusted_MLE.tests.exact_MLE import grad_CGF
+from statsmodels.distributions.empirical_distribution import ECDF
+
+def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
+    """
+    Simple problem: randomizaiton of sd 1 and thresholded at 2 (default args)
+    """
+    target_observed = np.atleast_1d(target_observed)
+    target_transform = (-np.identity(n), np.zeros(n))
+    opt_transform = (np.identity(n), np.ones(n) * threshold)
+    feasible_point = np.ones(n)
+    randomizer_precision = np.identity(n) / randomization_scale ** 2
+    target_cov = np.identity(n)
+
+    return solve_UMVU(target_transform,
+                      opt_transform,
+                      target_observed,
+                      feasible_point,
+                      target_cov,
+                      randomizer_precision)
+
+def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
+
+    while True:
+        Zval = np.random.normal(true_mean, 1, n)
+        omega = np.random.normal(0, 1)
+        target_Z = (np.sum(Zval) / np.sqrt(n))
+        check = target_Z + omega - threshold
+        if check>0.:
+            break
+
+    approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
+
+    boot_sample = []
+    for b in range(B):
+        Zval_boot = np.sum(Zval[np.random.choice(n, n, replace=True)]) / np.sqrt(n)
+        boot_sample.append(mle_map(Zval_boot)[0])
+
+    return boot_sample, np.mean(boot_sample), np.std(boot_sample), \
+           np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample))
+
+# if __name__ == "__main__":
+#     n = 1000
+#     Zval = np.random.normal(0, 1, n)
+#     sys.stderr.write("observed Z" + str(Zval) + "\n")
+#     MLE = simple_problem(Zval, n=n, threshold=2, randomization_scale=1.)[0]
+#     #print(MLE)
+#
+#     mu_seq = np.linspace(-6, 6, 200)
+#     grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
+#
+#     exact_MLE = []
+#     for k in range(Zval.shape[0]):
+#         mle = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
+#         exact_MLE.append(mle)
+#
+#     np.testing.assert_allclose(MLE, exact_MLE, rtol=2.0)
+
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     plt.clf()
+#     Zval = np.linspace(-5, 5, 51)
+#     MLE = np.array([simple_problem(z)[0] for z in Zval])
+#
+#     mu_seq = np.linspace(-6, 6, 200)
+#     grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
+#
+#     plt.plot(Zval, MLE, label='+2')
+#     plt.plot(grad_partition, mu_seq, 'r--', label='MLE')
+#     plt.legend()
+#     plt.show()
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    plt.clf()
+    boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
+    boot_pivot = boot_result[3]
+    print("boot sample", boot_pivot.shape)
+    ecdf = ECDF(ndist.cdf(boot_pivot))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.show()
\ No newline at end of file

From 1e9b60c8729df773ac49915b580f8ae250385f98 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 11 Nov 2017 16:55:56 -0800
Subject: [PATCH 355/617] removed unnecessary import

---
 selection/adjusted_MLE/tests/test_MLE.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 32a50c66b..e4a386f49 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -6,7 +6,6 @@
 from scipy.stats import norm as ndist
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from selection.adjusted_MLE.tests.exact_MLE import grad_CGF
 from statsmodels.distributions.empirical_distribution import ECDF
 
 def test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):

From ddbfa2f7e7e63a059f235a33869b185d376dabbe Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 11 Nov 2017 18:21:19 -0800
Subject: [PATCH 356/617] removed simple problem from Lasso test

---
 selection/adjusted_MLE/tests/test_MLE.py      | 44 ++-----------------
 .../adjusted_MLE/tests/test_simple_problem.py |  2 +-
 2 files changed, 4 insertions(+), 42 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index e4a386f49..26e4d6417 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -54,8 +54,8 @@ def test_bias_lasso(nsim = 500):
 #test_bias_lasso()
 
 def bootstrap_lasso(B=500):
-    p = 50
-    run_lasso = test_lasso(n=100, p=p, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.)
+    p = 100
+    run_lasso = test_lasso(n=100, p=p, s=0, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.)
 
     boot_sample = np.zeros((B,run_lasso[3].sum()))
     for b in range(B):
@@ -71,44 +71,6 @@ def bootstrap_lasso(B=500):
     return std_boot_sample.reshape((B * run_lasso[3].sum(),))
 
 
-def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
-    """
-    Simple problem: randomizaiton of sd 1 and thresholded at 2 (default args)
-    """
-    target_observed = np.atleast_1d(target_observed)
-    target_transform = (-np.identity(n), np.zeros(n))
-    opt_transform = (np.identity(n), np.ones(n) * threshold)
-    feasible_point = np.ones(n)
-    randomizer_precision = np.identity(n) / randomization_scale ** 2
-    target_cov = np.identity(n)
-
-    return solve_UMVU(target_transform,
-                      opt_transform,
-                      target_observed,
-                      feasible_point,
-                      target_cov,
-                      randomizer_precision)
-
-def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
-
-    while True:
-        Zval = np.random.normal(true_mean, 1, n)
-        omega = np.random.normal(0, 1)
-        target_Z = (np.sum(Zval) / np.sqrt(n))
-        check = target_Z + omega - threshold
-        if check>0.:
-            break
-
-    approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
-
-    boot_sample = []
-    for b in range(B):
-        Zval_boot = np.sum(Zval[np.random.choice(n, n, replace=True)]) / np.sqrt(n)
-        boot_sample.append(mle_map(Zval_boot)[0])
-
-    return boot_sample, np.mean(boot_sample), np.std(boot_sample), np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample))
-
-
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
@@ -119,4 +81,4 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='blue', marker='^')
     plt.plot(grid, grid, c='red', marker='^')
-    plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/boot_selective_MLE_lasso.png")
+    plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/boot_selective_MLE_lasso_no_signal.png")
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index 5549ff0be..5fd9d7913 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -80,7 +80,7 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
     import matplotlib.pyplot as plt
 
     plt.clf()
-    boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
+    boot_result = bootstrap_simple(n= 100, B=1000, true_mean=1., threshold=2.)
     boot_pivot = boot_result[3]
     print("boot sample", boot_pivot.shape)
     ecdf = ECDF(ndist.cdf(boot_pivot))

From 81dfc674b4a062e9de07f2ca756357947bbee047 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 11 Nov 2017 21:32:11 -0800
Subject: [PATCH 357/617] added two more tests for the simple example

---
 selection/adjusted_MLE/selective_MLE.py       | 13 +------
 selection/adjusted_MLE/tests/test_MLE.py      | 14 ++++---
 .../adjusted_MLE/tests/test_simple_problem.py | 37 +++++++++++++++++++
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index b85dca52a..4d857356a 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -1,4 +1,5 @@
-
+import numpy as np
+import functools
 from selection.randomized.M_estimator import M_estimator
 
 class M_estimator_map(M_estimator):
@@ -47,16 +48,6 @@ def solve_map(self):
         self.data_offset = self._score_linear_term.dot(self.observed_score_state)- self.A.dot(self.target_observed)
         self.target_transform = (self.A, self.data_offset )
 
-    # def setup_map(self, j):
-    #
-    #     self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
-    #     self.null_statistic = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
-    #
-    #     self.offset_active = self._opt_affine_term[:self.nactive] + self.null_statistic[:self.nactive]
-    #     self.offset_inactive = self.null_statistic[self.nactive:]
-
-import numpy as np
-import functools
 
 def solve_UMVU(target_transform,
                opt_transform,
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 26e4d6417..b9abb3e06 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -54,8 +54,8 @@ def test_bias_lasso(nsim = 500):
 #test_bias_lasso()
 
 def bootstrap_lasso(B=500):
-    p = 100
-    run_lasso = test_lasso(n=100, p=p, s=0, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.)
+    p = 200
+    run_lasso = test_lasso(n=100, p=p, s=10, signal=7., seed_n = 0, lam_frac=1., randomization_scale=1.)
 
     boot_sample = np.zeros((B,run_lasso[3].sum()))
     for b in range(B):
@@ -68,17 +68,19 @@ def bootstrap_lasso(B=500):
     centered_boot_sample = boot_sample - boot_sample.mean(0)[None, :]
     std_boot_sample = centered_boot_sample/(boot_sample.std(0)[None,:])
 
-    return std_boot_sample.reshape((B * run_lasso[3].sum(),))
-
+    return std_boot_sample.reshape((B * run_lasso[3].sum(),)), \
+           np.mean(centered_boot_sample.reshape((B * run_lasso[3].sum(),)))
 
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
     plt.clf()
-    boot_pivot = bootstrap_lasso(B=10000)
+    bootstrap = bootstrap_lasso(B=10000)
+    boot_pivot = bootstrap[0]
     ecdf = ECDF(ndist.cdf(boot_pivot))
     grid = np.linspace(0, 1, 101)
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='blue', marker='^')
     plt.plot(grid, grid, c='red', marker='^')
-    plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/boot_selective_MLE_lasso_no_signal.png")
+    #plt.show()
+    plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/boot_selective_MLE_lasso_p200.png")
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index 5fd9d7913..3efeed8dc 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -24,6 +24,43 @@ def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
                       target_cov,
                       randomizer_precision)
 
+
+def sim_simple_problem(true_mean, threshold=2, randomization_scale=1.):
+    while True:
+        Z, W = np.random.standard_normal(2)
+        Z += true_mean
+        W *= randomization_scale
+        if Z + W > threshold:
+            return Z
+
+
+def check_unbiased(true_mean, threshold=2, randomization_scale=1., nsim=5000):
+    bias = 0
+    for _ in range(nsim):
+        Z = sim_simple_problem(true_mean, threshold, randomization_scale)
+        est = simple_problem(Z, threshold=threshold, randomization_scale=randomization_scale)[0]
+        bias += est - true_mean
+
+    return bias / nsim
+
+
+def test_orthogonal_lasso(n=5):
+    Zval = np.random.normal(0, 1, n)
+    print("observed Z" + str(Zval) + "\n")
+    approx_MLE = simple_problem(Zval, threshold=2, randomization_scale=1.)[0]
+
+    approx_MLE2 = [simple_problem(z, threshold=2, randomization_scale=1.)[0] for z in Zval]
+    mu_seq = np.linspace(-6, 6, 2500)
+    grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
+
+    exact_MLE = []
+    for k in range(Zval.shape[0]):
+        mle = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
+        exact_MLE.append(mle)
+
+    return approx_MLE, np.asarray(exact_MLE), np.asarray(approx_MLE2)
+
+
 def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 
     while True:

From b603d6e9c0688e6e88e4798fbe834c4170a78de7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sun, 12 Nov 2017 11:39:38 -0800
Subject: [PATCH 358/617] C code for solving optimization problem in selective
 MLE

---
 C-software                                   |  2 +-
 selection/randomized/selective_MLE_utils.pyx | 48 ++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 selection/randomized/selective_MLE_utils.pyx

diff --git a/C-software b/C-software
index 563bf1aa3..0b35c6ed8 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit 563bf1aa370b55f8343693224717047f1df0d0c3
+Subproject commit 0b35c6ed8537cef9aabed526b968b1c63d2f6cb8
diff --git a/selection/randomized/selective_MLE_utils.pyx b/selection/randomized/selective_MLE_utils.pyx
new file mode 100644
index 000000000..4ce8712db
--- /dev/null
+++ b/selection/randomized/selective_MLE_utils.pyx
@@ -0,0 +1,48 @@
+import warnings
+import numpy as np, cython
+from regreg.api import power_L
+
+cimport numpy as np
+
+DTYPE_float = np.float
+ctypedef np.float_t DTYPE_float_t
+DTYPE_int = np.int
+ctypedef np.int_t DTYPE_int_t
+
+cdef extern from "randomized_lasso.h":
+    # Declared as returning double: barrier_solve_ below stores the result in `value`, which a void declaration does not allow.
+    double barrier_solve(double *gradient,                   # Gradient vector
+                         double *opt_variable,               # Optimization variable
+                         double *opt_proposed,               # New value of optimization variable
+                         double *conjugate_arg,              # Argument to conjugate of Gaussian
+                         double *precision,                  # Precision matrix of Gaussian
+                         double *scaling,                    # Diagonal scaling matrix for log barrier
+                         int ndim,                           # Dimension of opt_variable
+                         int max_iter,                       # Maximum number of iterations
+                         double value_tol,                   # Tolerance for convergence based on value
+                         double initial_step)                # Initial stepsize
+
+def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient vector
+                   np.ndarray[DTYPE_float_t, ndim=1] opt_variable,  # Optimization variable
+                   np.ndarray[DTYPE_float_t, ndim=1] opt_proposed,  # New value of optimization variable
+                   np.ndarray[DTYPE_float_t, ndim=1] conjugate_arg, # Argument to conjugate of Gaussian
+                   np.ndarray[DTYPE_float_t, ndim=2] precision,     # Precision matrix of Gaussian
+                   np.ndarray[DTYPE_float_t, ndim=1] scaling,       # Diagonal scaling matrix for log barrier
+                   int max_iter=100,                                # Maximum number of iterations
+                   double value_tol=1.e-6):                         # Tolerance for convergence based on value
+    """Call the C routine barrier_solve on the given arrays and return (opt_variable, value)."""
+    initial_step = power_L(precision)  # stepsize handed to C comes from regreg's power_L applied to the precision matrix
+    ndim = precision.shape[0]  # dimension passed to C is the row count of precision
+    # NOTE(review): raw .data pointers are handed to C, so the arrays are assumed C-contiguous -- confirm callers guarantee this.
+    value = barrier_solve(<double *>gradient.data,
+                           <double *>opt_variable.data,
+                           <double *>opt_proposed.data,
+                           <double *>conjugate_arg.data,
+                           <double *>precision.data,
+                           <double *>scaling.data,
+                           ndim,
+                           max_iter,
+                           value_tol,
+                           initial_step)
+
+    return opt_variable, value  # opt_variable is the caller's own array object; presumably updated in place by the C routine -- confirm

From a635a4bf4037d3bcae9fc39aa83457649ea0a174 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jm2.SUNet>
Date: Mon, 13 Nov 2017 09:37:01 -0800
Subject: [PATCH 359/617] corrected bootstrap

---
 selection/adjusted_MLE/tests/test_MLE.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index b9abb3e06..2ac50754c 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -38,7 +38,7 @@ def test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomizati
                                                 M_est.target_cov,
                                                 M_est.randomizer_precision)
 
-        return np.mean(approx_MLE- true_target), approx_MLE, M_est.target_observed, active, X.T.dot(y), \
+        return np.mean(approx_MLE- true_target), approx_MLE, M_est.target_observed, active, X, y,\
                np.linalg.inv(X[:, active].T.dot(X[:, active])), mle_map
     else:
         return None
@@ -54,16 +54,16 @@ def test_bias_lasso(nsim = 500):
 #test_bias_lasso()
 
 def bootstrap_lasso(B=500):
-    p = 200
+    p = 50
     run_lasso = test_lasso(n=100, p=p, s=10, signal=7., seed_n = 0, lam_frac=1., randomization_scale=1.)
 
     boot_sample = np.zeros((B,run_lasso[3].sum()))
     for b in range(B):
-        boot_vector = (run_lasso[4])[np.random.choice(p, p, replace=True)]
-        #print("shape", boot_vector.shape)
+        boot_indices = np.random.choice(p, p, replace=True)
+        boot_vector = ((run_lasso[4])[boot_indices,:]).T.dot((run_lasso[5])[boot_indices])
         active = run_lasso[3]
-        target_boot = (run_lasso[5]).dot(boot_vector[active])
-        boot_sample[b, :] = (run_lasso[6](target_boot))[0]
+        target_boot = (run_lasso[6]).dot(boot_vector[active])
+        boot_sample[b, :] = (run_lasso[7](target_boot))[0]
 
     centered_boot_sample = boot_sample - boot_sample.mean(0)[None, :]
     std_boot_sample = centered_boot_sample/(boot_sample.std(0)[None,:])
@@ -82,5 +82,5 @@ def bootstrap_lasso(B=500):
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='blue', marker='^')
     plt.plot(grid, grid, c='red', marker='^')
-    #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/boot_selective_MLE_lasso_p200.png")
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/boot_selective_MLE_lasso_p200.png")

From b04366ff39608428db2e08513cf97a87d8501158 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jm2.SUNet>
Date: Mon, 13 Nov 2017 09:41:26 -0800
Subject: [PATCH 360/617] small correction

---
 selection/adjusted_MLE/tests/test_MLE.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 2ac50754c..da2e5df9b 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -54,12 +54,13 @@ def test_bias_lasso(nsim = 500):
 #test_bias_lasso()
 
 def bootstrap_lasso(B=500):
-    p = 50
-    run_lasso = test_lasso(n=100, p=p, s=10, signal=7., seed_n = 0, lam_frac=1., randomization_scale=1.)
+    p = 200
+    n= 100
+    run_lasso = test_lasso(n=n, p=p, s=10, signal=7., seed_n = 0, lam_frac=1., randomization_scale=1.)
 
     boot_sample = np.zeros((B,run_lasso[3].sum()))
     for b in range(B):
-        boot_indices = np.random.choice(p, p, replace=True)
+        boot_indices = np.random.choice(n, n, replace=True)
         boot_vector = ((run_lasso[4])[boot_indices,:]).T.dot((run_lasso[5])[boot_indices])
         active = run_lasso[3]
         target_boot = (run_lasso[6]).dot(boot_vector[active])

From 7502f625878694e2b85344c5b1ec85236fe4f027 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jm2.SUNet>
Date: Mon, 13 Nov 2017 12:09:04 -0800
Subject: [PATCH 361/617] added map for one-dimensional problem

---
 selection/adjusted_MLE/selective_MLE.py       | 57 +++++++++------
 selection/adjusted_MLE/tests/test_MLE.py      | 11 +--
 .../adjusted_MLE/tests/test_MLE_univariate.py | 69 +++++++++++++++++++
 3 files changed, 110 insertions(+), 27 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/test_MLE_univariate.py

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 4d857356a..99d255d0f 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -9,44 +9,51 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
         self.randomizer = randomization
         self.randomization_scale = randomization_scale
 
-    def solve_map(self):
         self.solve()
-        nactive = self._overall.sum()
+        self.nactive = self._overall.sum()
         (_opt_linear_term, _opt_affine_term) = self.opt_transform
         self._opt_linear_term = np.concatenate(
             (_opt_linear_term[self._overall, :], _opt_linear_term[~self._overall, :]), 0)
         self._opt_affine_term = np.concatenate((_opt_affine_term[self._overall],
-                                                _opt_affine_term[~self._overall]+self.observed_opt_state[nactive:]), 0)
-        self._opt_linear_term = self._opt_linear_term[:,:self._overall.sum()]
-        #print("shape", self._opt_linear_term[:,:self._overall.sum()] .shape)
+                                                _opt_affine_term[~self._overall] + self.observed_opt_state[self.nactive:]),
+                                               0)
+        self._opt_linear_term = self._opt_linear_term[:, :self._overall.sum()]
         self.opt_transform = (self._opt_linear_term, self._opt_affine_term)
-
         (_score_linear_term, _) = self.score_transform
         self._score_linear_term = np.concatenate(
             (_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
         self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
 
-        self.feasible_point = np.abs(self.initial_soln[self._overall])
-
         X, _ = self.loss.data
         n, p = X.shape
         self.p = p
-        self.randomizer_precision = (1./self.randomization_scale)* np.identity(p)
+        self.randomizer_precision = (1. / self.randomization_scale) * np.identity(p)
 
         score_cov = np.zeros((p, p))
-        X_active_inv = np.linalg.inv(X[:,self._overall].T.dot(X[:,self._overall]))
-        projection_perp = np.identity(n) - X[:,self._overall].dot(X_active_inv).dot( X[:,self._overall].T)
-        score_cov[:nactive, :nactive] = X_active_inv
-        score_cov[nactive:, nactive:] = X[:,~self._overall].T.dot(projection_perp).dot(X[:,~self._overall])
-
-        self.score_target_cov = score_cov[:, :nactive]
-        self.target_cov = score_cov[:nactive, :nactive]
-        self.target_observed = self.observed_internal_state[:nactive]
+        X_active_inv = np.linalg.inv(X[:, self._overall].T.dot(X[:, self._overall]))
+        projection_perp = np.identity(n) - X[:, self._overall].dot(X_active_inv).dot(X[:, self._overall].T)
+        score_cov[:self.nactive, :self.nactive] = X_active_inv
+        score_cov[self.nactive:, self.nactive:] = X[:, ~self._overall].T.dot(projection_perp).dot(X[:, ~self._overall])
+        self.score_cov = score_cov
         self.observed_score_state = self.observed_internal_state
+        self.target_observed = self.observed_internal_state[:self.nactive]
+        self.score_target_cov = self.score_cov[:, :self.nactive]
+        self.target_cov = self.score_cov[:self.nactive, :self.nactive]
 
-        self.A = np.dot(self._score_linear_term, self.score_target_cov[:,:nactive]).dot(np.linalg.inv(self.target_cov))
+    def solve_map(self):
+        self.feasible_point = np.abs(self.initial_soln[self._overall])
+
+        self.A = np.dot(self._score_linear_term, self.score_target_cov).dot(np.linalg.inv(self.target_cov))
         self.data_offset = self._score_linear_term.dot(self.observed_score_state)- self.A.dot(self.target_observed)
-        self.target_transform = (self.A, self.data_offset )
+        self.target_transform = (self.A, self.data_offset)
+
+    def solve_map_univariate_target(self, j):  # sets feasible_point, A, data_offset and target_transform for the single target coordinate j
+        self.feasible_point = np.abs(self.initial_soln[self._overall])[j]  # |initial solution| at the j-th entry among those flagged by self._overall
+
+        self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]  # column j of score_target_cov mapped through the score transform, divided by target_cov[j, j]
+        self.data_offset = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]  # transformed observed score minus the part carried by target_observed[j]
+        self.target_transform = (self.A.reshape((self.A.shape[0],1)),  # both pieces are stored as single-column 2-d arrays
+                                 self.data_offset.reshape((self.data_offset.shape[0],1)))
 
 
 def solve_UMVU(target_transform,
@@ -65,15 +72,18 @@ def solve_UMVU(target_transform,
     nopt = B.shape[1]
     ntarget = A.shape[1]
 
-    # XXX should be able to do vector version as well
-    # but for now code assumes 1dim
     #assert ntarget == 1
 
     # setup joint implied covariance matrix
+    if ntarget>1:
+        target_precision = np.linalg.inv(target_cov)
+    else:
+        target_precision = 1./target_cov
+        opt_offset = opt_offset.reshape((opt_offset.shape[0],1))
 
-    target_precision = np.linalg.inv(target_cov)
     implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
 
+    #print("shapes", A.shape, (A.T.dot(randomizer_precision).dot(A)).shape, target_precision.shape)
     implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
     implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
     implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
@@ -89,14 +99,17 @@ def solve_UMVU(target_transform,
     M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
     conditioned_value = data_offset + opt_offset
+    #print("shapes", data_offset.shape, opt_offset.shape, conditioned_value.shape)
 
     linear_term = implied_cross.T.dot(np.linalg.inv(implied_target))
     offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
+    #print("check shapes", linear_term.dot(target_observed).shape, offset_term.shape)
     natparam_transform = (linear_term, offset_term)
     conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
 
     conditional_precision = implied_precision[ntarget:,ntarget:]
 
+    #print("check shapes", conditional_natural_parameter.shape, conditional_precision.shape)
     soln, value = solve_barrier_nonneg(conditional_natural_parameter,
                                        conditional_precision,
                                        feasible_point=feasible_point)
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index da2e5df9b..e202e6dd3 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -39,7 +39,7 @@ def test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomizati
                                                 M_est.randomizer_precision)
 
         return np.mean(approx_MLE- true_target), approx_MLE, M_est.target_observed, active, X, y,\
-               np.linalg.inv(X[:, active].T.dot(X[:, active])), mle_map
+               np.linalg.inv(X[:, active].T.dot(X[:, active])), mle_map, true_target
     else:
         return None
 
@@ -56,7 +56,7 @@ def test_bias_lasso(nsim = 500):
 def bootstrap_lasso(B=500):
     p = 200
     n= 100
-    run_lasso = test_lasso(n=n, p=p, s=10, signal=7., seed_n = 0, lam_frac=1., randomization_scale=1.)
+    run_lasso = test_lasso(n=n, p=p, s=20, signal=7., seed_n = 0, lam_frac=1., randomization_scale=1.)
 
     boot_sample = np.zeros((B,run_lasso[3].sum()))
     for b in range(B):
@@ -65,8 +65,9 @@ def bootstrap_lasso(B=500):
         active = run_lasso[3]
         target_boot = (run_lasso[6]).dot(boot_vector[active])
         boot_sample[b, :] = (run_lasso[7](target_boot))[0]
-
-    centered_boot_sample = boot_sample - boot_sample.mean(0)[None, :]
+    true_target = run_lasso[8]
+    #centered_boot_sample = boot_sample - boot_sample.mean(0)[None, :]
+    centered_boot_sample = boot_sample - true_target[None, :]
     std_boot_sample = centered_boot_sample/(boot_sample.std(0)[None,:])
 
     return std_boot_sample.reshape((B * run_lasso[3].sum(),)), \
@@ -84,4 +85,4 @@ def bootstrap_lasso(B=500):
     plt.plot(grid, ecdf(grid), c='blue', marker='^')
     plt.plot(grid, grid, c='red', marker='^')
     plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/boot_selective_MLE_lasso_p200.png")
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/true_target_boot_selective_MLE_lasso_p200.png")
diff --git a/selection/adjusted_MLE/tests/test_MLE_univariate.py b/selection/adjusted_MLE/tests/test_MLE_univariate.py
new file mode 100644
index 000000000..b29365c6f
--- /dev/null
+++ b/selection/adjusted_MLE/tests/test_MLE_univariate.py
@@ -0,0 +1,69 @@
+from __future__ import print_function
+import numpy as np, sys
+
+import regreg.api as rr
+from selection.tests.instance import gaussian_instance
+from scipy.stats import norm as ndist
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
+from statsmodels.distributions.empirical_distribution import ECDF
+
+def boot_lasso(n=100, p=50, s=5, signal=5., B=1000, seed_n = 0, lam_frac=1., randomization_scale=1.):
+    """Return B standardized bootstrap draws per selected coordinate, flattened to length B * nactive, or None if nothing is selected."""
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+    n, p = X.shape
+    # NOTE(review): seed_n is accepted but never used in this function.
+    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma  # mean over 2000 simulated noise vectors of max |X^T noise|, scaled by lam_frac and sigma
+    loss = rr.glm.gaussian(X, y)
+    epsilon = 1. / np.sqrt(n)
+    W = np.ones(p) * lam  # same penalty weight lam for every coordinate
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
+    active = M_est._overall  # boolean mask used below to pick the selected columns of X
+    nactive = np.sum(active)
+    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
+
+    active_gram_inv = np.linalg.inv(X[:, active].T.dot(X[:, active]))  # computed once: it does not change across bootstrap draws
+
+    if nactive > 0:
+        boot_sample = np.zeros((B, nactive))
+        for k in range(nactive):
+            M_est.solve_map_univariate_target(k)  # set up the transform for target coordinate k
+            approx_MLE, value, mle_map = solve_UMVU(M_est.target_transform,
+                                                    M_est.opt_transform,
+                                                    np.array([M_est.target_observed]),  # NOTE(review): passes the whole observed target vector, not entry k -- confirm intended
+                                                    M_est.feasible_point,
+                                                    M_est.target_cov[k,k],
+                                                    M_est.randomizer_precision)
+
+            for b in range(B):
+                boot_indices = np.random.choice(n, n, replace=True)  # resample the n rows with replacement
+                boot_vector = (X[boot_indices, :]).T.dot(y[boot_indices])
+                target_boot = active_gram_inv.dot(boot_vector[active])[k]  # coordinate k, matching the k passed to solve_map_univariate_target
+                boot_sample[b,k] = (mle_map(target_boot))[0]  # element 0 of mle_map's result is kept
+
+            sys.stderr.write("iteration completed" + str(k) + "\n")
+
+        centered_boot_sample = boot_sample - boot_sample.mean(0)[None, :]  # center each coordinate's draws at their own mean
+        std_boot_sample = centered_boot_sample / (boot_sample.std(0)[None, :])  # scale each coordinate by its bootstrap sd
+
+        return std_boot_sample.reshape((B * nactive,))
+    else:
+        return None
+
+if __name__ == "__main__":  # script entry: plot the ECDF of the bootstrap values after mapping them through the normal CDF
+    import matplotlib.pyplot as plt
+
+    plt.clf()
+    bootstrap = boot_lasso(n=100, p=50, s=5, signal=5., B=5000, seed_n = 0, lam_frac=1., randomization_scale=1.)  # NOTE(review): boot_lasso returns None when nothing is selected; that case is not handled here
+    boot_pivot = bootstrap
+    ecdf = ECDF(ndist.cdf(boot_pivot))  # standard normal CDF of each standardized bootstrap value, then its empirical CDF
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='blue', marker='^')
+    #plt.plot(grid, grid, c='red', marker='^')
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/only_boot_selective_MLE_lasso_p50.png")
\ No newline at end of file

From c49a9cc6784a6282fde591a77e8125cc2f409512 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jm2.SUNet>
Date: Mon, 13 Nov 2017 12:45:33 -0800
Subject: [PATCH 362/617] approx sd by bootstrap

---
 selection/adjusted_MLE/tests/test_MLE.py | 27 +++++++++++++-----------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index e202e6dd3..d5480c37d 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -51,12 +51,11 @@ def test_bias_lasso(nsim = 500):
 
     print(bias/nsim)
 
-#test_bias_lasso()
 
-def bootstrap_lasso(B=500):
-    p = 200
+def bootstrap_lasso(B=500, seed_n=0):
+    p = 50
     n= 100
-    run_lasso = test_lasso(n=n, p=p, s=20, signal=7., seed_n = 0, lam_frac=1., randomization_scale=1.)
+    run_lasso = test_lasso(n=n, p=p, s=10, signal=7., seed_n = seed_n, lam_frac=1., randomization_scale=1.)
 
     boot_sample = np.zeros((B,run_lasso[3].sum()))
     for b in range(B):
@@ -65,21 +64,25 @@ def bootstrap_lasso(B=500):
         active = run_lasso[3]
         target_boot = (run_lasso[6]).dot(boot_vector[active])
         boot_sample[b, :] = (run_lasso[7](target_boot))[0]
+
     true_target = run_lasso[8]
-    #centered_boot_sample = boot_sample - boot_sample.mean(0)[None, :]
-    centered_boot_sample = boot_sample - true_target[None, :]
-    std_boot_sample = centered_boot_sample/(boot_sample.std(0)[None,:])
+    std_boot_sample = np.true_divide((run_lasso[1]- true_target),boot_sample.std(0))
 
-    return std_boot_sample.reshape((B * run_lasso[3].sum(),)), \
-           np.mean(centered_boot_sample.reshape((B * run_lasso[3].sum(),)))
+    return std_boot_sample
 
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
+    ndraw = 50
+    boot_pivot= []
+    for i in range(ndraw):
+        pivot = bootstrap_lasso(B=5000, seed_n=i)
+        for j in range(pivot.shape[0]):
+            boot_pivot.append(pivot[j])
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+    print("boot pivot", boot_pivot)
     plt.clf()
-    bootstrap = bootstrap_lasso(B=10000)
-    boot_pivot = bootstrap[0]
-    ecdf = ECDF(ndist.cdf(boot_pivot))
+    ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
     grid = np.linspace(0, 1, 101)
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='blue', marker='^')

From 460181b615f443fa651bfec48da110f0bac93fcd Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jm2.SUNet>
Date: Mon, 13 Nov 2017 13:47:37 -0800
Subject: [PATCH 363/617] rearranged code

---
 selection/adjusted_MLE/selective_MLE.py       |  7 ++--
 selection/adjusted_MLE/tests/test_MLE.py      | 36 +++++++------------
 .../adjusted_MLE/tests/test_simple_problem.py | 22 ++++++++----
 3 files changed, 29 insertions(+), 36 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 99d255d0f..fd09b87db 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -75,11 +75,8 @@ def solve_UMVU(target_transform,
     #assert ntarget == 1
 
     # setup joint implied covariance matrix
-    if ntarget>1:
-        target_precision = np.linalg.inv(target_cov)
-    else:
-        target_precision = 1./target_cov
-        opt_offset = opt_offset.reshape((opt_offset.shape[0],1))
+
+    target_precision = np.linalg.inv(target_cov)
 
     implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
 
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index d5480c37d..83918b14c 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -8,7 +8,7 @@
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from statsmodels.distributions.empirical_distribution import ECDF
 
-def test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
+def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., randomization_scale=1.):
 
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
@@ -38,8 +38,15 @@ def test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomizati
                                                 M_est.target_cov,
                                                 M_est.randomizer_precision)
 
-        return np.mean(approx_MLE- true_target), approx_MLE, M_est.target_observed, active, X, y,\
-               np.linalg.inv(X[:, active].T.dot(X[:, active])), mle_map, true_target
+        boot_sample = np.zeros((B, nactive))
+        for b in range(B):
+            boot_indices = np.random.choice(n, n, replace=True)
+            boot_vector = (X[boot_indices, :]).T.dot(y[boot_indices])
+            target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector[active])
+            boot_sample[b, :] = mle_map(target_boot)[0]
+
+        print("estimated sd", boot_sample.std(0))
+        return np.true_divide((approx_MLE- true_target), boot_sample.std(0))
     else:
         return None
 
@@ -52,35 +59,16 @@ def test_bias_lasso(nsim = 500):
     print(bias/nsim)
 
 
-def bootstrap_lasso(B=500, seed_n=0):
-    p = 50
-    n= 100
-    run_lasso = test_lasso(n=n, p=p, s=10, signal=7., seed_n = seed_n, lam_frac=1., randomization_scale=1.)
-
-    boot_sample = np.zeros((B,run_lasso[3].sum()))
-    for b in range(B):
-        boot_indices = np.random.choice(n, n, replace=True)
-        boot_vector = ((run_lasso[4])[boot_indices,:]).T.dot((run_lasso[5])[boot_indices])
-        active = run_lasso[3]
-        target_boot = (run_lasso[6]).dot(boot_vector[active])
-        boot_sample[b, :] = (run_lasso[7](target_boot))[0]
-
-    true_target = run_lasso[8]
-    std_boot_sample = np.true_divide((run_lasso[1]- true_target),boot_sample.std(0))
-
-    return std_boot_sample
-
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
     ndraw = 50
     boot_pivot= []
     for i in range(ndraw):
-        pivot = bootstrap_lasso(B=5000, seed_n=i)
+        pivot = test_lasso(n=100, p=50, s=5, signal=5., B= 5000, seed_n = 0)
         for j in range(pivot.shape[0]):
             boot_pivot.append(pivot[j])
         sys.stderr.write("iteration completed" + str(i) + "\n")
-    print("boot pivot", boot_pivot)
     plt.clf()
     ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
     grid = np.linspace(0, 1, 101)
@@ -88,4 +76,4 @@ def bootstrap_lasso(B=500, seed_n=0):
     plt.plot(grid, ecdf(grid), c='blue', marker='^')
     plt.plot(grid, grid, c='red', marker='^')
     plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/true_target_boot_selective_MLE_lasso_p200.png")
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/boot_selective_MLE_lasso_p50.png")
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index 3efeed8dc..8aa7d80b4 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -66,11 +66,12 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
     while True:
         Zval = np.random.normal(true_mean, 1, n)
         omega = np.random.normal(0, 1)
-        target_Z = (np.sum(Zval) / np.sqrt(n))
+        target_Z = ((Zval).sum())/np.sqrt(n)
         check = target_Z + omega - threshold
         if check>0.:
             break
 
+    print("target Z", Zval, target_Z)
     approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
 
     boot_sample = []
@@ -78,8 +79,10 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
         Zval_boot = np.sum(Zval[np.random.choice(n, n, replace=True)]) / np.sqrt(n)
         boot_sample.append(mle_map(Zval_boot)[0])
 
+    print("approx_MLE", approx_MLE, np.std(boot_sample), true_mean)
     return boot_sample, np.mean(boot_sample), np.std(boot_sample), \
-           np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample))
+           np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample)), \
+           np.true_divide(approx_MLE - true_mean, np.std(boot_sample))
 
 # if __name__ == "__main__":
 #     n = 1000
@@ -116,12 +119,17 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    plt.clf()
-    boot_result = bootstrap_simple(n= 100, B=1000, true_mean=1., threshold=2.)
-    boot_pivot = boot_result[3]
-    print("boot sample", boot_pivot.shape)
-    ecdf = ECDF(ndist.cdf(boot_pivot))
+    ndraw = 100
+    boot_pivot=[]
+    for i in range(ndraw):
+        boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
+        boot_pivot.append(boot_result[4])
+
+    print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)
+    ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
     grid = np.linspace(0, 1, 101)
+
+    plt.clf()
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.show()
\ No newline at end of file

From 2fb75638ee9992418ec0e2a0c4c365c5ea5397e5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jm2.SUNet>
Date: Mon, 13 Nov 2017 13:57:37 -0800
Subject: [PATCH 364/617] cleaned bootstrap for simple problem

---
 selection/adjusted_MLE/tests/test_simple_problem.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index 8aa7d80b4..edcda158f 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -71,7 +71,6 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
         if check>0.:
             break
 
-    print("target Z", Zval, target_Z)
     approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
 
     boot_sample = []
@@ -82,7 +81,7 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
     print("approx_MLE", approx_MLE, np.std(boot_sample), true_mean)
     return boot_sample, np.mean(boot_sample), np.std(boot_sample), \
            np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample)), \
-           np.true_divide(approx_MLE - true_mean, np.std(boot_sample))
+           np.true_divide(approx_MLE - np.sqrt(n)*true_mean, np.std(boot_sample))
 
 # if __name__ == "__main__":
 #     n = 1000
@@ -122,7 +121,7 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
     ndraw = 100
     boot_pivot=[]
     for i in range(ndraw):
-        boot_result = bootstrap_simple(n= 100, B=1000, true_mean=0., threshold=2.)
+        boot_result = bootstrap_simple(n= 100, B=1000, true_mean=1., threshold=2.)
         boot_pivot.append(boot_result[4])
 
     print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)

From af20b632defc09716c68c4940c442e68d90cb0c2 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jm2.SUNet>
Date: Mon, 13 Nov 2017 14:27:12 -0800
Subject: [PATCH 365/617] set seed properly

---
 selection/adjusted_MLE/tests/test_MLE.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 83918b14c..bcc7d115d 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -9,7 +9,7 @@
 from statsmodels.distributions.empirical_distribution import ECDF
 
 def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., randomization_scale=1.):
-
+    np.random.seed(seed_n)
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
 
@@ -65,9 +65,11 @@ def test_bias_lasso(nsim = 500):
     ndraw = 50
     boot_pivot= []
     for i in range(ndraw):
-        pivot = test_lasso(n=100, p=50, s=5, signal=5., B= 5000, seed_n = 0)
-        for j in range(pivot.shape[0]):
-            boot_pivot.append(pivot[j])
+        pivot = test_lasso(n=100, p=50, s=0, signal=5., B= 5000, seed_n = i)
+        if pivot is not None:
+            for j in range(pivot.shape[0]):
+                boot_pivot.append(pivot[j])
+
         sys.stderr.write("iteration completed" + str(i) + "\n")
     plt.clf()
     ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))

From 7067ea5084ae4074c91d25dbeb3630663d3a348a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525jm2.SUNet>
Date: Mon, 13 Nov 2017 15:18:04 -0800
Subject: [PATCH 366/617] added bias to test_MLE

---
 selection/adjusted_MLE/tests/test_MLE.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index bcc7d115d..87d206bf7 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -46,7 +46,7 @@ def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., ran
             boot_sample[b, :] = mle_map(target_boot)[0]
 
         print("estimated sd", boot_sample.std(0))
-        return np.true_divide((approx_MLE- true_target), boot_sample.std(0))
+        return np.true_divide((approx_MLE- true_target), boot_sample.std(0)), ((approx_MLE- true_target).sum())/float(nactive)
     else:
         return None
 
@@ -64,13 +64,17 @@ def test_bias_lasso(nsim = 500):
 
     ndraw = 50
     boot_pivot= []
+    bias = 0.
     for i in range(ndraw):
-        pivot = test_lasso(n=100, p=50, s=0, signal=5., B= 5000, seed_n = i)
-        if pivot is not None:
+        boot = test_lasso(n=100, p=50, s=0, signal=5., B= 10000, seed_n = i)
+        if boot is not None:
+            pivot = boot[0]
+            bias += boot[1]
             for j in range(pivot.shape[0]):
                 boot_pivot.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
+    sys.stderr.write("overall_bias" + str(bias/float(ndraw)) + "\n")
     plt.clf()
     ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
     grid = np.linspace(0, 1, 101)

From c3cceae9c996c25ceb05f71a5d573296c16916c0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 13 Nov 2017 17:51:49 -0800
Subject: [PATCH 367/617] work on bootstrap stuff

---
 selection/adjusted_MLE/tests/test_MLE.py      | 33 ++++++++++--------
 .../adjusted_MLE/tests/test_simple_problem.py | 34 +++++++++++--------
 2 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 87d206bf7..c8b6c63d8 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -9,7 +9,7 @@
 from statsmodels.distributions.empirical_distribution import ECDF
 
 def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., randomization_scale=1.):
-    np.random.seed(seed_n)
+    #np.random.seed(seed_n)
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
 
@@ -31,6 +31,7 @@ def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., ran
     nactive = np.sum(active)
     sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
     if nactive > 0:
+
         approx_MLE, value, mle_map = solve_UMVU(M_est.target_transform,
                                                 M_est.opt_transform,
                                                 M_est.target_observed,
@@ -39,14 +40,17 @@ def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., ran
                                                 M_est.randomizer_precision)
 
         boot_sample = np.zeros((B, nactive))
+        beta_obs = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:,active].T).dot(y)
+        resid = y - X[:, active].dot(beta_obs)
         for b in range(B):
             boot_indices = np.random.choice(n, n, replace=True)
-            boot_vector = (X[boot_indices, :]).T.dot(y[boot_indices])
-            target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector[active])
+            boot_vector = (X[boot_indices, :] [:,active]).T.dot(resid[boot_indices])
+            target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + beta_obs
             boot_sample[b, :] = mle_map(target_boot)[0]
 
         print("estimated sd", boot_sample.std(0))
         return np.true_divide((approx_MLE- true_target), boot_sample.std(0)), ((approx_MLE- true_target).sum())/float(nactive)
+        
     else:
         return None
 
@@ -62,11 +66,11 @@ def test_bias_lasso(nsim = 500):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 50
+    ndraw = 100
     boot_pivot= []
     bias = 0.
     for i in range(ndraw):
-        boot = test_lasso(n=100, p=50, s=0, signal=5., B= 10000, seed_n = i)
+        boot = test_lasso(n=300, p=1, s=1, signal=5., B= 1000, seed_n = i)
         if boot is not None:
             pivot = boot[0]
             bias += boot[1]
@@ -74,12 +78,13 @@ def test_bias_lasso(nsim = 500):
                 boot_pivot.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
-    sys.stderr.write("overall_bias" + str(bias/float(ndraw)) + "\n")
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='blue', marker='^')
-    plt.plot(grid, grid, c='red', marker='^')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/boot_selective_MLE_lasso_p50.png")
+        sys.stderr.write("overall_bias" + str(bias/float(ndraw)) + "\n")
+        if i % 10 == 0:
+            plt.clf()
+            ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
+            grid = np.linspace(0, 1, 101)
+            print("ecdf", ecdf(grid))
+            plt.plot(grid, ecdf(grid), c='red', marker='^')
+            plt.plot(grid, grid, 'k--')
+            plt.savefig("boot_selective_MLE_lasso_p50.png")
+
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index edcda158f..9a5f55810 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -63,12 +63,16 @@ def test_orthogonal_lasso(n=5):
 
 def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 
+    resid_matrix = np.identity(n) - np.ones((n,n)) / n
+    U, D, V = np.linalg.svd(resid_matrix)
+    U = U[:,:-1]
+
     while True:
-        Zval = np.random.normal(true_mean, 1, n)
-        omega = np.random.normal(0, 1)
-        target_Z = ((Zval).sum())/np.sqrt(n)
-        check = target_Z + omega - threshold
-        if check>0.:
+        target_Z, omega = np.random.standard_normal(2)
+        target_Z += true_mean * np.sqrt(n)
+        if target_Z + omega > threshold:
+            Zval = U.dot(np.random.standard_normal(n-1))
+            Zval += target_Z * np.ones(n) / np.sqrt(n)
             break
 
     approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
@@ -118,17 +122,19 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 100
+    ndraw = 200
     boot_pivot=[]
     for i in range(ndraw):
-        boot_result = bootstrap_simple(n= 100, B=1000, true_mean=1., threshold=2.)
+        boot_result = bootstrap_simple(n=300, B=5000, true_mean=0., threshold=2.)
         boot_pivot.append(boot_result[4])
 
-    print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)
-    ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
-    grid = np.linspace(0, 1, 101)
+        print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)
+        ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
+        grid = np.linspace(0, 1, 101)
 
-    plt.clf()
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.show()
\ No newline at end of file
+        if i % 10 == 0:
+            plt.clf()
+            print("ecdf", ecdf(grid))
+            plt.plot(grid, ecdf(grid), c='red', marker='^')
+            plt.plot([0,1],[0,1], 'k--')
+            plt.savefig('bootstrap_simple.png')

From 9a81eeaf204efd5908ab858bdec8730e5e23d08e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Mon, 13 Nov 2017 23:57:23 -0800
Subject: [PATCH 368/617] update bootstrap

---
 selection/adjusted_MLE/tests/test_MLE.py      | 47 ++++++++++---------
 .../adjusted_MLE/tests/test_simple_problem.py | 34 ++++++++------
 2 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 87d206bf7..1fb003825 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -8,8 +8,9 @@
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from statsmodels.distributions.empirical_distribution import ECDF
 
-def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., randomization_scale=1.):
-    np.random.seed(seed_n)
+
+def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., randomization_scale=1.):
+    # np.random.seed(seed_n)
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
 
@@ -27,10 +28,11 @@ def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., ran
     active = M_est._overall
 
     true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-    #true_target = beta[active]
+    # true_target = beta[active]
     nactive = np.sum(active)
     sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
     if nactive > 0:
+
         approx_MLE, value, mle_map = solve_UMVU(M_est.target_transform,
                                                 M_est.opt_transform,
                                                 M_est.target_observed,
@@ -39,34 +41,38 @@ def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., ran
                                                 M_est.randomizer_precision)
 
         boot_sample = np.zeros((B, nactive))
+        beta_obs = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(y)
+        resid = y - X[:, active].dot(beta_obs)
         for b in range(B):
             boot_indices = np.random.choice(n, n, replace=True)
-            boot_vector = (X[boot_indices, :]).T.dot(y[boot_indices])
-            target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector[active])
+            boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
+            target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + beta_obs
             boot_sample[b, :] = mle_map(target_boot)[0]
 
         print("estimated sd", boot_sample.std(0))
-        return np.true_divide((approx_MLE- true_target), boot_sample.std(0)), ((approx_MLE- true_target).sum())/float(nactive)
+        return np.true_divide((approx_MLE - true_target), boot_sample.std(0)), (
+        (approx_MLE - true_target).sum()) / float(nactive)
+
     else:
         return None
 
-def test_bias_lasso(nsim = 500):
 
+def test_bias_lasso(nsim=500):
     bias = 0
     for _ in range(nsim):
-        bias += test_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.)[0]
+        bias += test_lasso(n=100, p=50, s=5, signal=5., seed_n=0, lam_frac=1., randomization_scale=1.)[0]
 
-    print(bias/nsim)
+    print(bias / nsim)
 
 
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 50
-    boot_pivot= []
+    ndraw = 100
+    boot_pivot = []
     bias = 0.
     for i in range(ndraw):
-        boot = test_lasso(n=100, p=50, s=0, signal=5., B= 10000, seed_n = i)
+        boot = test_lasso(n=300, p=1, s=1, signal=5., B=1000, seed_n=i)
         if boot is not None:
             pivot = boot[0]
             bias += boot[1]
@@ -74,12 +80,11 @@ def test_bias_lasso(nsim = 500):
                 boot_pivot.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
-    sys.stderr.write("overall_bias" + str(bias/float(ndraw)) + "\n")
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='blue', marker='^')
-    plt.plot(grid, grid, c='red', marker='^')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/boot_selective_MLE_lasso_p50.png")
+        sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
+        if i % 10 == 0:
+            plt.clf()
+            ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
+            grid = np.linspace(0, 1, 101)
+            print("ecdf", ecdf(grid))
+            plt.plot(grid, ecdf(grid), c='red', marker='^')
+            plt.plot(grid, grid, 'k--')
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index edcda158f..aa6a07da1 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -63,12 +63,16 @@ def test_orthogonal_lasso(n=5):
 
 def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 
+    resid_matrix = np.identity(n) - np.ones((n,n)) / n
+    U, D, V = np.linalg.svd(resid_matrix)
+    U = U[:,:-1]
+
     while True:
-        Zval = np.random.normal(true_mean, 1, n)
-        omega = np.random.normal(0, 1)
-        target_Z = ((Zval).sum())/np.sqrt(n)
-        check = target_Z + omega - threshold
-        if check>0.:
+        target_Z, omega = np.random.standard_normal(2)
+        target_Z += true_mean * np.sqrt(n)
+        if target_Z + omega > threshold:
+            Zval = U.dot(np.random.standard_normal(n-1))
+            Zval += target_Z * np.ones(n) / np.sqrt(n)
             break
 
     approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
@@ -118,17 +122,19 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 100
+    ndraw = 200
     boot_pivot=[]
     for i in range(ndraw):
-        boot_result = bootstrap_simple(n= 100, B=1000, true_mean=1., threshold=2.)
+        boot_result = bootstrap_simple(n=300, B=5000, true_mean=0., threshold=2.)
         boot_pivot.append(boot_result[4])
 
-    print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)
-    ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
-    grid = np.linspace(0, 1, 101)
+        print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)
+        ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
+        grid = np.linspace(0, 1, 101)
 
-    plt.clf()
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.show()
\ No newline at end of file
+        if i % 10 == 0:
+            plt.clf()
+            print("ecdf", ecdf(grid))
+            plt.plot(grid, ecdf(grid), c='red', marker='^')
+            plt.plot([0,1],[0,1], 'k--')
+            plt.savefig('bootstrap_simple.png')
\ No newline at end of file

From 4a0e15ace9903ab0f57916a8ed9db817455d4fd8 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Tue, 14 Nov 2017 00:17:36 -0800
Subject: [PATCH 369/617] added hessian-- need to check

---
 selection/adjusted_MLE/selective_MLE.py  | 20 ++++++----
 selection/adjusted_MLE/tests/test_MLE.py | 47 ++++++++++++------------
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index fd09b87db..f61d4fad3 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -107,9 +107,9 @@ def solve_UMVU(target_transform,
     conditional_precision = implied_precision[ntarget:,ntarget:]
 
     #print("check shapes", conditional_natural_parameter.shape, conditional_precision.shape)
-    soln, value = solve_barrier_nonneg(conditional_natural_parameter,
-                                       conditional_precision,
-                                       feasible_point=feasible_point)
+    soln, value, hess = solve_barrier_nonneg(conditional_natural_parameter,
+                                             conditional_precision,
+                                             feasible_point=feasible_point)
     M_1_inv = np.linalg.inv(M_1)
     offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
     linear_term = np.vstack([M_1_inv, -M_1_inv.dot(L)])
@@ -118,14 +118,15 @@ def solve_UMVU(target_transform,
     def mle_map(natparam_transform, mle_transform, feasible_point, conditional_precision, target_observed):
         param_lin, param_offset = natparam_transform
         mle_target_lin, mle_soln_lin, mle_offset = mle_transform
-        soln, value = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
+        soln, value, hess = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
                                            conditional_precision,
                                            feasible_point=feasible_point)
-        return mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset, value
+        hessian = mle_target_lin+ mle_soln_lin.dot(hess).dot(conditional_precision).dot(param_lin)
+        return mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset, value, hessian
 
     mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, feasible_point, conditional_precision)
-    sel_MLE, value = mle_partial(target_observed)
-    return np.squeeze(sel_MLE), value, mle_partial
+    sel_MLE, value, hessian = mle_partial(target_observed)
+    return np.squeeze(sel_MLE), value, hessian, mle_partial
 
 
 def solve_barrier_nonneg(conjugate_arg,
@@ -143,6 +144,7 @@ def solve_barrier_nonneg(conjugate_arg,
 
     objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
     grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
+    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
 
     current = feasible_point
     current_value = np.inf
@@ -185,7 +187,9 @@ def solve_barrier_nonneg(conjugate_arg,
         if itercount % 4 == 0:
             step *= 2
 
-    return current, current_value
+    print("check", np.diag(barrier_hessian(current)))
+    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
+    return current, current_value, hess
 
 
 
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 1fb003825..bef5bbb9b 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -56,7 +56,6 @@ def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., random
     else:
         return None
 
-
 def test_bias_lasso(nsim=500):
     bias = 0
     for _ in range(nsim):
@@ -65,26 +64,26 @@ def test_bias_lasso(nsim=500):
     print(bias / nsim)
 
 
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 100
-    boot_pivot = []
-    bias = 0.
-    for i in range(ndraw):
-        boot = test_lasso(n=300, p=1, s=1, signal=5., B=1000, seed_n=i)
-        if boot is not None:
-            pivot = boot[0]
-            bias += boot[1]
-            for j in range(pivot.shape[0]):
-                boot_pivot.append(pivot[j])
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
-        if i % 10 == 0:
-            plt.clf()
-            ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
-            grid = np.linspace(0, 1, 101)
-            print("ecdf", ecdf(grid))
-            plt.plot(grid, ecdf(grid), c='red', marker='^')
-            plt.plot(grid, grid, 'k--')
\ No newline at end of file
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 100
+#     boot_pivot = []
+#     bias = 0.
+#     for i in range(ndraw):
+#         boot = test_lasso(n=300, p=1, s=1, signal=5., B=1000, seed_n=i)
+#         if boot is not None:
+#             pivot = boot[0]
+#             bias += boot[1]
+#             for j in range(pivot.shape[0]):
+#                 boot_pivot.append(pivot[j])
+#
+#         sys.stderr.write("iteration completed" + str(i) + "\n")
+#         sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
+#         if i % 10 == 0:
+#             plt.clf()
+#             ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
+#             grid = np.linspace(0, 1, 101)
+#             print("ecdf", ecdf(grid))
+#             plt.plot(grid, ecdf(grid), c='red', marker='^')
+#             plt.plot(grid, grid, 'k--')
\ No newline at end of file

From 082109a8e1beb9d251fb3b2d3f086a0b6e6079d0 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c277.SUNet>
Date: Tue, 14 Nov 2017 09:43:22 -0800
Subject: [PATCH 370/617] added hessian argument to mle

---
 selection/adjusted_MLE/selective_MLE.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index f61d4fad3..584988628 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -96,31 +96,27 @@ def solve_UMVU(target_transform,
     M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
     conditioned_value = data_offset + opt_offset
-    #print("shapes", data_offset.shape, opt_offset.shape, conditioned_value.shape)
 
     linear_term = implied_cross.T.dot(np.linalg.inv(implied_target))
     offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
-    #print("check shapes", linear_term.dot(target_observed).shape, offset_term.shape)
     natparam_transform = (linear_term, offset_term)
     conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
 
     conditional_precision = implied_precision[ntarget:,ntarget:]
 
-    #print("check shapes", conditional_natural_parameter.shape, conditional_precision.shape)
     soln, value, hess = solve_barrier_nonneg(conditional_natural_parameter,
                                              conditional_precision,
                                              feasible_point=feasible_point)
     M_1_inv = np.linalg.inv(M_1)
     offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
-    linear_term = np.vstack([M_1_inv, -M_1_inv.dot(L)])
     mle_transform = (M_1_inv, -M_1_inv.dot(L), offset_term)
 
     def mle_map(natparam_transform, mle_transform, feasible_point, conditional_precision, target_observed):
         param_lin, param_offset = natparam_transform
         mle_target_lin, mle_soln_lin, mle_offset = mle_transform
         soln, value, hess = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
-                                           conditional_precision,
-                                           feasible_point=feasible_point)
+                                                 conditional_precision,
+                                                 feasible_point=feasible_point)
         hessian = mle_target_lin+ mle_soln_lin.dot(hess).dot(conditional_precision).dot(param_lin)
         return mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset, value, hessian
 
@@ -187,7 +183,6 @@ def solve_barrier_nonneg(conjugate_arg,
         if itercount % 4 == 0:
             step *= 2
 
-    print("check", np.diag(barrier_hessian(current)))
     hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
     return current, current_value, hess
 

From ddbf278a34ef6f48640f0c9995ac9debf2cb8555 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c277.SUNet>
Date: Tue, 14 Nov 2017 10:05:15 -0800
Subject: [PATCH 371/617] added approx fisher info in test

---
 selection/adjusted_MLE/selective_MLE.py  |  1 -
 selection/adjusted_MLE/tests/test_MLE.py | 78 +++++++++++++++++++++---
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 584988628..bb7fa53bb 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -132,7 +132,6 @@ def solve_barrier_nonneg(conjugate_arg,
                          nstep=30,
                          tol=1.e-8):
 
-    #conjugate_arg = precision.dot(mean_vec)
     scaling = np.sqrt(np.diag(precision))
 
     if feasible_point is None:
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index bef5bbb9b..75290da3b 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -33,12 +33,12 @@ def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., random
     sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
     if nactive > 0:
 
-        approx_MLE, value, mle_map = solve_UMVU(M_est.target_transform,
-                                                M_est.opt_transform,
-                                                M_est.target_observed,
-                                                M_est.feasible_point,
-                                                M_est.target_cov,
-                                                M_est.randomizer_precision)
+        approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                     M_est.opt_transform,
+                                                     M_est.target_observed,
+                                                     M_est.feasible_point,
+                                                     M_est.target_cov,
+                                                     M_est.randomizer_precision)
 
         boot_sample = np.zeros((B, nactive))
         beta_obs = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(y)
@@ -56,6 +56,45 @@ def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., random
     else:
         return None
 
+def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=1.):
+    """Draw one gaussian_instance problem; return (error / approx_std, mean error) of the solve_UMVU estimate, or None if nothing is selected."""
+    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+    n, p = X.shape
+    # lam = lam_frac * sigma * average, over 2000 standard-normal noise columns, of max_j |X_j' noise|
+    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    loss = rr.glm.gaussian(X, y)
+    epsilon = 1. / np.sqrt(n)
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+    # groups are labelled 0..p-1 with equal weight lam (presumably one group per coordinate, i.e. a lasso penalty -- confirm)
+    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
+
+    M_est.solve_map()
+    active = M_est._overall
+    # target: least-squares coefficients of the noiseless mean X.dot(beta) on the selected columns
+    true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+    # true_target = beta[active]
+    nactive = np.sum(active)
+    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
+    if nactive > 0:
+
+        approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                     M_est.opt_transform,
+                                                     M_est.target_observed,
+                                                     M_est.feasible_point,
+                                                     M_est.target_cov,
+                                                     M_est.randomizer_precision)
+        # NOTE(review): the third output of solve_UMVU is used here as a covariance matrix -- confirm
+        approx_std = np.sqrt(np.diag(var))
+        print("approx_std", approx_std)
+        return np.true_divide((approx_MLE - true_target), approx_std), ((approx_MLE - true_target).sum()) / float(nactive)
+
+    else:
+        return None
+
+
 def test_bias_lasso(nsim=500):
     bias = 0
     for _ in range(nsim):
@@ -86,4 +125,29 @@ def test_bias_lasso(nsim=500):
 #             grid = np.linspace(0, 1, 101)
 #             print("ecdf", ecdf(grid))
 #             plt.plot(grid, ecdf(grid), c='red', marker='^')
-#             plt.plot(grid, grid, 'k--')
\ No newline at end of file
+#             plt.plot(grid, grid, 'k--')
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    ndraw = 100
+    bias = 0.
+    pivot_obs_info= []
+    for i in range(ndraw):
+        approx = test_lasso_approx_var(n=300, p=10, s=1, signal=5.)
+        if approx is not None:
+            pivot = approx[0]
+            bias += approx[1]
+            for j in range(pivot.shape[0]):
+                pivot_obs_info.append(pivot[j])
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
+
+    plt.clf()
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot(grid, grid, 'k--')
+    plt.show()
\ No newline at end of file

From 2ee8e816a35ae3436a7dfaca3c3c58fc3a382a3c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c277.SUNet>
Date: Tue, 14 Nov 2017 10:14:53 -0800
Subject: [PATCH 372/617] test for approx fisher info

---
 selection/adjusted_MLE/tests/test_MLE.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 75290da3b..94939d813 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -130,11 +130,11 @@ def test_bias_lasso(nsim=500):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 100
+    ndraw = 200
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=10, s=1, signal=5.)
+        approx = test_lasso_approx_var(n=300, p=1, s=0, signal=5.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -144,10 +144,13 @@ def test_bias_lasso(nsim=500):
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
 
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot(grid, grid, 'k--')
-    plt.show()
\ No newline at end of file
+        if i % 10 == 0:
+            plt.clf()
+            ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+            grid = np.linspace(0, 1, 101)
+            print("ecdf", ecdf(grid))
+            plt.plot(grid, ecdf(grid), c='red', marker='^')
+            plt.plot(grid, grid, 'k--')
+            #plt.show()
+            plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1.png")
+

From 6ef7070671061323dd414af0fc39a6e0f6c9d1f0 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c277.SUNet>
Date: Tue, 14 Nov 2017 10:52:07 -0800
Subject: [PATCH 373/617] checked diff between approx and exact fisher info
 based sd-- simple problem

---
 .../adjusted_MLE/tests/test_simple_problem.py | 55 ++++++++++++-------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index aa6a07da1..b13bc7d33 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -3,7 +3,7 @@
 
 from scipy.stats import norm as ndist
 from selection.adjusted_MLE.selective_MLE import solve_UMVU
-from selection.adjusted_MLE.tests.exact_MLE import grad_CGF
+from selection.adjusted_MLE.tests.exact_MLE import grad_CGF, fisher_info
 from statsmodels.distributions.empirical_distribution import ECDF
 
 def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
@@ -87,6 +87,21 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
            np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample)), \
            np.true_divide(approx_MLE - np.sqrt(n)*true_mean, np.std(boot_sample))
 
+def check_approx_fisher_simple(true_mean, threshold=2, randomization_scale=1., nsim=200):
+    diff = 0.
+    for _ in range(nsim):
+        Z = sim_simple_problem(true_mean, threshold, randomization_scale)
+        approx = simple_problem(Z, threshold=threshold, randomization_scale=randomization_scale)
+        approx_std = np.sqrt(np.diag(approx[2]))
+
+        exact_std = 1./np.sqrt(fisher_info(approx[0], randomization_scale = 1., threshold = 2))
+        diff += np.abs(exact_std-approx_std)
+        print("difference", np.abs(exact_std-approx_std))
+
+    print(diff/float(nsim))
+
+check_approx_fisher_simple(true_mean=-1., threshold=2, randomization_scale=1., nsim=100)
+
 # if __name__ == "__main__":
 #     n = 1000
 #     Zval = np.random.normal(0, 1, n)
@@ -119,22 +134,22 @@ def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
 #     plt.legend()
 #     plt.show()
 
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 200
-    boot_pivot=[]
-    for i in range(ndraw):
-        boot_result = bootstrap_simple(n=300, B=5000, true_mean=0., threshold=2.)
-        boot_pivot.append(boot_result[4])
-
-        print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)
-        ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
-        grid = np.linspace(0, 1, 101)
-
-        if i % 10 == 0:
-            plt.clf()
-            print("ecdf", ecdf(grid))
-            plt.plot(grid, ecdf(grid), c='red', marker='^')
-            plt.plot([0,1],[0,1], 'k--')
-            plt.savefig('bootstrap_simple.png')
\ No newline at end of file
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 200
+#     boot_pivot=[]
+#     for i in range(ndraw):
+#         boot_result = bootstrap_simple(n=300, B=5000, true_mean=0., threshold=2.)
+#         boot_pivot.append(boot_result[4])
+#
+#         print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)
+#         ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
+#         grid = np.linspace(0, 1, 101)
+#
+#         if i % 10 == 0:
+#             plt.clf()
+#             print("ecdf", ecdf(grid))
+#             plt.plot(grid, ecdf(grid), c='red', marker='^')
+#             plt.plot([0,1],[0,1], 'k--')
+#             plt.savefig('bootstrap_simple.png')
\ No newline at end of file

From 3c6e141fb9a32a876546be74ffa37209db3a4d19 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c277.SUNet>
Date: Tue, 14 Nov 2017 11:04:36 -0800
Subject: [PATCH 374/617] checked pivot in simple example

---
 .../adjusted_MLE/tests/test_simple_problem.py | 42 ++++++++++++++++++-
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index b13bc7d33..7a19838c4 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -100,7 +100,24 @@ def check_approx_fisher_simple(true_mean, threshold=2, randomization_scale=1., n
 
     print(diff/float(nsim))
 
-check_approx_fisher_simple(true_mean=-1., threshold=2, randomization_scale=1., nsim=100)
+def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2):
+
+    resid_matrix = np.identity(n) - np.ones((n, n)) / n
+    U, D, V = np.linalg.svd(resid_matrix)
+    U = U[:, :-1]
+
+    while True:
+        target_Z, omega = np.random.standard_normal(2)
+        target_Z += true_mean * np.sqrt(n)
+        if target_Z + omega > threshold:
+            Zval = U.dot(np.random.standard_normal(n - 1))
+            Zval += target_Z * np.ones(n) / np.sqrt(n)
+            break
+
+    approx_MLE, value, var, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
+    return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var))
+
+#check_approx_fisher_simple(true_mean=-1., threshold=2, randomization_scale=1., nsim=100)
 
 # if __name__ == "__main__":
 #     n = 1000
@@ -152,4 +169,25 @@ def check_approx_fisher_simple(true_mean, threshold=2, randomization_scale=1., n
 #             print("ecdf", ecdf(grid))
 #             plt.plot(grid, ecdf(grid), c='red', marker='^')
 #             plt.plot([0,1],[0,1], 'k--')
-#             plt.savefig('bootstrap_simple.png')
\ No newline at end of file
+#             plt.savefig('bootstrap_simple.png')
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    ndraw = 200
+    pivot_obs_info=[]
+    for i in range(ndraw):
+        result = pivot_approx_fisher_simple(n=300, true_mean = -0.1, threshold=2)
+        pivot_obs_info.append(result)
+
+    print("here", np.asarray(pivot_obs_info))
+
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    grid = np.linspace(0, 1, 101)
+
+    plt.clf()
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot([0,1],[0,1], 'k--')
+    plt.show()
+    #plt.savefig('bootstrap_simple.png')
\ No newline at end of file

From 1bc5c50133834fac6d2cd70413c9c9645cae9dce Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c66e.SUNet>
Date: Wed, 15 Nov 2017 09:18:30 -0800
Subject: [PATCH 375/617] cleaned tests

---
 selection/adjusted_MLE/selective_MLE.py       |   6 +-
 .../tests/compare_lasso_simple.py             | 139 ++++++++++++++++++
 selection/adjusted_MLE/tests/test_MLE.py      |  89 +++++------
 .../adjusted_MLE/tests/test_simple_problem.py |  31 +++-
 selection/tests/instance.py                   |   2 +-
 5 files changed, 211 insertions(+), 56 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/compare_lasso_simple.py

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index bb7fa53bb..dcf9c5144 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -41,8 +41,8 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
         self.target_cov = self.score_cov[:self.nactive, :self.nactive]
 
     def solve_map(self):
-        self.feasible_point = np.abs(self.initial_soln[self._overall])
-
+        #self.feasible_point = np.abs(self.initial_soln[self._overall])
+        self.feasible_point = np.ones(self._overall.sum())
         self.A = np.dot(self._score_linear_term, self.score_target_cov).dot(np.linalg.inv(self.target_cov))
         self.data_offset = self._score_linear_term.dot(self.observed_score_state)- self.A.dot(self.target_observed)
         self.target_transform = (self.A, self.data_offset)
@@ -79,8 +79,6 @@ def solve_UMVU(target_transform,
     target_precision = np.linalg.inv(target_cov)
 
     implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
-
-    #print("shapes", A.shape, (A.T.dot(randomizer_precision).dot(A)).shape, target_precision.shape)
     implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
     implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
     implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
diff --git a/selection/adjusted_MLE/tests/compare_lasso_simple.py b/selection/adjusted_MLE/tests/compare_lasso_simple.py
new file mode 100644
index 000000000..9d789b342
--- /dev/null
+++ b/selection/adjusted_MLE/tests/compare_lasso_simple.py
@@ -0,0 +1,139 @@
+from __future__ import print_function
+import numpy as np, sys
+
+import regreg.api as rr
+from selection.tests.instance import gaussian_instance
+from scipy.stats import norm as ndist
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
+from statsmodels.distributions.empirical_distribution import ECDF
+
+def test_lasso_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization_scale=1.):
+
+    lam = 2.
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 0.
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
+
+        M_est.solve_map()
+        active = M_est._overall
+
+        nactive = np.sum(active)
+        if nactive > 0:
+            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+            approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                         M_est.opt_transform,
+                                                         M_est.target_observed,
+                                                         M_est.feasible_point,
+                                                         M_est.target_cov,
+                                                         M_est.randomizer_precision)
+
+            #print("check maps", M_est.opt_transform, M_est.target_transform, M_est.feasible_point, M_est.target_cov,
+            #      M_est.randomizer_precision, M_est.target_observed)
+
+            _ , opt_offset = M_est.opt_transform
+            target_observed = np.atleast_1d(M_est.target_observed)
+            target_transform = (-np.identity(1), np.zeros(1))
+            s = np.asscalar(np.sign(opt_offset))
+            opt_transform = (s * np.identity(1), np.ones(1) * (s * 2.))
+            feasible_point = np.ones(1)
+            randomizer_precision = np.identity(1) / randomization_scale ** 2
+            target_cov = np.identity(1)
+            approx_MLE_0, value_0, var_0, mle_map_0= solve_UMVU(target_transform,
+                                                                opt_transform,
+                                                                target_observed,
+                                                                feasible_point,
+                                                                target_cov,
+                                                                randomizer_precision)
+            break
+
+    return np.squeeze((approx_MLE - true_target)/float(np.sqrt(var))), (approx_MLE - true_target), \
+           np.squeeze((approx_MLE_0 - true_target)/float(np.sqrt(var_0))), (approx_MLE_0 - true_target)
+
+
+def test_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization_scale=1.):
+
+    lam = 2.
+    while True:
+        X = np.ones((n, p)) / float(np.sqrt(n))
+        n, p = X.shape
+        beta = signal
+        y = np.random.standard_normal(n)
+        y += (beta / np.sqrt(n))
+        omega = np.random.standard_normal(1)
+
+        true_target = beta * np.sqrt(n)
+        target_observed = y.sum()/float(np.sqrt(n))
+        if np.abs(target_observed + omega) > lam :
+
+            target_transform = (-np.identity(1), np.zeros(1))
+            s = np.asscalar(np.sign(target_observed + omega))
+            opt_transform = (s * np.identity(1), np.ones(1) * (s * 2.))
+            feasible_point = np.ones(1)
+            randomizer_precision = np.identity(1) / randomization_scale ** 2
+            target_cov = np.identity(1)
+            approx_MLE_0, value_0, var_0, mle_map_0= solve_UMVU(target_transform,
+                                                                opt_transform,
+                                                                target_observed,
+                                                                feasible_point,
+                                                                target_cov,
+                                                                randomizer_precision)
+            break
+
+    return np.squeeze((approx_MLE_0 - true_target)/float(np.sqrt(var_0))), (approx_MLE_0 - true_target)
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    ndraw = 400
+    pivot_lasso = []
+    pivot_simple = []
+    diff = 0.
+    for i in range(ndraw):
+        approx = test_lasso_approx_var(n=300, p=1, s=1, signal=-2.)
+        if approx is not None:
+            pivot_lasso.append(approx[0])
+            pivot_simple.append(approx[2])
+            diff += approx[0]-approx[2]
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+
+    sys.stderr.write("diff" + str(diff) + "\n")
+
+    #if i % 10 == 0:
+    plt.clf()
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_lasso)))
+    ecdf_0 = ECDF(ndist.cdf(np.asarray(pivot_simple)))
+    grid = np.linspace(0, 1, 101)
+    #print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot(grid, ecdf_0(grid), '-b')
+    plt.plot(grid, grid, 'k--')
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_lasso_selective_MLE_lasso_p1_amp5.png")
+
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 200
+#     pivot_simple = []
+#     diff = 0.
+#     for i in range(ndraw):
+#         approx = test_approx_var(n=300, p=1, s=0, signal=0.)
+#         print("here")
+#         pivot_simple.append(approx[0])
+#         sys.stderr.write("iteration completed" + str(i) + "\n")
+#
+#     #if i % 10 == 0:
+#     plt.clf()
+#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_simple)))
+#     grid = np.linspace(0, 1, 101)
+#     plt.plot(grid, ecdf(grid), c='red', marker='^')
+#     plt.plot(grid, grid, 'k--')
+#     plt.show()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 94939d813..7d6c82309 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -10,11 +10,13 @@
 
 
 def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., randomization_scale=1.):
-    # np.random.seed(seed_n)
     X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
     n, p = X.shape
+    if p>1:
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+    else:
+        lam = 2.
 
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
     loss = rr.glm.gaussian(X, y)
     epsilon = 1. / np.sqrt(n)
     W = np.ones(p) * lam
@@ -57,42 +59,41 @@ def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., random
         return None
 
 def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=1.):
-    # np.random.seed(seed_n)
-    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
-    n, p = X.shape
 
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-    loss = rr.glm.gaussian(X, y)
-    epsilon = 1. / np.sqrt(n)
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+        n, p = X.shape
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
 
-    M_est.solve_map()
-    active = M_est._overall
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1./np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-    true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-    # true_target = beta[active]
-    nactive = np.sum(active)
-    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
-    if nactive > 0:
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
 
-        approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                     M_est.opt_transform,
-                                                     M_est.target_observed,
-                                                     M_est.feasible_point,
-                                                     M_est.target_cov,
-                                                     M_est.randomizer_precision)
+        M_est.solve_map()
+        active = M_est._overall
 
-        approx_std = np.sqrt(np.diag(var))
-        print("approx_std", approx_std)
-        return np.true_divide((approx_MLE - true_target), approx_std), ((approx_MLE - true_target).sum()) / float(nactive)
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        nactive = np.sum(active)
 
-    else:
-        return None
+        # sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
+        if nactive > 0:
+            #print("true target", true_target)
+            approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                         M_est.opt_transform,
+                                                         M_est.target_observed,
+                                                         M_est.feasible_point,
+                                                         M_est.target_cov,
+                                                         M_est.randomizer_precision)
+
+            break
+
+    return (approx_MLE - true_target)/np.sqrt(np.diag(var)), (approx_MLE - true_target).sum()/float(nactive)
 
 
 def test_bias_lasso(nsim=500):
@@ -130,11 +131,11 @@ def test_bias_lasso(nsim=500):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 200
+    ndraw = 500
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=1, s=0, signal=5.)
+        approx = test_lasso_approx_var(n=300, p=50, s=5, signal=5.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -142,15 +143,15 @@ def test_bias_lasso(nsim=500):
                 pivot_obs_info.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
-
-        if i % 10 == 0:
-            plt.clf()
-            ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-            grid = np.linspace(0, 1, 101)
-            print("ecdf", ecdf(grid))
-            plt.plot(grid, ecdf(grid), c='red', marker='^')
-            plt.plot(grid, grid, 'k--')
-            #plt.show()
-            plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1.png")
+        sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+
+    #if i % 10 == 0:
+    plt.clf()
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot(grid, grid, 'k--')
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1_amp5.png")
 
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index 7a19838c4..9e988f889 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -109,13 +109,29 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2):
     while True:
         target_Z, omega = np.random.standard_normal(2)
         target_Z += true_mean * np.sqrt(n)
-        if target_Z + omega > threshold:
+        if np.abs(target_Z + omega) > threshold:
             Zval = U.dot(np.random.standard_normal(n - 1))
             Zval += target_Z * np.ones(n) / np.sqrt(n)
             break
 
-    approx_MLE, value, var, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
-    return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var))
+    n1 =1
+    target_observed = np.atleast_1d(target_Z)
+    target_transform = (-np.identity(n1), np.zeros(n1))
+    s = np.asscalar(np.sign(target_Z + omega))
+    opt_transform = (s*np.identity(n1), np.ones(n1) * (s*threshold))
+    feasible_point = np.ones(n1)
+    randomization_scale = 1.
+    randomizer_precision = np.identity(n1) / randomization_scale ** 2
+    target_cov = np.identity(n1)
+
+    approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
+                                                 opt_transform,
+                                                 target_observed,
+                                                 feasible_point,
+                                                 target_cov,
+                                                 randomizer_precision)
+
+    return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var)), approx_MLE - np.sqrt(n)*true_mean
 
 #check_approx_fisher_simple(true_mean=-1., threshold=2, randomization_scale=1., nsim=100)
 
@@ -176,18 +192,19 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2):
 
     ndraw = 200
     pivot_obs_info=[]
+    bias = 0.
     for i in range(ndraw):
         result = pivot_approx_fisher_simple(n=300, true_mean = -0.1, threshold=2)
-        pivot_obs_info.append(result)
+        pivot_obs_info.append(result[0])
+        bias += result[1]
 
-    print("here", np.asarray(pivot_obs_info))
+    sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
 
     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
     grid = np.linspace(0, 1, 101)
 
     plt.clf()
-    print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot([0,1],[0,1], 'k--')
     plt.show()
-    #plt.savefig('bootstrap_simple.png')
\ No newline at end of file
+    #plt.savefig('/Users/snigdhapanigrahi/Desktop/signed_approx_info_simple_amp_neg1.png')
\ No newline at end of file
diff --git a/selection/tests/instance.py b/selection/tests/instance.py
index 34487d697..d502b7ab2 100644
--- a/selection/tests/instance.py
+++ b/selection/tests/instance.py
@@ -20,7 +20,7 @@ def AR1(rho, p):
         X = np.random.standard_normal((n, p)).dot(cholX.T)
     return X
 
-def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0.3, signal=7,
+def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7,
                       random_signs=False, df=np.inf,
                       scale=True, center=True,
                       equicorrelated=True):

From d0458196ab1049f9ed712320879b5158eb6a9f06 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c66e.SUNet>
Date: Wed, 15 Nov 2017 11:32:44 -0800
Subject: [PATCH 376/617] added orthogonal LASSO

---
 selection/adjusted_MLE/tests/test_MLE.py | 85 ++++++++++++++++++++++--
 1 file changed, 80 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 7d6c82309..6ba1ec726 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -81,9 +81,7 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
         true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
         nactive = np.sum(active)
 
-        # sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
         if nactive > 0:
-            #print("true target", true_target)
             approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
                                                          M_est.opt_transform,
                                                          M_est.target_observed,
@@ -91,10 +89,60 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
                                                          M_est.target_cov,
                                                          M_est.randomizer_precision)
 
+            print("approx_MLE", approx_MLE)
             break
 
+
+
     return (approx_MLE - true_target)/np.sqrt(np.diag(var)), (approx_MLE - true_target).sum()/float(nactive)
 
+def orthogonal_lasso_approx(n=100, p=5, s=1, signal=0., lam_frac=1., randomization_scale=1.):
+
+    while True:
+        beta = np.zeros(p)
+
+        signal = np.atleast_1d(signal)
+        if signal.shape == (1,):
+            beta[:s] = signal[0]
+        else:
+            beta[:s] = np.linspace(signal[0], signal[1], s)
+
+        X = np.identity(n)[:,:p]
+        X -= X.mean(0)[None, :]
+        X /= (X.std(0)[None, :] * np.sqrt(n))
+        sigma = 1.
+        y = (X.dot(beta) + sigma* np.random.standard_normal(n))
+
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1. / np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
+
+        M_est.solve_map()
+        active = M_est._overall
+
+        nactive = np.sum(active)
+
+        if nactive > 0:
+            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+            print("true_target", true_target)
+            approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                         M_est.opt_transform,
+                                                         M_est.target_observed,
+                                                         M_est.feasible_point,
+                                                         M_est.target_cov,
+                                                         M_est.randomizer_precision)
+            print("approx sd", np.sqrt(np.diag(var)), approx_MLE)
+            break
+
+    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
+
 
 def test_bias_lasso(nsim=500):
     bias = 0
@@ -128,6 +176,33 @@ def test_bias_lasso(nsim=500):
 #             plt.plot(grid, ecdf(grid), c='red', marker='^')
 #             plt.plot(grid, grid, 'k--')
 
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 500
+#     bias = 0.
+#     pivot_obs_info= []
+#     for i in range(ndraw):
+#         approx = test_lasso_approx_var(n=300, p=50, s=5, signal=0.)
+#         if approx is not None:
+#             pivot = approx[0]
+#             bias += approx[1]
+#             for j in range(pivot.shape[0]):
+#                 pivot_obs_info.append(pivot[j])
+#
+#         sys.stderr.write("iteration completed" + str(i) + "\n")
+#         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+#
+#     #if i % 10 == 0:
+#     plt.clf()
+#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+#     grid = np.linspace(0, 1, 101)
+#     print("ecdf", ecdf(grid))
+#     plt.plot(grid, ecdf(grid), c='red', marker='^')
+#     plt.plot(grid, grid, 'k--')
+#     plt.show()
+#     plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1_amp5.png")
+
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
@@ -135,7 +210,7 @@ def test_bias_lasso(nsim=500):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=50, s=5, signal=5.)
+        approx = orthogonal_lasso_approx(n=300, p=5, s=3, signal=7.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -152,6 +227,6 @@ def test_bias_lasso(nsim=500):
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1_amp5.png")
+    #plt.show()
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp7.png")
 

From 7e4eebe28479065557a0fd0164a15e692b3844d5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c66e.SUNet>
Date: Wed, 15 Nov 2017 11:48:29 -0800
Subject: [PATCH 377/617] orthogonal LASSO

---
 selection/adjusted_MLE/tests/test_MLE.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 6ba1ec726..ee75d79de 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -108,8 +108,6 @@ def orthogonal_lasso_approx(n=100, p=5, s=1, signal=0., lam_frac=1., randomizati
             beta[:s] = np.linspace(signal[0], signal[1], s)
 
         X = np.identity(n)[:,:p]
-        X -= X.mean(0)[None, :]
-        X /= (X.std(0)[None, :] * np.sqrt(n))
         sigma = 1.
         y = (X.dot(beta) + sigma* np.random.standard_normal(n))
 
@@ -210,7 +208,7 @@ def test_bias_lasso(nsim=500):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = orthogonal_lasso_approx(n=300, p=5, s=3, signal=7.)
+        approx = orthogonal_lasso_approx(n=300, p=5, s=3, signal=5.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -228,5 +226,5 @@ def test_bias_lasso(nsim=500):
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
     #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp7.png")
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
 

From 0ce36cbd175e61f3dc114c8cdd30ce3abe3dd17b Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 15 Nov 2017 14:56:24 -0800
Subject: [PATCH 378/617] commit changes

---
 .../tests/compare_lasso_simple.py             | 12 ++++++++----
 selection/adjusted_MLE/tests/test_MLE.py      | 19 ++++++++++---------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_lasso_simple.py b/selection/adjusted_MLE/tests/compare_lasso_simple.py
index 9d789b342..9e2727176 100644
--- a/selection/adjusted_MLE/tests/compare_lasso_simple.py
+++ b/selection/adjusted_MLE/tests/compare_lasso_simple.py
@@ -28,6 +28,7 @@ def test_lasso_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization
         nactive = np.sum(active)
         if nactive > 0:
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+            print("true target", true_target)
             approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
                                                          M_est.opt_transform,
                                                          M_est.target_observed,
@@ -35,6 +36,7 @@ def test_lasso_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization
                                                          M_est.target_cov,
                                                          M_est.randomizer_precision)
 
+            print("approx_MLE", approx_MLE)
             #print("check maps", M_est.opt_transform, M_est.target_transform, M_est.feasible_point, M_est.target_cov,
             #      M_est.randomizer_precision, M_est.target_observed)
 
@@ -96,15 +98,17 @@ def test_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization_scale
     pivot_lasso = []
     pivot_simple = []
     diff = 0.
+    bias = 0.
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=1, s=1, signal=-2.)
+        approx = test_lasso_approx_var(n=300, p=1, s=1, signal=5.)
         if approx is not None:
             pivot_lasso.append(approx[0])
             pivot_simple.append(approx[2])
-            diff += approx[0]-approx[2]
+            bias += approx[1]
+            #diff += approx[0]-approx[2]
         sys.stderr.write("iteration completed" + str(i) + "\n")
-
-    sys.stderr.write("diff" + str(diff) + "\n")
+        sys.stderr.write("bias" + str(bias/float(i)) + "\n")
+    #sys.stderr.write("diff" + str(diff) + "\n")
 
     #if i % 10 == 0:
     plt.clf()
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index ee75d79de..875ce7f0c 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -96,7 +96,7 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
 
     return (approx_MLE - true_target)/np.sqrt(np.diag(var)), (approx_MLE - true_target).sum()/float(nactive)
 
-def orthogonal_lasso_approx(n=100, p=5, s=1, signal=0., lam_frac=1., randomization_scale=1.):
+def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomization_scale=1.):
 
     while True:
         beta = np.zeros(p)
@@ -111,10 +111,11 @@ def orthogonal_lasso_approx(n=100, p=5, s=1, signal=0., lam_frac=1., randomizati
         sigma = 1.
         y = (X.dot(beta) + sigma* np.random.standard_normal(n))
 
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
+        #lam = 2.
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+        print("lam", lam)
         loss = rr.glm.gaussian(X, y)
-        epsilon = 1. / np.sqrt(n)
+        epsilon = 0.
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
@@ -127,7 +128,7 @@ def orthogonal_lasso_approx(n=100, p=5, s=1, signal=0., lam_frac=1., randomizati
 
         nactive = np.sum(active)
 
-        if nactive > 0:
+        if nactive >0:
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
             print("true_target", true_target)
             approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
@@ -212,19 +213,19 @@ def test_bias_lasso(nsim=500):
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
+            print("bias in iteration", approx[1])
             for j in range(pivot.shape[0]):
                 pivot_obs_info.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
-
-    #if i % 10 == 0:
+    print("pivot", np.asarray(pivot_obs_info))
     plt.clf()
     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
     grid = np.linspace(0, 1, 101)
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
-    #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
 

From 0b7dcf6c5c06c72ed820d7bc08f719fb3de70338 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 15 Nov 2017 17:08:04 -0800
Subject: [PATCH 379/617] simple problem not unbiased with ridge

---
 selection/adjusted_MLE/tests/compare_lasso_simple.py | 6 +++---
 selection/adjusted_MLE/tests/test_MLE.py             | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_lasso_simple.py b/selection/adjusted_MLE/tests/compare_lasso_simple.py
index 9e2727176..d5b7619cc 100644
--- a/selection/adjusted_MLE/tests/compare_lasso_simple.py
+++ b/selection/adjusted_MLE/tests/compare_lasso_simple.py
@@ -14,7 +14,7 @@ def test_lasso_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
         loss = rr.glm.gaussian(X, y)
-        epsilon = 0.
+        epsilon = 1./np.sqrt(n)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
@@ -44,7 +44,7 @@ def test_lasso_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization
             target_observed = np.atleast_1d(M_est.target_observed)
             target_transform = (-np.identity(1), np.zeros(1))
             s = np.asscalar(np.sign(opt_offset))
-            opt_transform = (s * np.identity(1), np.ones(1) * (s * 2.))
+            opt_transform = (s * (np.identity(1)+epsilon), np.ones(1) * (s * 2.))
             feasible_point = np.ones(1)
             randomizer_precision = np.identity(1) / randomization_scale ** 2
             target_cov = np.identity(1)
@@ -100,7 +100,7 @@ def test_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization_scale
     diff = 0.
     bias = 0.
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=1, s=1, signal=5.)
+        approx = test_lasso_approx_var(n=300, p=1, s=1, signal=-1.)
         if approx is not None:
             pivot_lasso.append(approx[0])
             pivot_simple.append(approx[2])
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 875ce7f0c..030820606 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -113,7 +113,6 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
 
         #lam = 2.
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        print("lam", lam)
         loss = rr.glm.gaussian(X, y)
         epsilon = 0.
         W = np.ones(p) * lam
@@ -209,7 +208,7 @@ def test_bias_lasso(nsim=500):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = orthogonal_lasso_approx(n=300, p=5, s=3, signal=5.)
+        approx = orthogonal_lasso_approx(n=300, p=5, s=5, signal=0.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]

From 268d2891328714c2d31df41399ceb68e552bb8b8 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 15 Nov 2017 20:10:42 -0800
Subject: [PATCH 380/617] commit changes

---
 selection/adjusted_MLE/selective_MLE.py       |  2 +-
 .../adjusted_MLE/tests/test_simple_problem.py | 31 +++++++++----------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index dcf9c5144..1edc050c3 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -127,7 +127,7 @@ def solve_barrier_nonneg(conjugate_arg,
                          precision,
                          feasible_point=None,
                          step=1,
-                         nstep=30,
+                         nstep=100,
                          tol=1.e-8):
 
     scaling = np.sqrt(np.diag(precision))
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index 9e988f889..ca6fd6761 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -6,13 +6,13 @@
 from selection.adjusted_MLE.tests.exact_MLE import grad_CGF, fisher_info
 from statsmodels.distributions.empirical_distribution import ECDF
 
-def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
+def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1., epsilon = 0.05):
     """
     Simple problem: randomizaiton of sd 1 and thresholded at 2 (default args)
     """
     target_observed = np.atleast_1d(target_observed)
     target_transform = (-np.identity(n), np.zeros(n))
-    opt_transform = (np.identity(n), np.ones(n) * threshold)
+    opt_transform = (np.identity(n)+ epsilon, np.ones(n) * threshold)
     feasible_point = np.ones(n)
     randomizer_precision = np.identity(n) / randomization_scale ** 2
     target_cov = np.identity(n)
@@ -25,16 +25,16 @@ def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1.):
                       randomizer_precision)
 
 
-def sim_simple_problem(true_mean, threshold=2, randomization_scale=1.):
+def sim_simple_problem(true_mean, threshold=2, randomization_scale=1., epsilon = 0.05):
     while True:
         Z, W = np.random.standard_normal(2)
         Z += true_mean
         W *= randomization_scale
-        if Z + W > threshold:
+        if ((Z + W) - threshold)/(1.+epsilon)>0.:
             return Z
 
 
-def check_unbiased(true_mean, threshold=2, randomization_scale=1., nsim=5000):
+def check_unbiased(true_mean, threshold=2, randomization_scale=1., nsim=5000, epsilon = 0.05):
     bias = 0
     for _ in range(nsim):
         Z = sim_simple_problem(true_mean, threshold, randomization_scale)
@@ -43,6 +43,7 @@ def check_unbiased(true_mean, threshold=2, randomization_scale=1., nsim=5000):
 
     return bias / nsim
 
+#print(check_unbiased(-1., threshold=2, randomization_scale=1., nsim=5000, epsilon = 0.05))
 
 def test_orthogonal_lasso(n=5):
     Zval = np.random.normal(0, 1, n)
@@ -100,25 +101,19 @@ def check_approx_fisher_simple(true_mean, threshold=2, randomization_scale=1., n
 
     print(diff/float(nsim))
 
-def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2):
-
-    resid_matrix = np.identity(n) - np.ones((n, n)) / n
-    U, D, V = np.linalg.svd(resid_matrix)
-    U = U[:, :-1]
+def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2):
 
     while True:
         target_Z, omega = np.random.standard_normal(2)
         target_Z += true_mean * np.sqrt(n)
-        if np.abs(target_Z + omega) > threshold:
-            Zval = U.dot(np.random.standard_normal(n - 1))
-            Zval += target_Z * np.ones(n) / np.sqrt(n)
+        if ((target_Z + omega) - threshold)/(1.+epsilon)>0.:
             break
 
     n1 =1
     target_observed = np.atleast_1d(target_Z)
     target_transform = (-np.identity(n1), np.zeros(n1))
-    s = np.asscalar(np.sign(target_Z + omega))
-    opt_transform = (s*np.identity(n1), np.ones(n1) * (s*threshold))
+    #s = np.asscalar(np.sign(target_Z + omega))
+    opt_transform = ((np.identity(n1)+epsilon), np.ones(n1) * (threshold))
     feasible_point = np.ones(n1)
     randomization_scale = 1.
     randomizer_precision = np.identity(n1) / randomization_scale ** 2
@@ -131,6 +126,7 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2):
                                                  target_cov,
                                                  randomizer_precision)
 
+    print("approx MLE", approx_MLE, np.sqrt(n)*true_mean)
     return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var)), approx_MLE - np.sqrt(n)*true_mean
 
 #check_approx_fisher_simple(true_mean=-1., threshold=2, randomization_scale=1., nsim=100)
@@ -194,9 +190,10 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2):
     pivot_obs_info=[]
     bias = 0.
     for i in range(ndraw):
-        result = pivot_approx_fisher_simple(n=300, true_mean = -0.1, threshold=2)
+        result = pivot_approx_fisher_simple(n=300, true_mean = 0.3, threshold=2)
         pivot_obs_info.append(result[0])
         bias += result[1]
+        sys.stderr.write("bias" + str(bias / float(i)) + "\n")
 
     sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
 
@@ -207,4 +204,4 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2):
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot([0,1],[0,1], 'k--')
     plt.show()
-    #plt.savefig('/Users/snigdhapanigrahi/Desktop/signed_approx_info_simple_amp_neg1.png')
\ No newline at end of file
+#     #plt.savefig('/Users/snigdhapanigrahi/Desktop/signed_approx_info_simple_amp_neg1.png')
\ No newline at end of file

From ed72b133f59f294d5e42993ce10423d09c05cd1a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c254.SUNet>
Date: Thu, 16 Nov 2017 14:31:12 -0800
Subject: [PATCH 381/617] fixed bug in conditional mean

---
 selection/adjusted_MLE/selective_MLE.py       |  6 ++++-
 .../adjusted_MLE/tests/test_simple_problem.py | 27 +++++++++++++++++--
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 1edc050c3..2ed7b8112 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -93,15 +93,19 @@ def solve_UMVU(target_transform,
     M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
     M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
+    print("check matrices", M_1, M_2, L, data_offset, opt_offset)
+
     conditioned_value = data_offset + opt_offset
 
-    linear_term = implied_cross.T.dot(np.linalg.inv(implied_target))
+    linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
     offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
     natparam_transform = (linear_term, offset_term)
     conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
 
     conditional_precision = implied_precision[ntarget:,ntarget:]
 
+    print("check conditional parameters", conditional_natural_parameter-(1.2*target_observed)+2.4, conditional_precision)
+
     soln, value, hess = solve_barrier_nonneg(conditional_natural_parameter,
                                              conditional_precision,
                                              feasible_point=feasible_point)
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index ca6fd6761..24fd6128c 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -129,7 +129,30 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     print("approx MLE", approx_MLE, np.sqrt(n)*true_mean)
     return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var)), approx_MLE - np.sqrt(n)*true_mean
 
-#check_approx_fisher_simple(true_mean=-1., threshold=2, randomization_scale=1., nsim=100)
+def test_matrices_simple(true_mean = 0., threshold=2, epsilon = 0.2):
+
+    while True:
+        target_Z, omega = np.random.standard_normal(2)
+        target_Z += true_mean
+        if ((target_Z + omega) - threshold)>0.:
+            break
+
+    target_observed = np.atleast_1d(target_Z)
+    target_transform = (-np.identity(1), np.zeros(1))
+    opt_transform = ((np.identity(1) + epsilon), np.ones(1) * (threshold))
+    feasible_point = np.ones(1)
+    randomization_scale = 1.
+    randomizer_precision = np.identity(1) / randomization_scale ** 2.
+    target_cov = np.identity(1)
+
+    approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
+                                                 opt_transform,
+                                                 target_observed,
+                                                 feasible_point,
+                                                 target_cov,
+                                                 randomizer_precision)
+
+#test_matrices_simple(true_mean=2., threshold=2, epsilon=0.2)
 
 # if __name__ == "__main__":
 #     n = 1000
@@ -190,7 +213,7 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     pivot_obs_info=[]
     bias = 0.
     for i in range(ndraw):
-        result = pivot_approx_fisher_simple(n=300, true_mean = 0.3, threshold=2)
+        result = pivot_approx_fisher_simple(n=300, true_mean = 0.2, threshold=2)
         pivot_obs_info.append(result[0])
         bias += result[1]
         sys.stderr.write("bias" + str(bias / float(i)) + "\n")

From 273172b6c4832bd19f6ce99e652df8ceb080e876 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c254.SUNet>
Date: Thu, 16 Nov 2017 15:27:05 -0800
Subject: [PATCH 382/617] commit all changes

---
 selection/adjusted_MLE/selective_MLE.py             | 4 ++--
 selection/adjusted_MLE/tests/test_MLE.py            | 4 ++--
 selection/adjusted_MLE/tests/test_simple_problem.py | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 2ed7b8112..2dac09e8d 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -93,7 +93,7 @@ def solve_UMVU(target_transform,
     M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
     M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
-    print("check matrices", M_1, M_2, L, data_offset, opt_offset)
+    #print("check matrices", M_1, M_2, L, data_offset, opt_offset)
 
     conditioned_value = data_offset + opt_offset
 
@@ -104,7 +104,7 @@ def solve_UMVU(target_transform,
 
     conditional_precision = implied_precision[ntarget:,ntarget:]
 
-    print("check conditional parameters", conditional_natural_parameter-(1.2*target_observed)+2.4, conditional_precision)
+    #print("check conditional parameters", conditional_natural_parameter-(1.2*target_observed)+2.4, conditional_precision)
 
     soln, value, hess = solve_barrier_nonneg(conditional_natural_parameter,
                                              conditional_precision,
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 030820606..632c4a000 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -114,7 +114,7 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
         #lam = 2.
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
         loss = rr.glm.gaussian(X, y)
-        epsilon = 0.
+        epsilon = 1./np.sqrt(n)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
@@ -208,7 +208,7 @@ def test_bias_lasso(nsim=500):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = orthogonal_lasso_approx(n=300, p=5, s=5, signal=0.)
+        approx = orthogonal_lasso_approx(n=300, p=5, s=5, signal=5.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index 24fd6128c..3228ec10a 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -213,7 +213,7 @@ def test_matrices_simple(true_mean = 0., threshold=2, epsilon = 0.2):
     pivot_obs_info=[]
     bias = 0.
     for i in range(ndraw):
-        result = pivot_approx_fisher_simple(n=300, true_mean = 0.2, threshold=2)
+        result = pivot_approx_fisher_simple(n=300, true_mean = -0.1, threshold=2)
         pivot_obs_info.append(result[0])
         bias += result[1]
         sys.stderr.write("bias" + str(bias / float(i)) + "\n")

From 65569232ff74212b2e98dd148f9c4e19ae504064 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 16 Nov 2017 15:32:52 -0800
Subject: [PATCH 383/617] setup for C code for umvu

---
 setup.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/setup.py b/setup.py
index 4b4a4cc53..4ea768a38 100755
--- a/setup.py
+++ b/setup.py
@@ -58,6 +58,12 @@
                       libraries=['m'],
                       include_dirs=['C-software/src']))
 
+EXTS.append(Extension('selection.randomized.selective_MLE_utils',
+                      ['selection/randomized/selective_MLE_utils.pyx',
+                       'C-software/src/randomized_lasso.c'],
+                      libraries=['m'],
+                      include_dirs=['C-software/src']))
+
 # Cython is a dependency for building extensions, iff we don't have stamped
 # up pyx and c files.
 build_ext, need_cython = cyproc_exts(EXTS,

From da4134e2efb6a7d9b09c75835429f4b2714209c4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 16 Nov 2017 15:55:08 -0800
Subject: [PATCH 384/617] changed test a little

---
 selection/adjusted_MLE/tests/test_MLE.py | 27 ++++++++++++------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 632c4a000..b5b98dc43 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -103,18 +103,18 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
 
         signal = np.atleast_1d(signal)
         if signal.shape == (1,):
-            beta[:s] = signal[0]
+            beta[:s] = signal[0] * (1 + np.fabs(np.random.standard_normal(s)))
         else:
             beta[:s] = np.linspace(signal[0], signal[1], s)
 
-        X = np.identity(n)[:,:p]
+        X = np.linalg.svd(np.random.standard_normal((n,p)))[0][:,:p]
+
         sigma = 1.
-        y = (X.dot(beta) + sigma* np.random.standard_normal(n))
+        y = sigma * (X.dot(beta) + np.random.standard_normal(n))
 
-        #lam = 2.
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+        lam = sigma * lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
         loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
+        epsilon = sigma / np.sqrt(n)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
@@ -126,7 +126,7 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
         active = M_est._overall
 
         nactive = np.sum(active)
-
+        print('nactive', nactive)
         if nactive >0:
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
             print("true_target", true_target)
@@ -142,10 +142,10 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
     return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
 
 
-def test_bias_lasso(nsim=500):
+def test_bias_lasso(nsim=2000):
     bias = 0
     for _ in range(nsim):
-        bias += test_lasso(n=100, p=50, s=5, signal=5., seed_n=0, lam_frac=1., randomization_scale=1.)[0]
+        bias += test_lasso(n=100, p=50, s=5, signal=2.5, seed_n=0, lam_frac=1., randomization_scale=1.)[0]
 
     print(bias / nsim)
 
@@ -204,21 +204,20 @@ def test_bias_lasso(nsim=500):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 500
+    ndraw = 1000
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = orthogonal_lasso_approx(n=300, p=5, s=5, signal=5.)
+        approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=2.8, lam_frac=0.8)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
             print("bias in iteration", approx[1])
-            for j in range(pivot.shape[0]):
-                pivot_obs_info.append(pivot[j])
+            pivot_obs_info.extend(pivot)
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
-    print("pivot", np.asarray(pivot_obs_info))
+
     plt.clf()
     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
     grid = np.linspace(0, 1, 101)

From 52e073d92aefd270912deb678372f7604d0bf6b7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c254.SUNet>
Date: Thu, 16 Nov 2017 16:18:21 -0800
Subject: [PATCH 385/617] commit changes

---
 selection/adjusted_MLE/tests/test_MLE.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 632c4a000..bf2922acb 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -226,5 +226,4 @@ def test_bias_lasso(nsim=500):
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
     plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
-
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
\ No newline at end of file

From 6d87c8f241e1fd68aa6fc03191d20d506ff5a12c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j95.SUNet>
Date: Fri, 17 Nov 2017 09:20:52 -0800
Subject: [PATCH 386/617] commit test for non-orthogonal LASSO

---
 selection/adjusted_MLE/tests/test_MLE.py | 64 ++++++++++++------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 990fdb2ca..563172541 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -174,24 +174,50 @@ def test_bias_lasso(nsim=2000):
 #             plt.plot(grid, ecdf(grid), c='red', marker='^')
 #             plt.plot(grid, grid, 'k--')
 
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    ndraw = 500
+    bias = 0.
+    pivot_obs_info= []
+    for i in range(ndraw):
+        approx = test_lasso_approx_var(n=300, p=200, s=5, signal=3.)
+        if approx is not None:
+            pivot = approx[0]
+            bias += approx[1]
+            for j in range(pivot.shape[0]):
+                pivot_obs_info.append(pivot[j])
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+
+    #if i % 10 == 0:
+    plt.clf()
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot(grid, grid, 'k--')
+    #plt.show()
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p200_n300_amp_3.png")
+
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt
 #
-#     ndraw = 500
+#     ndraw = 1000
 #     bias = 0.
 #     pivot_obs_info= []
 #     for i in range(ndraw):
-#         approx = test_lasso_approx_var(n=300, p=50, s=5, signal=0.)
+#         approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=2.8, lam_frac=0.8)
 #         if approx is not None:
 #             pivot = approx[0]
 #             bias += approx[1]
-#             for j in range(pivot.shape[0]):
-#                 pivot_obs_info.append(pivot[j])
+#             print("bias in iteration", approx[1])
+#             pivot_obs_info.extend(pivot)
 #
 #         sys.stderr.write("iteration completed" + str(i) + "\n")
 #         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
 #
-#     #if i % 10 == 0:
 #     plt.clf()
 #     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
 #     grid = np.linspace(0, 1, 101)
@@ -199,30 +225,4 @@ def test_bias_lasso(nsim=2000):
 #     plt.plot(grid, ecdf(grid), c='red', marker='^')
 #     plt.plot(grid, grid, 'k--')
 #     plt.show()
-#     plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1_amp5.png")
-
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 1000
-    bias = 0.
-    pivot_obs_info= []
-    for i in range(ndraw):
-        approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=2.8, lam_frac=0.8)
-        if approx is not None:
-            pivot = approx[0]
-            bias += approx[1]
-            print("bias in iteration", approx[1])
-            pivot_obs_info.extend(pivot)
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
-
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot(grid, grid, 'k--')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
\ No newline at end of file
+#     #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
\ No newline at end of file

From 32cabc3d9f8a5b13eb6cf43c6e089807dc09d679 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN525j95.SUNet>
Date: Fri, 17 Nov 2017 11:26:24 -0800
Subject: [PATCH 387/617] added test for comparison of variances

---
 selection/adjusted_MLE/tests/approx_MLE.py    |  2 +-
 selection/adjusted_MLE/tests/test_MLE.py      |  8 ++---
 .../adjusted_MLE/tests/test_simple_problem.py | 36 +++++--------------
 3 files changed, 14 insertions(+), 32 deletions(-)

diff --git a/selection/adjusted_MLE/tests/approx_MLE.py b/selection/adjusted_MLE/tests/approx_MLE.py
index 078866c8c..fc86317f9 100644
--- a/selection/adjusted_MLE/tests/approx_MLE.py
+++ b/selection/adjusted_MLE/tests/approx_MLE.py
@@ -71,7 +71,7 @@ def approx_fisher_info(mu, randomization_scale=0.5, threshold=2):
 
     variance = 1 + randomization_scale ** 2.
     minimizer = approx_grad_cgf(mu)[2]
-    return (1./ variance**2.)* (1./((1./variance) + grad_log_hessian(minimizer, variance)))+ ((randomization_scale ** 2.)/variance)
+    return (1./ variance**2.)* (1./((1./variance) + grad_log_hessian(minimizer, randomization_scale**2.)))+ ((randomization_scale ** 2.)/variance)
 
 def simulate_truncated(mu, randomization_scale = 0.5, threshold = 2):
     while True:
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 563172541..1030d2170 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -89,7 +89,7 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
                                                          M_est.target_cov,
                                                          M_est.randomizer_precision)
 
-            print("approx_MLE", approx_MLE)
+            print("approx_MLE and sd", approx_MLE, np.sqrt(np.diag(var)))
             break
 
 
@@ -181,7 +181,7 @@ def test_bias_lasso(nsim=2000):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=200, s=5, signal=3.)
+        approx = test_lasso_approx_var(n=300, p=50, s=5, signal=3.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -198,8 +198,8 @@ def test_bias_lasso(nsim=2000):
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
-    #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p200_n300_amp_3.png")
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p200_n300_amp_3.png")
 
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index 3228ec10a..f69d6eb84 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -5,6 +5,7 @@
 from selection.adjusted_MLE.selective_MLE import solve_UMVU
 from selection.adjusted_MLE.tests.exact_MLE import grad_CGF, fisher_info
 from statsmodels.distributions.empirical_distribution import ECDF
+from selection.adjusted_MLE.tests.approx_MLE import approx_fisher_info
 
 def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1., epsilon = 0.05):
     """
@@ -118,6 +119,7 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     randomization_scale = 1.
     randomizer_precision = np.identity(n1) / randomization_scale ** 2
     target_cov = np.identity(n1)
+    simple_var = 1./approx_fisher_info(np.sqrt(n)*true_mean, randomization_scale=1., threshold=2)
 
     approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
                                                  opt_transform,
@@ -126,31 +128,11 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
                                                  target_cov,
                                                  randomizer_precision)
 
-    print("approx MLE", approx_MLE, np.sqrt(n)*true_mean)
-    return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var)), approx_MLE - np.sqrt(n)*true_mean
+    print("approx MLE", approx_MLE, np.sqrt(n)*true_mean, var)
+    print("diff", simple_var- var)
+    return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var)), approx_MLE - np.sqrt(n)*true_mean, \
+           np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(simple_var))
 
-def test_matrices_simple(true_mean = 0., threshold=2, epsilon = 0.2):
-
-    while True:
-        target_Z, omega = np.random.standard_normal(2)
-        target_Z += true_mean
-        if ((target_Z + omega) - threshold)>0.:
-            break
-
-    target_observed = np.atleast_1d(target_Z)
-    target_transform = (-np.identity(1), np.zeros(1))
-    opt_transform = ((np.identity(1) + epsilon), np.ones(1) * (threshold))
-    feasible_point = np.ones(1)
-    randomization_scale = 1.
-    randomizer_precision = np.identity(1) / randomization_scale ** 2.
-    target_cov = np.identity(1)
-
-    approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
-                                                 opt_transform,
-                                                 target_observed,
-                                                 feasible_point,
-                                                 target_cov,
-                                                 randomizer_precision)
 
 #test_matrices_simple(true_mean=2., threshold=2, epsilon=0.2)
 
@@ -209,12 +191,12 @@ def test_matrices_simple(true_mean = 0., threshold=2, epsilon = 0.2):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 200
+    ndraw = 500
     pivot_obs_info=[]
     bias = 0.
     for i in range(ndraw):
-        result = pivot_approx_fisher_simple(n=300, true_mean = -0.1, threshold=2)
-        pivot_obs_info.append(result[0])
+        result = pivot_approx_fisher_simple(n=300, true_mean = -0.2, threshold=2)
+        pivot_obs_info.append(result[2])
         bias += result[1]
         sys.stderr.write("bias" + str(bias / float(i)) + "\n")
 

From 6be3e95868c3a8b39bf11d9da37b6a32b4cd6b2a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Fri, 17 Nov 2017 22:23:19 -0800
Subject: [PATCH 388/617] commit changes

---
 selection/adjusted_MLE/tests/test_MLE.py            |  2 +-
 selection/adjusted_MLE/tests/test_simple_problem.py | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 1030d2170..ec0c1c790 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -181,7 +181,7 @@ def test_bias_lasso(nsim=2000):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=50, s=5, signal=3.)
+        approx = test_lasso_approx_var(n=300, p=200, s=5, signal=3.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index f69d6eb84..e5dfc34a1 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -119,7 +119,7 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     randomization_scale = 1.
     randomizer_precision = np.identity(n1) / randomization_scale ** 2
     target_cov = np.identity(n1)
-    simple_var = 1./approx_fisher_info(np.sqrt(n)*true_mean, randomization_scale=1., threshold=2)
+    simple_var = 1./approx_fisher_info(target_observed, randomization_scale=1., threshold=2)
 
     approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
                                                  opt_transform,
@@ -131,7 +131,7 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     print("approx MLE", approx_MLE, np.sqrt(n)*true_mean, var)
     print("diff", simple_var- var)
     return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var)), approx_MLE - np.sqrt(n)*true_mean, \
-           np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(simple_var))
+           np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(simple_var)), simple_var- var
 
 
 #test_matrices_simple(true_mean=2., threshold=2, epsilon=0.2)
@@ -194,13 +194,16 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     ndraw = 500
     pivot_obs_info=[]
     bias = 0.
+    diff = 0.
     for i in range(ndraw):
-        result = pivot_approx_fisher_simple(n=300, true_mean = -0.2, threshold=2)
+        result = pivot_approx_fisher_simple(n=300, true_mean = -0.3, threshold=2)
         pivot_obs_info.append(result[2])
+        diff += result[3]
         bias += result[1]
         sys.stderr.write("bias" + str(bias / float(i)) + "\n")
 
     sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
+    sys.stderr.write("difference between variances" + str(diff / float(ndraw)) + "\n")
 
     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
     grid = np.linspace(0, 1, 101)
@@ -209,4 +212,4 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot([0,1],[0,1], 'k--')
     plt.show()
-#     #plt.savefig('/Users/snigdhapanigrahi/Desktop/signed_approx_info_simple_amp_neg1.png')
\ No newline at end of file
+#   #plt.savefig('/Users/snigdhapanigrahi/Desktop/signed_approx_info_simple_amp_neg1.png')
\ No newline at end of file

From 509c98bda2877f6a7c79f1c3d2f31d2ebeb536ba Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 18 Nov 2017 18:00:44 -0800
Subject: [PATCH 389/617] new computation for observed Fisher info

---
 selection/adjusted_MLE/selective_MLE.py       | 22 ++++++++++++-------
 selection/adjusted_MLE/tests/test_MLE.py      |  6 ++---
 .../adjusted_MLE/tests/test_simple_problem.py |  4 ++--
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 2dac09e8d..d3b368142 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -106,9 +106,6 @@ def solve_UMVU(target_transform,
 
     #print("check conditional parameters", conditional_natural_parameter-(1.2*target_observed)+2.4, conditional_precision)
 
-    soln, value, hess = solve_barrier_nonneg(conditional_natural_parameter,
-                                             conditional_precision,
-                                             feasible_point=feasible_point)
     M_1_inv = np.linalg.inv(M_1)
     offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
     mle_transform = (M_1_inv, -M_1_inv.dot(L), offset_term)
@@ -116,15 +113,24 @@ def solve_UMVU(target_transform,
     def mle_map(natparam_transform, mle_transform, feasible_point, conditional_precision, target_observed):
         param_lin, param_offset = natparam_transform
         mle_target_lin, mle_soln_lin, mle_offset = mle_transform
-        soln, value, hess = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
+        soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
                                                  conditional_precision,
                                                  feasible_point=feasible_point)
-        hessian = mle_target_lin+ mle_soln_lin.dot(hess).dot(conditional_precision).dot(param_lin)
-        return mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset, value, hessian
+
+        return mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset, value
 
     mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, feasible_point, conditional_precision)
-    sel_MLE, value, hessian = mle_partial(target_observed)
-    return np.squeeze(sel_MLE), value, hessian, mle_partial
+    sel_MLE, value = mle_partial(target_observed)
+
+    conditional_par = -implied_precision[ntarget:,:ntarget].dot(M_1.dot(sel_MLE)+ M_2.dot(conditioned_value))
+    _ , _ , hess = solve_barrier_nonneg(conditional_par  + offset_term,
+                                        np.linalg.inv(implied_opt),
+                                        feasible_point=feasible_point)
+
+    cross_covariance = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(implied_precision[:ntarget,ntarget:])
+    hessian = target_precision.dot(np.linalg.inv(implied_precision[:ntarget,:ntarget])
+                                   + cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
+    return np.squeeze(sel_MLE), value, np.linalg.inv(hessian), mle_partial
 
 
 def solve_barrier_nonneg(conjugate_arg,
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index ec0c1c790..c8f7f7e3f 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -89,7 +89,8 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
                                                          M_est.target_cov,
                                                          M_est.randomizer_precision)
 
-            print("approx_MLE and sd", approx_MLE, np.sqrt(np.diag(var)))
+            #print("approx_MLE and sd", approx_MLE, np.sqrt(np.diag(var)))
+            #print("approx sd", var)
             break
 
 
@@ -136,7 +137,6 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
                                                          M_est.feasible_point,
                                                          M_est.target_cov,
                                                          M_est.randomizer_precision)
-            print("approx sd", np.sqrt(np.diag(var)), approx_MLE)
             break
 
     return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
@@ -208,7 +208,7 @@ def test_bias_lasso(nsim=2000):
 #     bias = 0.
 #     pivot_obs_info= []
 #     for i in range(ndraw):
-#         approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=2.8, lam_frac=0.8)
+#         approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=-1.3, lam_frac=0.8)
 #         if approx is not None:
 #             pivot = approx[0]
 #             bias += approx[1]
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index e5dfc34a1..a413ee98c 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -196,8 +196,8 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     bias = 0.
     diff = 0.
     for i in range(ndraw):
-        result = pivot_approx_fisher_simple(n=300, true_mean = -0.3, threshold=2)
-        pivot_obs_info.append(result[2])
+        result = pivot_approx_fisher_simple(n=300, true_mean = -0.1, threshold=2)
+        pivot_obs_info.append(result[0])
         diff += result[3]
         bias += result[1]
         sys.stderr.write("bias" + str(bias / float(i)) + "\n")

From 3f110abfbc91fbe7424dc0150d363bcd61d74b93 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 18 Nov 2017 18:05:19 -0800
Subject: [PATCH 390/617] checked non orthogonal lasso p 200

---
 selection/adjusted_MLE/tests/test_MLE.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index c8f7f7e3f..441090551 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -90,11 +90,9 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
                                                          M_est.randomizer_precision)
 
             #print("approx_MLE and sd", approx_MLE, np.sqrt(np.diag(var)))
-            #print("approx sd", var)
+            print("approx sd", np.sqrt(np.diag(var)))
             break
 
-
-
     return (approx_MLE - true_target)/np.sqrt(np.diag(var)), (approx_MLE - true_target).sum()/float(nactive)
 
 def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomization_scale=1.):
@@ -137,6 +135,8 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
                                                          M_est.feasible_point,
                                                          M_est.target_cov,
                                                          M_est.randomizer_precision)
+
+            print("approx sd", np.sqrt(np.diag(var)))
             break
 
     return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
@@ -177,11 +177,11 @@ def test_bias_lasso(nsim=2000):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 500
+    ndraw = 1000
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=200, s=5, signal=3.)
+        approx = test_lasso_approx_var(n=300, p=200, s=10, signal=3.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]

From 55f2a03d76d39e4557217838d4a3b878e808b60a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c1f5.SUNet>
Date: Mon, 20 Nov 2017 14:25:58 -0800
Subject: [PATCH 391/617] added test to compute univariate MLE based on E maps

---
 selection/adjusted_MLE/selective_MLE.py       | 13 ++--
 selection/adjusted_MLE/tests/test_MLE.py      | 17 ++---
 .../adjusted_MLE/tests/test_MLE_univariate.py | 66 +++++++++++++++++--
 .../adjusted_MLE/tests/test_simple_problem.py |  2 +
 4 files changed, 77 insertions(+), 21 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index d3b368142..eac5dfbca 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -48,12 +48,11 @@ def solve_map(self):
         self.target_transform = (self.A, self.data_offset)
 
     def solve_map_univariate_target(self, j):
-        self.feasible_point = np.abs(self.initial_soln[self._overall])[j]
-
+        #self.feasible_point = np.abs(self.initial_soln[self._overall])[j]
+        self.feasible_point = np.ones(self._overall.sum())
         self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
         self.data_offset = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
-        self.target_transform = (self.A.reshape((self.A.shape[0],1)),
-                                 self.data_offset.reshape((self.data_offset.shape[0],1)))
+        self.target_transform = (self.A.reshape((self.A.shape[0],1)),self.data_offset)
 
 
 def solve_UMVU(target_transform,
@@ -103,7 +102,6 @@ def solve_UMVU(target_transform,
     conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
 
     conditional_precision = implied_precision[ntarget:,ntarget:]
-
     #print("check conditional parameters", conditional_natural_parameter-(1.2*target_observed)+2.4, conditional_precision)
 
     M_1_inv = np.linalg.inv(M_1)
@@ -123,13 +121,14 @@ def mle_map(natparam_transform, mle_transform, feasible_point, conditional_preci
     sel_MLE, value = mle_partial(target_observed)
 
     conditional_par = -implied_precision[ntarget:,:ntarget].dot(M_1.dot(sel_MLE)+ M_2.dot(conditioned_value))
-    _ , _ , hess = solve_barrier_nonneg(conditional_par  + offset_term,
+    _ , _ , hess = solve_barrier_nonneg(conditional_par + offset_term,
                                         np.linalg.inv(implied_opt),
                                         feasible_point=feasible_point)
 
     cross_covariance = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(implied_precision[:ntarget,ntarget:])
     hessian = target_precision.dot(np.linalg.inv(implied_precision[:ntarget,:ntarget])
                                    + cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
+
     return np.squeeze(sel_MLE), value, np.linalg.inv(hessian), mle_partial
 
 
@@ -137,7 +136,7 @@ def solve_barrier_nonneg(conjugate_arg,
                          precision,
                          feasible_point=None,
                          step=1,
-                         nstep=100,
+                         nstep=150,
                          tol=1.e-8):
 
     scaling = np.sqrt(np.diag(precision))
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 441090551..aa0cbb476 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -52,8 +52,8 @@ def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., random
             boot_sample[b, :] = mle_map(target_boot)[0]
 
         print("estimated sd", boot_sample.std(0))
-        return np.true_divide((approx_MLE - true_target), boot_sample.std(0)), (
-        (approx_MLE - true_target).sum()) / float(nactive)
+        return np.true_divide((approx_MLE - true_target), boot_sample.std(0)),\
+               ((approx_MLE - true_target).sum()) / float(nactive)
 
     else:
         return None
@@ -62,7 +62,8 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
 
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=1.,
+                                                       random_signs=True, equicorrelated=False)
         n, p = X.shape
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
 
@@ -93,7 +94,7 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
             print("approx sd", np.sqrt(np.diag(var)))
             break
 
-    return (approx_MLE - true_target)/np.sqrt(np.diag(var)), (approx_MLE - true_target).sum()/float(nactive)
+    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive)
 
 def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomization_scale=1.):
 
@@ -177,11 +178,11 @@ def test_bias_lasso(nsim=2000):
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 1000
+    ndraw = 500
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=200, s=10, signal=3.)
+        approx = test_lasso_approx_var(n=3000, p=1000, s=20, signal=3.5)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -189,7 +190,7 @@ def test_bias_lasso(nsim=2000):
                 pivot_obs_info.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
 
     #if i % 10 == 0:
     plt.clf()
@@ -199,7 +200,7 @@ def test_bias_lasso(nsim=2000):
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
     plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p200_n300_amp_3.png")
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1000_n3000_amp_0_AR1_0.2.png")
 
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt
diff --git a/selection/adjusted_MLE/tests/test_MLE_univariate.py b/selection/adjusted_MLE/tests/test_MLE_univariate.py
index b29365c6f..8b05c28a7 100644
--- a/selection/adjusted_MLE/tests/test_MLE_univariate.py
+++ b/selection/adjusted_MLE/tests/test_MLE_univariate.py
@@ -54,16 +54,70 @@ def boot_lasso(n=100, p=50, s=5, signal=5., B=1000, seed_n = 0, lam_frac=1., ran
     else:
         return None
 
+def approx_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
+
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.,
+                                                       random_signs=False, equicorrelated=False)
+        n, p = X.shape
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1./np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
+
+        active = M_est._overall
+        nactive = np.sum(active)
+        sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
+
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        selective_MLE = np.zeros(nactive)
+        var_MLE = np.zeros(nactive)
+        if nactive > 0:
+            for k in range(nactive):
+                M_est.solve_map_univariate_target(k)
+                approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                             M_est.opt_transform,
+                                                             M_est.target_observed[k]*np.identity(1).reshape((1,)),
+                                                             M_est.feasible_point,
+                                                             M_est.target_cov[k, k]*np.identity(1),
+                                                             M_est.randomizer_precision)
+
+                selective_MLE[k] = approx_MLE
+                var_MLE[k] = var
+            break
+
+    print("selective_MLE, approx_sd", selective_MLE, np.sqrt(var_MLE))
+    return np.true_divide((selective_MLE - true_target), np.sqrt(var_MLE)), (selective_MLE - true_target).sum()/float(nactive)
+
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
+    ndraw = 500
+    bias = 0.
+    pivot_obs_info= []
+    for i in range(ndraw):
+        approx = approx_lasso(n=300, p=200, s=10, signal=3.5)
+        if approx is not None:
+            pivot = approx[0]
+            bias += approx[1]
+            for j in range(pivot.shape[0]):
+                pivot_obs_info.append(pivot[j])
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
+
+    #if i % 10 == 0:
     plt.clf()
-    bootstrap = boot_lasso(n=100, p=50, s=5, signal=5., B=5000, seed_n = 0, lam_frac=1., randomization_scale=1.)
-    boot_pivot = bootstrap
-    ecdf = ECDF(ndist.cdf(boot_pivot))
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
     grid = np.linspace(0, 1, 101)
     print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='blue', marker='^')
-    #plt.plot(grid, grid, c='red', marker='^')
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot(grid, grid, 'k--')
     plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/selective_mle/Plots/only_boot_selective_MLE_lasso_p50.png")
\ No newline at end of file
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1000_n3000_amp_0_AR1_0.2.png")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
index a413ee98c..97be885d2 100644
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ b/selection/adjusted_MLE/tests/test_simple_problem.py
@@ -115,6 +115,8 @@ def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2
     target_transform = (-np.identity(n1), np.zeros(n1))
     #s = np.asscalar(np.sign(target_Z + omega))
     opt_transform = ((np.identity(n1)+epsilon), np.ones(n1) * (threshold))
+    print("shapes", (np.ones(n1) * (threshold)).shape, (np.identity(n1)+epsilon).shape, np.identity(n1).shape,
+          np.zeros(n1).shape, target_observed.shape)
     feasible_point = np.ones(n1)
     randomization_scale = 1.
     randomizer_precision = np.identity(n1) / randomization_scale ** 2

From 36ef7b601b41693068debde7996d4ba512179346 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Mon, 20 Nov 2017 19:57:03 -0800
Subject: [PATCH 392/617] test BH with orthogonal design

---
 selection/adjusted_MLE/tests/test_BH.py  | 86 ++++++++++++++++++++++++
 selection/adjusted_MLE/tests/test_MLE.py | 65 +++++++++---------
 2 files changed, 118 insertions(+), 33 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/test_BH.py

diff --git a/selection/adjusted_MLE/tests/test_BH.py b/selection/adjusted_MLE/tests/test_BH.py
new file mode 100644
index 000000000..362bf5826
--- /dev/null
+++ b/selection/adjusted_MLE/tests/test_BH.py
@@ -0,0 +1,86 @@
+from __future__ import print_function
+import numpy as np, sys
+
+import regreg.api as rr
+from scipy.stats import norm as ndist
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import solve_UMVU
+from statsmodels.distributions.empirical_distribution import ECDF
+
+def BH_selection(p_values, level):
+
+    m = p_values.shape[0]
+    p_sorted = np.sort(p_values)
+    indices = np.arange(m)
+    indices_order = np.argsort(p_values)
+    order_sig = np.max(indices[p_sorted - np.true_divide(level * (np.arange(m) + 1.), m) <= 0])
+    E_sel = indices_order[:(order_sig+1)]
+
+    active = np.zeros(m, np.bool)
+    active[E_sel] = 1
+    return order_sig+1, active
+
+
+def orthogonal_lasso_approx(n=100, s=3, signal=3, randomization_scale=1., sigma = 1., level=0.10):
+
+    while True:
+        beta = np.zeros(n)
+
+        signal = np.atleast_1d(signal)
+        if signal.shape == (1,):
+            beta[:s] = signal[0] * (1 + np.fabs(np.random.standard_normal(s)))
+        else:
+            beta[:s] = np.linspace(signal[0], signal[1], s)
+
+        y = sigma * (beta + np.random.standard_normal(n))
+        omega = randomization_scale * np.random.standard_normal(n)
+
+        p_values = 2.*(1. - ndist.cdf(np.abs(y+omega)/np.sqrt(1.+ randomization_scale**2.)))
+        K, active = BH_selection(p_values, level)
+
+        threshold = np.sqrt(1.+ randomization_scale**2.)*ndist.ppf(1.-(K*level)/n)
+        target_observed = y[active]
+        target_transform = (-np.identity(K), np.zeros(K))
+        s = np.sign(target_observed + omega[active])
+        opt_transform = (np.identity(K)*s[None, :], threshold*s*np.ones(K))
+        nactive = np.sum(active)
+        feasible_point= np.ones(nactive)
+
+        if nactive >0:
+            true_target = beta[active]
+            print("true_target", true_target)
+            approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
+                                                         opt_transform,
+                                                         target_observed,
+                                                         feasible_point,
+                                                         sigma*np.identity(nactive),
+                                                         randomization_scale*np.identity(nactive))
+
+            print("approx sd", np.sqrt(np.diag(var)))
+            break
+
+    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    ndraw = 500
+    bias = 0.
+    pivot_obs_info= []
+    for i in range(ndraw):
+        approx = orthogonal_lasso_approx(n=100, s=20, signal=2.5, randomization_scale=1., sigma = 1., level=0.10)
+        if approx is not None:
+            pivot = approx[0]
+            bias += approx[1]
+            print("bias in iteration", approx[1])
+            pivot_obs_info.extend(pivot)
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+
+    plt.clf()
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    grid = np.linspace(0, 1, 101)
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot(grid, grid, 'k--')
+    plt.show()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index aa0cbb476..49a66026b 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -96,7 +96,7 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
 
     return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive)
 
-def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomization_scale=1.):
+def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomization_scale=1., sigma = 1.):
 
     while True:
         beta = np.zeros(p)
@@ -109,7 +109,6 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
 
         X = np.linalg.svd(np.random.standard_normal((n,p)))[0][:,:p]
 
-        sigma = 1.
         y = sigma * (X.dot(beta) + np.random.standard_normal(n))
 
         lam = sigma * lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
@@ -175,6 +174,33 @@ def test_bias_lasso(nsim=2000):
 #             plt.plot(grid, ecdf(grid), c='red', marker='^')
 #             plt.plot(grid, grid, 'k--')
 
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 500
+#     bias = 0.
+#     pivot_obs_info= []
+#     for i in range(ndraw):
+#         approx = test_lasso_approx_var(n=3000, p=1000, s=20, signal=3.5)
+#         if approx is not None:
+#             pivot = approx[0]
+#             bias += approx[1]
+#             for j in range(pivot.shape[0]):
+#                 pivot_obs_info.append(pivot[j])
+#
+#         sys.stderr.write("iteration completed" + str(i) + "\n")
+#         sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
+#
+#     #if i % 10 == 0:
+#     plt.clf()
+#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+#     grid = np.linspace(0, 1, 101)
+#     print("ecdf", ecdf(grid))
+#     plt.plot(grid, ecdf(grid), c='red', marker='^')
+#     plt.plot(grid, grid, 'k--')
+#     plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1000_n3000_amp_0_AR1_0.2.png")
+
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
@@ -182,17 +208,16 @@ def test_bias_lasso(nsim=2000):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=3000, p=1000, s=20, signal=3.5)
+        approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=2.8, lam_frac=0.8)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
-            for j in range(pivot.shape[0]):
-                pivot_obs_info.append(pivot[j])
+            print("bias in iteration", approx[1])
+            pivot_obs_info.extend(pivot)
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
 
-    #if i % 10 == 0:
     plt.clf()
     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
     grid = np.linspace(0, 1, 101)
@@ -200,30 +225,4 @@ def test_bias_lasso(nsim=2000):
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
     plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1000_n3000_amp_0_AR1_0.2.png")
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 1000
-#     bias = 0.
-#     pivot_obs_info= []
-#     for i in range(ndraw):
-#         approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=-1.3, lam_frac=0.8)
-#         if approx is not None:
-#             pivot = approx[0]
-#             bias += approx[1]
-#             print("bias in iteration", approx[1])
-#             pivot_obs_info.extend(pivot)
-#
-#         sys.stderr.write("iteration completed" + str(i) + "\n")
-#         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
-#
-#     plt.clf()
-#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-#     grid = np.linspace(0, 1, 101)
-#     print("ecdf", ecdf(grid))
-#     plt.plot(grid, ecdf(grid), c='red', marker='^')
-#     plt.plot(grid, grid, 'k--')
-#     plt.show()
 #     #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
\ No newline at end of file

From caae0b0fe9d1688621a28a36327423a32d527abd Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c860.SUNet>
Date: Tue, 21 Nov 2017 09:50:33 -0800
Subject: [PATCH 393/617] added test for BH screening

---
 selection/adjusted_MLE/tests/test_BH.py | 91 +++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_BH.py b/selection/adjusted_MLE/tests/test_BH.py
index 362bf5826..c1015cfe7 100644
--- a/selection/adjusted_MLE/tests/test_BH.py
+++ b/selection/adjusted_MLE/tests/test_BH.py
@@ -4,6 +4,7 @@
 import regreg.api as rr
 from scipy.stats import norm as ndist
 from selection.randomized.api import randomization
+from selection.tests.instance import gaussian_instance
 from selection.adjusted_MLE.selective_MLE import solve_UMVU
 from statsmodels.distributions.empirical_distribution import ECDF
 
@@ -20,8 +21,7 @@ def BH_selection(p_values, level):
     active[E_sel] = 1
     return order_sig+1, active
 
-
-def orthogonal_lasso_approx(n=100, s=3, signal=3, randomization_scale=1., sigma = 1., level=0.10):
+def orthogonal_BH_approx(n=100, s=3, signal=3, randomization_scale=1., sigma = 1., level=0.10):
 
     while True:
         beta = np.zeros(n)
@@ -61,6 +61,89 @@ def orthogonal_lasso_approx(n=100, s=3, signal=3, randomization_scale=1., sigma
 
     return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
 
+
+def BH_approx(n=100, p=50, s=5, signal=5., randomization_scale=1., sigma=1., level=0.10):
+
+    while True:
+
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=sigma,
+                                                       random_signs=True, equicorrelated=False)
+
+        omega = randomization_scale * np.random.standard_normal(p)
+        p_values = 2.*(1. - ndist.cdf(np.abs(X.T.dot(y)+omega)/np.sqrt(1.+ randomization_scale**2.)))
+        K, active = BH_selection(p_values, level)
+        nactive = active.sum()
+
+        if nactive >0:
+
+            threshold = np.sqrt(1. + randomization_scale ** 2.) * ndist.ppf(1. - (K * level) / n)
+
+            X_active_inv = np.linalg.inv(X[:, active].T.dot(X[:, active]))
+            projection_perp = np.identity(n) - X[:, active].dot(X_active_inv).dot(X[:, active].T)
+            observed_score_state = np.hstack(
+                [np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(y),
+                 X[:, ~active].T.dot(projection_perp).dot(y)])
+            target_observed = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(y)
+            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+            active_signs = np.sign(X[:, active].T.dot(y) + omega[active])
+
+            _opt_linear_term = np.vstack([np.diag(active_signs), np.zeros((p - nactive,nactive))])
+            _opt_affine_term = np.concatenate([threshold * active_signs, X[:, ~active].T.dot(y) + omega[~active]])
+            opt_transform = (_opt_linear_term, _opt_affine_term)
+
+            _score_linear_term = np.zeros((p, p))
+            _score_linear_term[:nactive, :nactive] = -X[:, active].T.dot(X[:, active])
+            _score_linear_term[nactive:, :nactive] = -X[:, ~active].T.dot(X[:, active])
+            _score_linear_term[nactive:, nactive:] = -np.identity(p - nactive)
+
+            score_cov = np.zeros((p, p))
+            score_cov[:nactive, :nactive] = X_active_inv
+            score_cov[nactive:, nactive:] = X[:, ~active].T.dot(projection_perp).dot(X[:, ~active])
+            score_target_cov = score_cov[:, :nactive]
+            target_cov = score_cov[:nactive, :nactive]
+
+            A = np.dot(_score_linear_term, score_target_cov).dot(np.linalg.inv(target_cov))
+            data_offset = _score_linear_term.dot(observed_score_state) - A.dot(target_observed)
+            target_transform = (A, data_offset)
+
+            feasible_point = np.ones(nactive)
+
+            approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
+                                                         opt_transform,
+                                                         target_observed,
+                                                         feasible_point,
+                                                         sigma*np.identity(nactive),
+                                                         randomization_scale*np.identity(p))
+
+            print("approx sd", np.sqrt(np.diag(var)))
+            break
+
+    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
+
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 500
+#     bias = 0.
+#     pivot_obs_info= []
+#     for i in range(ndraw):
+#         approx = orthogonal_BH_approx(n=100, s=20, signal=2.5, randomization_scale=1., sigma = 1., level=0.10)
+#         if approx is not None:
+#             pivot = approx[0]
+#             bias += approx[1]
+#             print("bias in iteration", approx[1])
+#             pivot_obs_info.extend(pivot)
+#
+#         sys.stderr.write("iteration completed" + str(i) + "\n")
+#         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+#
+#     plt.clf()
+#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+#     grid = np.linspace(0, 1, 101)
+#     plt.plot(grid, ecdf(grid), c='red', marker='^')
+#     plt.plot(grid, grid, 'k--')
+#     plt.show()
+
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
@@ -68,7 +151,7 @@ def orthogonal_lasso_approx(n=100, s=3, signal=3, randomization_scale=1., sigma
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = orthogonal_lasso_approx(n=100, s=20, signal=2.5, randomization_scale=1., sigma = 1., level=0.10)
+        approx = BH_approx(n=300, p=1000, s=50, signal=3.5, randomization_scale=1., sigma=1., level=0.10)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -76,7 +159,7 @@ def orthogonal_lasso_approx(n=100, s=3, signal=3, randomization_scale=1., sigma
             pivot_obs_info.extend(pivot)
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
 
     plt.clf()
     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))

From e837b86820b872447e219decff4a0851a660e06c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c860.SUNet>
Date: Tue, 21 Nov 2017 11:23:02 -0800
Subject: [PATCH 394/617] commit changes

---
 selection/adjusted_MLE/tests/test_BH.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_BH.py b/selection/adjusted_MLE/tests/test_BH.py
index c1015cfe7..45d5f25fd 100644
--- a/selection/adjusted_MLE/tests/test_BH.py
+++ b/selection/adjusted_MLE/tests/test_BH.py
@@ -67,7 +67,7 @@ def BH_approx(n=100, p=50, s=5, signal=5., randomization_scale=1., sigma=1., lev
     while True:
 
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=sigma,
-                                                       random_signs=True, equicorrelated=False)
+                                                       random_signs=False, equicorrelated=False)
 
         omega = randomization_scale * np.random.standard_normal(p)
         p_values = 2.*(1. - ndist.cdf(np.abs(X.T.dot(y)+omega)/np.sqrt(1.+ randomization_scale**2.)))
@@ -115,7 +115,7 @@ def BH_approx(n=100, p=50, s=5, signal=5., randomization_scale=1., sigma=1., lev
                                                          sigma*np.identity(nactive),
                                                          randomization_scale*np.identity(p))
 
-            print("approx sd", np.sqrt(np.diag(var)))
+            #print("approx sd", np.sqrt(np.diag(var)))
             break
 
     return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
@@ -151,7 +151,7 @@ def BH_approx(n=100, p=50, s=5, signal=5., randomization_scale=1., sigma=1., lev
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = BH_approx(n=300, p=1000, s=50, signal=3.5, randomization_scale=1., sigma=1., level=0.10)
+        approx = BH_approx(n=1000, p=2000, s=100, signal=3.5, randomization_scale=1., sigma=1., level=0.10)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]

From e42ab38b8eb48a236ac09ef34fdf6e20e6e51d6d Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c860.SUNet>
Date: Tue, 21 Nov 2017 12:22:42 -0800
Subject: [PATCH 395/617] added additional constraint in BH

---
 selection/adjusted_MLE/tests/test_BH.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_BH.py b/selection/adjusted_MLE/tests/test_BH.py
index 45d5f25fd..1fb86722b 100644
--- a/selection/adjusted_MLE/tests/test_BH.py
+++ b/selection/adjusted_MLE/tests/test_BH.py
@@ -19,7 +19,7 @@ def BH_selection(p_values, level):
 
     active = np.zeros(m, np.bool)
     active[E_sel] = 1
-    return order_sig+1, active
+    return order_sig+1, active, p_values[indices_order[order_sig+1]]
 
 def orthogonal_BH_approx(n=100, s=3, signal=3, randomization_scale=1., sigma = 1., level=0.10):
 
@@ -36,9 +36,9 @@ def orthogonal_BH_approx(n=100, s=3, signal=3, randomization_scale=1., sigma = 1
         omega = randomization_scale * np.random.standard_normal(n)
 
         p_values = 2.*(1. - ndist.cdf(np.abs(y+omega)/np.sqrt(1.+ randomization_scale**2.)))
-        K, active = BH_selection(p_values, level)
+        K, active, p_threshold = BH_selection(p_values, level)
 
-        threshold = np.sqrt(1.+ randomization_scale**2.)*ndist.ppf(1.-(K*level)/n)
+        threshold = np.sqrt(1.+ randomization_scale**2.)*ndist.ppf(1.-np.max((K*level)/n, p_threshold))
         target_observed = y[active]
         target_transform = (-np.identity(K), np.zeros(K))
         s = np.sign(target_observed + omega[active])
@@ -71,12 +71,12 @@ def BH_approx(n=100, p=50, s=5, signal=5., randomization_scale=1., sigma=1., lev
 
         omega = randomization_scale * np.random.standard_normal(p)
         p_values = 2.*(1. - ndist.cdf(np.abs(X.T.dot(y)+omega)/np.sqrt(1.+ randomization_scale**2.)))
-        K, active = BH_selection(p_values, level)
+        K, active, p_threshold = BH_selection(p_values, level)
         nactive = active.sum()
 
         if nactive >0:
 
-            threshold = np.sqrt(1. + randomization_scale ** 2.) * ndist.ppf(1. - (K * level) / n)
+            threshold = np.sqrt(1. + randomization_scale ** 2.) * ndist.ppf(1.-max((K*level)/n, p_threshold))
 
             X_active_inv = np.linalg.inv(X[:, active].T.dot(X[:, active]))
             projection_perp = np.identity(n) - X[:, active].dot(X_active_inv).dot(X[:, active].T)

From 9b0b3ca493fa7da83126f64498e9f44cb48b620d Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c46b.SUNet>
Date: Fri, 24 Nov 2017 11:20:40 -0800
Subject: [PATCH 396/617] commit comparison with bootstrapped variance

---
 selection/adjusted_MLE/selective_MLE.py       |  5 +-
 selection/adjusted_MLE/tests/test_MLE.py      | 65 +++++++-------
 selection/adjusted_MLE/tests/test_MLE_boot.py | 90 +++++++++++++++++++
 3 files changed, 125 insertions(+), 35 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/test_MLE_boot.py

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index eac5dfbca..747d69805 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -4,7 +4,7 @@
 
 class M_estimator_map(M_estimator):
 
-    def __init__(self, loss, epsilon, penalty, randomization, randomization_scale = 1.):
+    def __init__(self, loss, epsilon, penalty, randomization, randomization_scale = 1., sigma= 1.):
         M_estimator.__init__(self, loss, epsilon, penalty, randomization)
         self.randomizer = randomization
         self.randomization_scale = randomization_scale
@@ -34,7 +34,8 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
         projection_perp = np.identity(n) - X[:, self._overall].dot(X_active_inv).dot(X[:, self._overall].T)
         score_cov[:self.nactive, :self.nactive] = X_active_inv
         score_cov[self.nactive:, self.nactive:] = X[:, ~self._overall].T.dot(projection_perp).dot(X[:, ~self._overall])
-        self.score_cov = score_cov
+        self.score_cov = (sigma**2.) * score_cov
+
         self.observed_score_state = self.observed_internal_state
         self.target_observed = self.observed_internal_state[:self.nactive]
         self.score_target_cov = self.score_cov[:, :self.nactive]
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 49a66026b..b96d6c558 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -60,9 +60,8 @@ def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., random
 
 def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=1.):
 
-
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=1.,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
@@ -174,33 +173,6 @@ def test_bias_lasso(nsim=2000):
 #             plt.plot(grid, ecdf(grid), c='red', marker='^')
 #             plt.plot(grid, grid, 'k--')
 
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 500
-#     bias = 0.
-#     pivot_obs_info= []
-#     for i in range(ndraw):
-#         approx = test_lasso_approx_var(n=3000, p=1000, s=20, signal=3.5)
-#         if approx is not None:
-#             pivot = approx[0]
-#             bias += approx[1]
-#             for j in range(pivot.shape[0]):
-#                 pivot_obs_info.append(pivot[j])
-#
-#         sys.stderr.write("iteration completed" + str(i) + "\n")
-#         sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
-#
-#     #if i % 10 == 0:
-#     plt.clf()
-#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-#     grid = np.linspace(0, 1, 101)
-#     print("ecdf", ecdf(grid))
-#     plt.plot(grid, ecdf(grid), c='red', marker='^')
-#     plt.plot(grid, grid, 'k--')
-#     plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1000_n3000_amp_0_AR1_0.2.png")
-
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
@@ -208,16 +180,17 @@ def test_bias_lasso(nsim=2000):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=2.8, lam_frac=0.8)
+        approx = test_lasso_approx_var(n=100, p=50, s=5, signal=3.5)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
-            print("bias in iteration", approx[1])
-            pivot_obs_info.extend(pivot)
+            for j in range(pivot.shape[0]):
+                pivot_obs_info.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
 
+    #if i % 10 == 0:
     plt.clf()
     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
     grid = np.linspace(0, 1, 101)
@@ -225,4 +198,30 @@ def test_bias_lasso(nsim=2000):
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
     plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n1000_amp_3.5.png")
+
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 500
+#     bias = 0.
+#     pivot_obs_info= []
+#     for i in range(ndraw):
+#         approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=2.8, lam_frac=0.8)
+#         if approx is not None:
+#             pivot = approx[0]
+#             bias += approx[1]
+#             print("bias in iteration", approx[1])
+#             pivot_obs_info.extend(pivot)
+#
+#         sys.stderr.write("iteration completed" + str(i) + "\n")
+#         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
+#
+#     plt.clf()
+#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+#     grid = np.linspace(0, 1, 101)
+#     print("ecdf", ecdf(grid))
+#     plt.plot(grid, ecdf(grid), c='red', marker='^')
+#     plt.plot(grid, grid, 'k--')
+#     plt.show()
 #     #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
new file mode 100644
index 000000000..1616caf2e
--- /dev/null
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -0,0 +1,90 @@
+from __future__ import print_function
+import numpy as np, sys
+
+import regreg.api as rr
+from selection.tests.instance import gaussian_instance
+from scipy.stats import norm as ndist
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
+from statsmodels.distributions.empirical_distribution import ECDF
+
+def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
+
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
+                                                       random_signs=True, equicorrelated=False)
+        n, p = X.shape
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1./np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma)
+
+        M_est.solve_map()
+        active = M_est._overall
+
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        nactive = np.sum(active)
+
+        if nactive > 0:
+            approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                         M_est.opt_transform,
+                                                         M_est.target_observed,
+                                                         M_est.feasible_point,
+                                                         M_est.target_cov,
+                                                         M_est.randomizer_precision)
+
+            boot_sample = np.zeros((B, nactive))
+            resid = y - X[:, active].dot(M_est.target_observed)
+            for b in range(B):
+                boot_indices = np.random.choice(n, n, replace=True)
+                boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
+                target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
+                boot_sample[b, :] = mle_map(target_boot)[0]
+
+            print("estimated sd", boot_sample.std(0), np.sqrt(np.diag(var)))
+            return np.true_divide((approx_MLE - true_target), boot_sample.std(0)), \
+                   ((approx_MLE - true_target).sum()) / float(nactive), \
+                   np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var)))
+            break
+
+    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive)
+
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    ndraw = 500
+    bias = 0.
+    pivot_obs_info= []
+    pivot_bootstrap = []
+    for i in range(ndraw):
+        approx = test_lasso_approx_var(n=300, p=50, s=5, signal=3.5)
+        if approx is not None:
+            pivot_boot = approx[0]
+            pivot_approx_info = approx[2]
+            bias += approx[1]
+            for j in range(pivot_boot.shape[0]):
+                pivot_obs_info.append(pivot_approx_info[j])
+                pivot_bootstrap.append(pivot_boot[j])
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
+        #print("pivots", pivot_approx_info, pivot_boot)
+
+    #if i % 10 == 0:
+    plt.clf()
+    ecdf_approx = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_bootstrap)))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf_boot(grid))
+    plt.plot(grid, ecdf_approx(grid), c='red', marker='^')
+    plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
+    plt.plot(grid, grid, 'k--')
+    #plt.show()
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n300_p50_amp3.5_sigma2.png")
\ No newline at end of file

From 4d208f8e8718b6256b1c158f0501076399149a5e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c46b.SUNet>
Date: Fri, 24 Nov 2017 11:53:28 -0800
Subject: [PATCH 397/617] changed map for sigma not equal to 1.

---
 selection/adjusted_MLE/tests/test_MLE_boot.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index 1616caf2e..c063ee16e 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -8,7 +8,7 @@
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from statsmodels.distributions.empirical_distribution import ECDF
 
-def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
+def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.5):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
@@ -87,4 +87,4 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
     plt.plot(grid, grid, 'k--')
     #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n300_p50_amp3.5_sigma2.png")
\ No newline at end of file
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n300_p50_amp3.5_sigma1.5.png")
\ No newline at end of file

From 56a2051a7da0a90af265ec5fedc2912c6bf0608c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 25 Nov 2017 09:56:17 -0800
Subject: [PATCH 398/617] added observed fisher info as a function of target

---
 selection/adjusted_MLE/selective_MLE.py       | 38 +++++++++++++------
 selection/adjusted_MLE/tests/test_MLE_boot.py | 20 +++++-----
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 747d69805..ac3e7624f 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -108,29 +108,43 @@ def solve_UMVU(target_transform,
     M_1_inv = np.linalg.inv(M_1)
     offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
     mle_transform = (M_1_inv, -M_1_inv.dot(L), offset_term)
+    var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
+                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
+
+    cross_covariance = np.linalg.inv(implied_precision[:ntarget, :ntarget]).dot(implied_precision[:ntarget, ntarget:])
+    var_matrices = (np.linalg.inv(implied_opt), np.linalg.inv(implied_precision[:ntarget,:ntarget]),
+                    cross_covariance,target_precision)
+
+    def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
+                feasible_point, conditional_precision, target_observed):
 
-    def mle_map(natparam_transform, mle_transform, feasible_point, conditional_precision, target_observed):
         param_lin, param_offset = natparam_transform
         mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+
+
         soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
                                                  conditional_precision,
                                                  feasible_point=feasible_point)
+        selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
 
-        return mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset, value
+        var_target_lin, var_offset = var_transform
+        var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
+        _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
+                                          var_precision,
+                                          feasible_point=None,
+                                          step=1,
+                                          nstep=250)
 
-    mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, feasible_point, conditional_precision)
-    sel_MLE, value = mle_partial(target_observed)
+        hessian = target_precision.dot(inv_precision_target +
+                                       cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
 
-    conditional_par = -implied_precision[ntarget:,:ntarget].dot(M_1.dot(sel_MLE)+ M_2.dot(conditioned_value))
-    _ , _ , hess = solve_barrier_nonneg(conditional_par + offset_term,
-                                        np.linalg.inv(implied_opt),
-                                        feasible_point=feasible_point)
+        return selective_MLE, hessian
 
-    cross_covariance = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(implied_precision[:ntarget,ntarget:])
-    hessian = target_precision.dot(np.linalg.inv(implied_precision[:ntarget,:ntarget])
-                                   + cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
+    mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, var_transform, var_matrices,
+                                    feasible_point, conditional_precision)
+    sel_MLE, hessian = mle_partial(target_observed)
 
-    return np.squeeze(sel_MLE), value, np.linalg.inv(hessian), mle_partial
+    return np.squeeze(sel_MLE), np.linalg.inv(hessian), mle_partial
 
 
 def solve_barrier_nonneg(conjugate_arg,
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index c063ee16e..5882f63f4 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -8,7 +8,7 @@
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from statsmodels.distributions.empirical_distribution import ECDF
 
-def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.5):
+def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
@@ -32,12 +32,12 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
         nactive = np.sum(active)
 
         if nactive > 0:
-            approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                         M_est.opt_transform,
-                                                         M_est.target_observed,
-                                                         M_est.feasible_point,
-                                                         M_est.target_cov,
-                                                         M_est.randomizer_precision)
+            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                  M_est.opt_transform,
+                                                  M_est.target_observed,
+                                                  M_est.feasible_point,
+                                                  M_est.target_cov,
+                                                  M_est.randomizer_precision)
 
             boot_sample = np.zeros((B, nactive))
             resid = y - X[:, active].dot(M_est.target_observed)
@@ -59,7 +59,7 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 500
+    ndraw = 100
     bias = 0.
     pivot_obs_info= []
     pivot_bootstrap = []
@@ -86,5 +86,5 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     plt.plot(grid, ecdf_approx(grid), c='red', marker='^')
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
     plt.plot(grid, grid, 'k--')
-    #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n300_p50_amp3.5_sigma1.5.png")
\ No newline at end of file
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n2000_p2000_amp3.5_sigma1.png")
\ No newline at end of file

From 7fda4fb372c154ce25a1e188abdd573d818914d7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 25 Nov 2017 11:07:31 -0800
Subject: [PATCH 399/617] return inv of hessian

---
 selection/adjusted_MLE/selective_MLE.py       |   6 +-
 selection/adjusted_MLE/tests/test_MLE_boot.py | 108 ++++++++++++++----
 2 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index ac3e7624f..089f34b11 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -138,13 +138,13 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
         hessian = target_precision.dot(inv_precision_target +
                                        cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
 
-        return selective_MLE, hessian
+        return selective_MLE, np.linalg.inv(hessian)
 
     mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, var_transform, var_matrices,
                                     feasible_point, conditional_precision)
-    sel_MLE, hessian = mle_partial(target_observed)
+    sel_MLE, inv_hessian = mle_partial(target_observed)
 
-    return np.squeeze(sel_MLE), np.linalg.inv(hessian), mle_partial
+    return np.squeeze(sel_MLE), inv_hessian, mle_partial
 
 
 def solve_barrier_nonneg(conjugate_arg,
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index 5882f63f4..2f124a68c 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -8,7 +8,7 @@
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from statsmodels.distributions.empirical_distribution import ECDF
 
-def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
+def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
@@ -51,39 +51,101 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
             return np.true_divide((approx_MLE - true_target), boot_sample.std(0)), \
                    ((approx_MLE - true_target).sum()) / float(nactive), \
                    np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var)))
+
             break
 
-    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive)
+def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=0.8, randomization_scale=1., sigma= 1.):
+
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
+                                                       random_signs=True, equicorrelated=False)
+        n, p = X.shape
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1./np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma)
+
+        M_est.solve_map()
+        active = M_est._overall
+
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        nactive = np.sum(active)
+
+        if nactive > 0:
+            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                  M_est.opt_transform,
+                                                  M_est.target_observed,
+                                                  M_est.feasible_point,
+                                                  M_est.target_cov,
+                                                  M_est.randomizer_precision)
 
+            boot_pivot = np.zeros((B, nactive))
+            resid = y - X[:, active].dot(M_est.target_observed)
+            for b in range(B):
+                boot_indices = np.random.choice(n, n, replace=True)
+                boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
+                target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
+                boot_mle = mle_map(target_boot)
+                boot_pivot[b, :] = np.true_divide(boot_mle[0]- approx_MLE, np.sqrt(np.diag(boot_mle[1])))
+
+            break
+
+    return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0)
+
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 100
+#     bias = 0.
+#     pivot_obs_info= []
+#     pivot_bootstrap = []
+#     for i in range(ndraw):
+#         approx = boot_lasso_approx_var(n=300, p=50, s=5, signal=3.5)
+#         if approx is not None:
+#             pivot_boot = approx[0]
+#             pivot_approx_info = approx[2]
+#             bias += approx[1]
+#             for j in range(pivot_boot.shape[0]):
+#                 pivot_obs_info.append(pivot_approx_info[j])
+#                 pivot_bootstrap.append(pivot_boot[j])
+#
+#         sys.stderr.write("iteration completed" + str(i) + "\n")
+#         sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
+#         #print("pivots", pivot_approx_info, pivot_boot)
+#
+#     #if i % 10 == 0:
+#     plt.clf()
+#     ecdf_approx = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+#     ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_bootstrap)))
+#     grid = np.linspace(0, 1, 101)
+#     print("ecdf", ecdf_boot(grid))
+#     plt.plot(grid, ecdf_approx(grid), c='red', marker='^')
+#     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
+#     plt.plot(grid, grid, 'k--')
+#     plt.show()
+#     #plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n2000_p2000_amp3.5_sigma1.png")
 
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 100
     bias = 0.
-    pivot_obs_info= []
-    pivot_bootstrap = []
-    for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=50, s=5, signal=3.5)
-        if approx is not None:
-            pivot_boot = approx[0]
-            pivot_approx_info = approx[2]
-            bias += approx[1]
-            for j in range(pivot_boot.shape[0]):
-                pivot_obs_info.append(pivot_approx_info[j])
-                pivot_bootstrap.append(pivot_boot[j])
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
-        #print("pivots", pivot_approx_info, pivot_boot)
-
-    #if i % 10 == 0:
+    approx = boot_pivot_approx_var(n=1000, p=2000, s=20, signal=3.5)
+    if approx is not None:
+        pivot_boot = approx[0]
+        bias = approx[1]
+
+    sys.stderr.write("overall_bias" + str(bias) + "\n")
+
     plt.clf()
-    ecdf_approx = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_bootstrap)))
+    ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_boot)))
     grid = np.linspace(0, 1, 101)
     print("ecdf", ecdf_boot(grid))
-    plt.plot(grid, ecdf_approx(grid), c='red', marker='^')
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
     plt.plot(grid, grid, 'k--')
     plt.show()

From ee18a0befaa598bb8a924d338a82cffe96768bee Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 25 Nov 2017 12:27:05 -0800
Subject: [PATCH 400/617] changed previous tests for MLE

---
 selection/adjusted_MLE/tests/test_MLE.py      | 26 +++++++++----------
 selection/adjusted_MLE/tests/test_MLE_boot.py |  6 +++--
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index b96d6c558..4f677fad8 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -82,12 +82,12 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
         nactive = np.sum(active)
 
         if nactive > 0:
-            approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                         M_est.opt_transform,
-                                                         M_est.target_observed,
-                                                         M_est.feasible_point,
-                                                         M_est.target_cov,
-                                                         M_est.randomizer_precision)
+            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                  M_est.opt_transform,
+                                                  M_est.target_observed,
+                                                  M_est.feasible_point,
+                                                  M_est.target_cov,
+                                                  M_est.randomizer_precision)
 
             #print("approx_MLE and sd", approx_MLE, np.sqrt(np.diag(var)))
             print("approx sd", np.sqrt(np.diag(var)))
@@ -128,12 +128,12 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
         if nactive >0:
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
             print("true_target", true_target)
-            approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                         M_est.opt_transform,
-                                                         M_est.target_observed,
-                                                         M_est.feasible_point,
-                                                         M_est.target_cov,
-                                                         M_est.randomizer_precision)
+            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
+                                                  M_est.opt_transform,
+                                                  M_est.target_observed,
+                                                  M_est.feasible_point,
+                                                  M_est.target_cov,
+                                                  M_est.randomizer_precision)
 
             print("approx sd", np.sqrt(np.diag(var)))
             break
@@ -180,7 +180,7 @@ def test_bias_lasso(nsim=2000):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=100, p=50, s=5, signal=3.5)
+        approx = test_lasso_approx_var(n=1000, p=300, s=20, signal=3.5)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index 2f124a68c..a7c0b3c03 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -54,7 +54,7 @@ def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 
             break
 
-def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=0.8, randomization_scale=1., sigma= 1.):
+def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=50000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
@@ -94,6 +94,8 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=0.8, ran
                 boot_mle = mle_map(target_boot)
                 boot_pivot[b, :] = np.true_divide(boot_mle[0]- approx_MLE, np.sqrt(np.diag(boot_mle[1])))
 
+                sys.stderr.write("bootstrap sample" + str(b) + "\n")
+
             break
 
     return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0)
@@ -135,7 +137,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=0.8, ran
     import matplotlib.pyplot as plt
 
     bias = 0.
-    approx = boot_pivot_approx_var(n=1000, p=2000, s=20, signal=3.5)
+    approx = boot_pivot_approx_var(n=1000, p=300, s=20, signal=3.5)
     if approx is not None:
         pivot_boot = approx[0]
         bias = approx[1]

From 07e283fd442721ddfc602fc29f6f44c7aedb6f39 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 25 Nov 2017 12:49:16 -0800
Subject: [PATCH 401/617] adjust simulation sizes and bootstrap count in MLE tests

---
 selection/adjusted_MLE/tests/test_MLE.py      | 2 +-
 selection/adjusted_MLE/tests/test_MLE_boot.py | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 4f677fad8..e467d5b0e 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -180,7 +180,7 @@ def test_bias_lasso(nsim=2000):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=1000, p=300, s=20, signal=3.5)
+        approx = test_lasso_approx_var(n=4000, p=2000, s=20, signal=3.5)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index a7c0b3c03..dbb98cdc8 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -54,7 +54,7 @@ def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 
             break
 
-def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=50000, lam_frac=1., randomization_scale=1., sigma= 1.):
+def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=20000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
@@ -93,7 +93,6 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=50000, lam_frac=1., ran
                 target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
                 boot_mle = mle_map(target_boot)
                 boot_pivot[b, :] = np.true_divide(boot_mle[0]- approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-
                 sys.stderr.write("bootstrap sample" + str(b) + "\n")
 
             break
@@ -137,12 +136,12 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=50000, lam_frac=1., ran
     import matplotlib.pyplot as plt
 
     bias = 0.
-    approx = boot_pivot_approx_var(n=1000, p=300, s=20, signal=3.5)
+    approx = boot_pivot_approx_var(n=300, p=50, s=5, signal=3.5)
     if approx is not None:
         pivot_boot = approx[0]
         bias = approx[1]
 
-    sys.stderr.write("overall_bias" + str(bias) + "\n")
+    #sys.stderr.write("overall_bias" + str(bias) + "\n")
 
     plt.clf()
     ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_boot)))

From c82c6cd8eed44fc4ed576c1f789b1efd650e7ce5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Sat, 25 Nov 2017 23:18:14 -0800
Subject: [PATCH 402/617] run test_MLE at n=5000, p=4000 and save plot to file

---
 selection/adjusted_MLE/tests/test_MLE.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index e467d5b0e..ed6552155 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -180,7 +180,7 @@ def test_bias_lasso(nsim=2000):
     bias = 0.
     pivot_obs_info= []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=4000, p=2000, s=20, signal=3.5)
+        approx = test_lasso_approx_var(n=5000, p=4000, s=20, signal=3.5)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -197,8 +197,8 @@ def test_bias_lasso(nsim=2000):
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n1000_amp_3.5.png")
+    #plt.show()
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
 
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt

From ff480456015a2ce12388738baeda07a4cde65a82 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c610.SUNet>
Date: Mon, 27 Nov 2017 16:26:58 -0800
Subject: [PATCH 403/617] check sd from bootstrapped pivot

---
 selection/adjusted_MLE/tests/test_MLE_boot.py | 27 ++++++++++++-------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index dbb98cdc8..c044b4e4e 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -54,7 +54,7 @@ def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 
             break
 
-def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=20000, lam_frac=1., randomization_scale=1., sigma= 1.):
+def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
@@ -93,11 +93,12 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=20000, lam_frac=1., ran
                 target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
                 boot_mle = mle_map(target_boot)
                 boot_pivot[b, :] = np.true_divide(boot_mle[0]- approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-                sys.stderr.write("bootstrap sample" + str(b) + "\n")
+                #sys.stderr.write("bootstrap sample" + str(b) + "\n")
 
             break
 
-    return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0)
+    return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0), \
+           np.true_divide(approx_MLE - true_target, boot_pivot.std(0)), (approx_MLE - true_target).sum() / float(nactive)
 
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt
@@ -135,16 +136,24 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=20000, lam_frac=1., ran
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
+    ndraw = 100
     bias = 0.
-    approx = boot_pivot_approx_var(n=300, p=50, s=5, signal=3.5)
-    if approx is not None:
-        pivot_boot = approx[0]
-        bias = approx[1]
+    pivot_obs_info = []
 
-    #sys.stderr.write("overall_bias" + str(bias) + "\n")
+    for i in range(ndraw):
+        approx = boot_pivot_approx_var(n=1000, p=2000, s=20, signal=3.5, B=1000)
+        if approx is not None:
+            pivot_boot = approx[3]
+            bias += approx[4]
+
+            for j in range(pivot_boot.shape[0]):
+                pivot_obs_info.append(pivot_boot[j])
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
 
     plt.clf()
-    ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_boot)))
+    ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
     grid = np.linspace(0, 1, 101)
     print("ecdf", ecdf_boot(grid))
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')

From f14a036cccf9271c7f57d3e9cb9892290ad81772 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Thu, 30 Nov 2017 12:30:49 -0800
Subject: [PATCH 404/617] tried estimating var based on a sample from selective
 distribution

---
 selection/adjusted_MLE/selective_MLE.py       |  5 +-
 selection/adjusted_MLE/tests/test_MLE_boot.py |  8 +-
 .../adjusted_MLE/tests/test_boot_selective.py | 92 +++++++++++++++++++
 3 files changed, 100 insertions(+), 5 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/test_boot_selective.py

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 089f34b11..132cc64e2 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -99,6 +99,7 @@ def solve_UMVU(target_transform,
 
     linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
     offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
+
     natparam_transform = (linear_term, offset_term)
     conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
 
@@ -144,7 +145,9 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
                                     feasible_point, conditional_precision)
     sel_MLE, inv_hessian = mle_partial(target_observed)
 
-    return np.squeeze(sel_MLE), inv_hessian, mle_partial
+    implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value), offset_term])
+
+    return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter)
 
 
 def solve_barrier_nonneg(conjugate_arg,
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index c044b4e4e..8b2b29a4c 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -78,7 +78,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
         nactive = np.sum(active)
 
         if nactive > 0:
-            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
+            approx_MLE, var, mle_map, _, _ = solve_UMVU(M_est.target_transform,
                                                   M_est.opt_transform,
                                                   M_est.target_observed,
                                                   M_est.feasible_point,
@@ -141,7 +141,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     pivot_obs_info = []
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=1000, p=2000, s=20, signal=3.5, B=1000)
+        approx = boot_pivot_approx_var(n=1000, p=4000, s=20, signal=3.5, B=1200)
         if approx is not None:
             pivot_boot = approx[3]
             bias += approx[4]
@@ -158,5 +158,5 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     print("ecdf", ecdf_boot(grid))
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
     plt.plot(grid, grid, 'k--')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n2000_p2000_amp3.5_sigma1.png")
\ No newline at end of file
+    #plt.show()
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n1000_p4000_amp3.5_sigma1.png")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_boot_selective.py b/selection/adjusted_MLE/tests/test_boot_selective.py
new file mode 100644
index 000000000..11e6f24b5
--- /dev/null
+++ b/selection/adjusted_MLE/tests/test_boot_selective.py
@@ -0,0 +1,92 @@
+from __future__ import print_function
+import numpy as np, sys
+
+import regreg.api as rr
+from selection.tests.instance import gaussian_instance
+from scipy.stats import norm as ndist
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
+from statsmodels.distributions.empirical_distribution import ECDF
+import selection.constraints.affine as AC
+
+def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
+
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
+                                                       random_signs=True, equicorrelated=False)
+        n, p = X.shape
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1./np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma)
+
+        M_est.solve_map()
+        active = M_est._overall
+
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        nactive = np.sum(active)
+        print("number of variables selected by LASSO", nactive)
+
+        if nactive > 0:
+            approx_MLE, var, mle_map, implied_cov, implied_mean = solve_UMVU(M_est.target_transform,
+                                                                             M_est.opt_transform,
+                                                                             M_est.target_observed,
+                                                                             M_est.feasible_point,
+                                                                             M_est.target_cov,
+                                                                             M_est.randomizer_precision)
+
+            A = np.hstack([np.zeros((nactive, nactive)), -np.identity(nactive)])
+            b = np.zeros(nactive)
+            con = AC.constraints(A, b, covariance=implied_cov, mean= implied_mean)
+            sample = AC.sample_from_constraints(con, np.ones(2*nactive), ndraw=B, burnin=100)
+            boot_pivot = np.zeros((B, nactive))
+            boot_mle_vec = np.zeros((B, nactive))
+            for b in range(B):
+                boot_mle = mle_map((sample[b,:])[:nactive])
+                boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
+                boot_mle_vec[b, :] = boot_mle[0]
+            break
+
+    return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0), \
+           np.true_divide(approx_MLE - true_target, boot_pivot.std(0)), np.true_divide(approx_MLE - true_target, boot_mle_vec.std(0)),\
+           (approx_MLE - true_target).sum() / float(nactive)
+
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    ndraw = 100
+    bias = 0.
+    pivot_obs_info = []
+    pivot_mle = []
+
+    for i in range(ndraw):
+        approx = boot_pivot_approx_var(n=1000, p=4000, s=20, signal=3.5, B=2000)
+        if approx is not None:
+            pivot_boot = approx[3]
+            mle_boot = approx[4]
+            bias += approx[5]
+
+            for j in range(pivot_boot.shape[0]):
+                pivot_obs_info.append(pivot_boot[j])
+                pivot_mle.append(mle_boot[j])
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
+
+    plt.clf()
+    ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    ecdf_mle = ECDF(ndist.cdf(np.asarray(pivot_mle)))
+    grid = np.linspace(0, 1, 101)
+    #print("ecdf", ecdf_boot(grid))
+    plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
+    plt.plot(grid, ecdf_mle(grid), c='red', marker='^')
+    plt.plot(grid, grid, 'k--')
+    #plt.show()
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/selective_Boot_pivot_n1000_p4000_amp3.5_sigma1.png")
\ No newline at end of file

From 4bd08a52d89df88529d20d85ab2cd993d91eff9d Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c08d.SUNet>
Date: Mon, 4 Dec 2017 12:34:40 -0800
Subject: [PATCH 405/617] estimating sigma using glmnet

---
 selection/adjusted_MLE/tests/compare_risks.py | 117 ++++++++++++++++++
 .../adjusted_MLE/tests/test_boot_selective.py |   5 +-
 2 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/compare_risks.py

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
new file mode 100644
index 000000000..9b18dd0ef
--- /dev/null
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -0,0 +1,117 @@
+from __future__ import print_function
+import numpy as np, sys
+
+import regreg.api as rr
+from selection.tests.instance import gaussian_instance
+from scipy.stats import norm as ndist
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
+from statsmodels.distributions.empirical_distribution import ECDF
+import statsmodels.api as sm
+from selection.randomized.M_estimator import M_estimator
+from rpy2.robjects.packages import importr
+from rpy2 import robjects
+
+glmnet = importr('glmnet')
+import rpy2.robjects.numpy2ri
+
+rpy2.robjects.numpy2ri.activate()
+
+def glmnet_sigma(X, y):
+    robjects.r('''
+                glmnet_cv = function(X,y){
+                y = as.matrix(y)
+                X = as.matrix(X)
+
+                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
+                lam_minCV = out$lambda.min
+
+                coef = coef(out, s = "lambda.min")
+                linear.fit = lm(y~ X[, which(coef>0.001)-1])
+                sigma_est = summary(linear.fit)$sigma
+                return(sigma_est)
+                }''')
+
+    try:
+        sigma_cv_R = robjects.globalenv['glmnet_cv']
+        n, p = X.shape
+        r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+        r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+
+        sigma_est = sigma_cv_R(r_X, r_y)
+        return sigma_est
+    except:
+        return np.array([1.])
+
+
+def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=1.):
+
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.,
+                                                       random_signs=True, equicorrelated=False)
+        n, p = X.shape
+
+        if p>n:
+            sigma_est = glmnet_sigma(X, y)[0]
+            print("sigma est", sigma_est)
+        else:
+            ols_fit = sm.OLS(y, X).fit()
+            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+            print("sigma est", sigma_est)
+
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1./np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma = sigma_est)
+
+        M_est.solve_map()
+        active = M_est._overall
+
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        nactive = np.sum(active)
+
+        if nactive > 0:
+            approx_MLE, var, mle_map, _, _ = solve_UMVU(M_est.target_transform,
+                                                        M_est.opt_transform,
+                                                        M_est.target_observed,
+                                                        M_est.feasible_point,
+                                                        M_est.target_cov,
+                                                        M_est.randomizer_precision)
+
+            print("approx sd", np.sqrt(np.diag(var)))
+            break
+
+    return np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive)
+
+
+if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
+    ndraw = 100
+    bias = 0.
+    pivot_obs_info = []
+    for i in range(ndraw):
+        approx = test_lasso_approx_var(n=500, p=100, s=10, signal=3.5)
+        if approx is not None:
+            pivot = approx[0]
+            bias += approx[1]
+            for j in range(pivot.shape[0]):
+                pivot_obs_info.append(pivot[j])
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
+
+    # if i % 10 == 0:
+    plt.clf()
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot(grid, grid, 'k--')
+    plt.show()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_boot_selective.py b/selection/adjusted_MLE/tests/test_boot_selective.py
index 11e6f24b5..96f81fce2 100644
--- a/selection/adjusted_MLE/tests/test_boot_selective.py
+++ b/selection/adjusted_MLE/tests/test_boot_selective.py
@@ -9,6 +9,7 @@
 from statsmodels.distributions.empirical_distribution import ECDF
 import selection.constraints.affine as AC
 
+
 def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
@@ -88,5 +89,5 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
     plt.plot(grid, ecdf_mle(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
-    #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/selective_Boot_pivot_n1000_p4000_amp3.5_sigma1.png")
\ No newline at end of file
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/selective_Boot_pivot_n1000_p4000_amp3.5_sigma1.png")
\ No newline at end of file

From fa100e635f7a4d8d7d3fddd3e2c3170575c1f4b7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c08d.SUNet>
Date: Mon, 4 Dec 2017 14:11:43 -0800
Subject: [PATCH 406/617] added arguments for relative risk computation

---
 selection/adjusted_MLE/tests/compare_risks.py | 30 ++++++++++++++-----
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 9b18dd0ef..fd94dfa82 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -1,5 +1,6 @@
 from __future__ import print_function
 import numpy as np, sys
+import scipy.stats as stats
 
 import regreg.api as rr
 from selection.tests.instance import gaussian_instance
@@ -87,7 +88,9 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
             print("approx sd", np.sqrt(np.diag(var)))
             break
 
-    return np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive)
+    return np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive),\
+           np.true_divide((approx_MLE-true_target).dot((approx_MLE-true_target)), (true_target).dot(true_target)), \
+           np.true_divide((M_est.target_observed-true_target).dot((M_est.target_observed-true_target)), (true_target).dot(true_target))
 
 
 if __name__ == "__main__":
@@ -95,23 +98,34 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
 
     ndraw = 100
     bias = 0.
+    risk_selMLE = 0.
+    risk_relLASSO = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=500, p=100, s=10, signal=3.5)
+        approx = test_lasso_approx_var(n=500, p=2000, s=20, signal=1.25)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
+            risk_selMLE += approx[2]
+            risk_relLASSO += approx[3]
             for j in range(pivot.shape[0]):
                 pivot_obs_info.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
+        sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
+        sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
 
     # if i % 10 == 0:
+    # plt.clf()
+    # ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    # grid = np.linspace(0, 1, 101)
+    # print("ecdf", ecdf(grid))
+    # plt.plot(grid, ecdf(grid), c='red', marker='^')
+    # plt.plot(grid, grid, 'k--')
+    # plt.show()
+
+    import pylab
     plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot(grid, grid, 'k--')
-    plt.show()
\ No newline at end of file
+    stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=pylab)
+    pylab.show()
\ No newline at end of file

From 5c942d842e8032d1f481d8c1f8219c4edd86326c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c08d.SUNet>
Date: Mon, 4 Dec 2017 15:31:36 -0800
Subject: [PATCH 407/617] changes

---
 selection/adjusted_MLE/tests/compare_risks.py | 38 ++++++++++---------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index fd94dfa82..c9db5faef 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -12,6 +12,7 @@
 from selection.randomized.M_estimator import M_estimator
 from rpy2.robjects.packages import importr
 from rpy2 import robjects
+from scipy.stats import t as tdist
 
 glmnet = importr('glmnet')
 import rpy2.robjects.numpy2ri
@@ -45,10 +46,10 @@ def glmnet_sigma(X, y):
         return np.array([1.])
 
 
-def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=1.):
+def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=1.,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
 
@@ -60,6 +61,9 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
             print("sigma est", sigma_est)
 
+        snr = (beta.T).dot(X.T.dot(X)).dot(beta)
+        print("snr", snr)
+
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
 
         loss = rr.glm.gaussian(X, y)
@@ -89,20 +93,19 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
             break
 
     return np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive),\
-           np.true_divide((approx_MLE-true_target).dot((approx_MLE-true_target)), (true_target).dot(true_target)), \
-           np.true_divide((M_est.target_observed-true_target).dot((M_est.target_observed-true_target)), (true_target).dot(true_target))
+           (approx_MLE-true_target).dot((approx_MLE-true_target)), (M_est.target_observed-true_target).dot((M_est.target_observed-true_target))
 
 
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 100
+    ndraw = 500
     bias = 0.
     risk_selMLE = 0.
     risk_relLASSO = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=500, p=2000, s=20, signal=1.25)
+        approx = test_lasso_approx_var(n=500, p=100, s=5, signal=0.25)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -116,16 +119,15 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
 
-    # if i % 10 == 0:
-    # plt.clf()
-    # ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    # grid = np.linspace(0, 1, 101)
-    # print("ecdf", ecdf(grid))
-    # plt.plot(grid, ecdf(grid), c='red', marker='^')
-    # plt.plot(grid, grid, 'k--')
-    # plt.show()
-
-    import pylab
     plt.clf()
-    stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=pylab)
-    pylab.show()
\ No newline at end of file
+    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    grid = np.linspace(0, 1, 101)
+    print("ecdf", ecdf(grid))
+    plt.plot(grid, ecdf(grid), c='red', marker='^')
+    plt.plot(grid, grid, 'k--')
+    plt.show()
+
+    #import pylab
+    #plt.clf()
+    #stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=pylab)
+    #pylab.show()
\ No newline at end of file

From 9623d0aae8422b2f8be1fe38b5b18a85681441ac Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 5 Dec 2017 11:06:53 -0800
Subject: [PATCH 408/617] print correct bias

---
 selection/adjusted_MLE/tests/test_MLE.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 29f53509c..2d3322943 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -83,12 +83,12 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
         nactive = np.sum(active)
 
         if nactive > 0:
-            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                  M_est.opt_transform,
-                                                  M_est.target_observed,
-                                                  M_est.feasible_point,
-                                                  M_est.target_cov,
-                                                  M_est.randomizer_precision)
+            approx_MLE, var, mle_map, _, _ = solve_UMVU(M_est.target_transform,
+                                                        M_est.opt_transform,
+                                                        M_est.target_observed,
+                                                        M_est.feasible_point,
+                                                        M_est.target_cov,
+                                                        M_est.randomizer_precision)
 
             #print("approx_MLE and sd", approx_MLE, np.sqrt(np.diag(var)))
             print("approx sd", np.sqrt(np.diag(var)))
@@ -189,7 +189,6 @@ def test_bias_lasso(nsim=2000):
                 pivot_obs_info.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias/float(ndraw)) + "\n")
         if i % 10 == 0:
             plt.clf()
             ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))

From ef66b4d36fc254bbafbf2021b15bed1716f587e9 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c202.SUNet>
Date: Tue, 5 Dec 2017 12:31:24 -0800
Subject: [PATCH 409/617] est sigma, lambda from CV.glmnet and option for
 independent estimator

---
 selection/adjusted_MLE/selective_MLE.py       |  2 +-
 selection/adjusted_MLE/tests/compare_risks.py | 34 +++++++++----------
 selection/adjusted_MLE/tests/test_MLE_boot.py |  4 +--
 .../adjusted_MLE/tests/test_boot_selective.py |  2 +-
 4 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 132cc64e2..8a35a1a4a 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -147,7 +147,7 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
 
     implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value), offset_term])
 
-    return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter)
+    return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
 
 
 def solve_barrier_nonneg(conjugate_arg,
diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index c9db5faef..0988f3c32 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -27,21 +27,17 @@ def glmnet_sigma(X, y):
 
                 out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
                 lam_minCV = out$lambda.min
-
-                coef = coef(out, s = "lambda.min")
-                linear.fit = lm(y~ X[, which(coef>0.001)-1])
-                sigma_est = summary(linear.fit)$sigma
-                return(sigma_est)
+                return(lam_minCV)
                 }''')
 
     try:
-        sigma_cv_R = robjects.globalenv['glmnet_cv']
+        lambda_cv_R = robjects.globalenv['glmnet_cv']
         n, p = X.shape
         r_X = robjects.r.matrix(X, nrow=n, ncol=p)
         r_y = robjects.r.matrix(y, nrow=n, ncol=1)
 
-        sigma_est = sigma_cv_R(r_X, r_y)
-        return sigma_est
+        lam_minCV = lambda_cv_R(r_X, r_y)
+        return lam_minCV
     except:
         return np.array([1.])
 
@@ -49,19 +45,20 @@ def glmnet_sigma(X, y):
 def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=1.,
-                                                       random_signs=True, equicorrelated=False)
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
+                                                       random_signs=False, equicorrelated=False)
         n, p = X.shape
 
         if p>n:
-            sigma_est = glmnet_sigma(X, y)[0]
+            sigma_est = np.std(y)/2.
             print("sigma est", sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
             print("sigma est", sigma_est)
 
-        snr = (beta.T).dot(X.T.dot(X)).dot(beta)
+        #sigma_est = 1.
+        snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
         print("snr", snr)
 
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
@@ -99,13 +96,13 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 500
+    ndraw = 100
     bias = 0.
     risk_selMLE = 0.
     risk_relLASSO = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=500, p=100, s=5, signal=0.25)
+        approx = test_lasso_approx_var(n=5000, p=4000, s=20, signal=3.5)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
@@ -125,9 +122,10 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
     print("ecdf", ecdf(grid))
     plt.plot(grid, ecdf(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
     plt.show()
 
-    #import pylab
-    #plt.clf()
-    #stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=pylab)
-    #pylab.show()
\ No newline at end of file
+    # import pylab
+    # plt.clf()
+    # stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=pylab)
+    # pylab.show()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index 8b2b29a4c..9cb6284f2 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -141,7 +141,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     pivot_obs_info = []
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=1000, p=4000, s=20, signal=3.5, B=1200)
+        approx = boot_pivot_approx_var(n=5000, p=4000, s=20, signal=3.5, B=1200)
         if approx is not None:
             pivot_boot = approx[3]
             bias += approx[4]
@@ -159,4 +159,4 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
     plt.plot(grid, grid, 'k--')
     #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n1000_p4000_amp3.5_sigma1.png")
\ No newline at end of file
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n5000_p4000_amp3.5_sigma1.png")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_boot_selective.py b/selection/adjusted_MLE/tests/test_boot_selective.py
index 96f81fce2..eeb3ff0eb 100644
--- a/selection/adjusted_MLE/tests/test_boot_selective.py
+++ b/selection/adjusted_MLE/tests/test_boot_selective.py
@@ -68,7 +68,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     pivot_mle = []
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=1000, p=4000, s=20, signal=3.5, B=2000)
+        approx = boot_pivot_approx_var(n=5000, p=4000, s=20, signal=3.5, B=2000)
         if approx is not None:
             pivot_boot = approx[3]
             mle_boot = approx[4]

From a7c154cc16c6cfeb2d005e8784de2dc797572593 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c202.SUNet>
Date: Tue, 5 Dec 2017 13:26:20 -0800
Subject: [PATCH 410/617] found bug in offset term while setting implied mean

---
 selection/adjusted_MLE/selective_MLE.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 8a35a1a4a..4abbe5b28 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -107,8 +107,8 @@ def solve_UMVU(target_transform,
     #print("check conditional parameters", conditional_natural_parameter-(1.2*target_observed)+2.4, conditional_precision)
 
     M_1_inv = np.linalg.inv(M_1)
-    offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
-    mle_transform = (M_1_inv, -M_1_inv.dot(L), offset_term)
+    mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
+    mle_transform = (M_1_inv, -M_1_inv.dot(L), mle_offset_term)
     var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
                      -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
 
@@ -121,8 +121,6 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
 
         param_lin, param_offset = natparam_transform
         mle_target_lin, mle_soln_lin, mle_offset = mle_transform
-
-
         soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
                                                  conditional_precision,
                                                  feasible_point=feasible_point)
@@ -149,7 +147,6 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
 
     return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
 
-
 def solve_barrier_nonneg(conjugate_arg,
                          precision,
                          feasible_point=None,

From 45b7fc2a3032978a9955c244168c825527ca5a43 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 6 Dec 2017 14:08:20 -0800
Subject: [PATCH 411/617] added risk function

---
 selection/adjusted_MLE/selective_MLE.py       |   7 +-
 selection/adjusted_MLE/tests/compare_risks.py | 138 ++++++++++++++----
 selection/adjusted_MLE/tests/test_MLE_boot.py |  58 ++++++--
 .../adjusted_MLE/tests/test_boot_selective.py |  60 ++++++--
 4 files changed, 205 insertions(+), 58 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 4abbe5b28..d764e743f 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -104,7 +104,6 @@ def solve_UMVU(target_transform,
     conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
 
     conditional_precision = implied_precision[ntarget:,ntarget:]
-    #print("check conditional parameters", conditional_natural_parameter-(1.2*target_observed)+2.4, conditional_precision)
 
     M_1_inv = np.linalg.inv(M_1)
     mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
@@ -121,9 +120,11 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
 
         param_lin, param_offset = natparam_transform
         mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+
         soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
-                                                 conditional_precision,
-                                                 feasible_point=feasible_point)
+                                              conditional_precision,
+                                              feasible_point=feasible_point)
+
         selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
 
         var_target_lin, var_offset = var_transform
diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 0988f3c32..1d10bf772 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -61,7 +61,8 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
         snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
         print("snr", snr)
 
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        lam = glmnet_sigma(X, y)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1./np.sqrt(n)
@@ -79,12 +80,12 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
         nactive = np.sum(active)
 
         if nactive > 0:
-            approx_MLE, var, mle_map, _, _ = solve_UMVU(M_est.target_transform,
-                                                        M_est.opt_transform,
-                                                        M_est.target_observed,
-                                                        M_est.feasible_point,
-                                                        M_est.target_cov,
-                                                        M_est.randomizer_precision)
+            approx_MLE, var, mle_map, _, _, _ = solve_UMVU(M_est.target_transform,
+                                                           M_est.opt_transform,
+                                                           M_est.target_observed,
+                                                           M_est.feasible_point,
+                                                           M_est.target_cov,
+                                                           M_est.randomizer_precision)
 
             print("approx sd", np.sqrt(np.diag(var)))
             break
@@ -92,40 +93,119 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
     return np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive),\
            (approx_MLE-true_target).dot((approx_MLE-true_target)), (M_est.target_observed-true_target).dot((M_est.target_observed-true_target))
 
+def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
+
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.,
+                                                       random_signs=False, equicorrelated=False)
+        n, p = X.shape
+
+        if p>n:
+            sigma_est = np.std(y)/2.
+            print("sigma est", sigma_est)
+        else:
+            ols_fit = sm.OLS(y, X).fit()
+            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+            print("sigma est", sigma_est)
+
+        #sigma_est = 1.
+        snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
+        print("snr", snr)
+
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        #lam = glmnet_sigma(X, y)
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1./np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma = sigma_est)
+
+        M_est.solve_map()
+        active = M_est._overall
+
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        nactive = np.sum(active)
+        print("number of variables selected by LASSO", nactive)
+
+        if nactive > 0:
+            approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
+                                                                       M_est.opt_transform,
+                                                                       M_est.target_observed,
+                                                                       M_est.feasible_point,
+                                                                       M_est.target_cov,
+                                                                       M_est.randomizer_precision)
+
+            mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+            break
+
+    est_Sigma = X[:, active].T.dot(X[:, active])
+    ind_est = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
+    signal_amp = (true_target.T).dot(est_Sigma).dot(true_target)
+    return (approx_MLE - true_target).sum()/float(nactive),\
+           (approx_MLE-true_target).dot(est_Sigma).dot((approx_MLE-true_target))/ signal_amp, \
+           (M_est.target_observed-true_target).dot(est_Sigma).dot((M_est.target_observed-true_target))/ signal_amp,\
+           (ind_est - true_target).dot(est_Sigma).dot((ind_est - true_target))/ signal_amp
+
+
+# if __name__ == "__main__":
+#     import matplotlib.pyplot as plt
+#
+#     ndraw = 100
+#     bias = 0.
+#     risk_selMLE = 0.
+#     risk_relLASSO = 0.
+#     pivot_obs_info = []
+#     for i in range(ndraw):
+#         approx = test_lasso_approx_var(n=5000, p=4000, s=20, signal=3.5)
+#         if approx is not None:
+#             pivot = approx[0]
+#             bias += approx[1]
+#             risk_selMLE += approx[2]
+#             risk_relLASSO += approx[3]
+#             for j in range(pivot.shape[0]):
+#                 pivot_obs_info.append(pivot[j])
+#
+#         sys.stderr.write("iteration completed" + str(i) + "\n")
+#         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
+#         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
+#         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
+#
+#     plt.clf()
+#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+#     grid = np.linspace(0, 1, 101)
+#     print("ecdf", ecdf(grid))
+#     plt.plot(grid, ecdf(grid), c='red', marker='^')
+#     plt.plot(grid, grid, 'k--')
+#     #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
+#     plt.show()
+#
+#     # import pylab
+#     # plt.clf()
+#     # stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=pylab)
+#     # pylab.show()
 
 if __name__ == "__main__":
-    import matplotlib.pyplot as plt
 
     ndraw = 100
     bias = 0.
     risk_selMLE = 0.
     risk_relLASSO = 0.
-    pivot_obs_info = []
+    risk_indest = 0.
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=5000, p=4000, s=20, signal=3.5)
+        approx = risk_selective_mle(n=500, p=100, s=5, signal=4.)
         if approx is not None:
-            pivot = approx[0]
-            bias += approx[1]
-            risk_selMLE += approx[2]
-            risk_relLASSO += approx[3]
-            for j in range(pivot.shape[0]):
-                pivot_obs_info.append(pivot[j])
+            bias += approx[0]
+            risk_selMLE += approx[1]
+            risk_relLASSO += approx[2]
+            risk_indest += approx[3]
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
+        sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
 
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot(grid, grid, 'k--')
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
-    plt.show()
-
-    # import pylab
-    # plt.clf()
-    # stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=pylab)
-    # pylab.show()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index 9cb6284f2..b78a1842a 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -7,11 +7,41 @@
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from statsmodels.distributions.empirical_distribution import ECDF
+from rpy2.robjects.packages import importr
+from rpy2 import robjects
+from scipy.stats import t as tdist
+
+glmnet = importr('glmnet')
+import rpy2.robjects.numpy2ri
+
+rpy2.robjects.numpy2ri.activate()
+
+def glmnet_sigma(X, y):
+    robjects.r('''
+                glmnet_cv = function(X,y){
+                y = as.matrix(y)
+                X = as.matrix(X)
+
+                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
+                lam_minCV = out$lambda.min
+                return(lam_minCV)
+                }''')
+
+    try:
+        lambda_cv_R = robjects.globalenv['glmnet_cv']
+        n, p = X.shape
+        r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+        r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+
+        lam_minCV = lambda_cv_R(r_X, r_y)
+        return lam_minCV
+    except:
+        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
 
 def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=sigma,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
@@ -57,10 +87,14 @@ def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=sigma,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+
+        sigma_est = np.std(y) / np.sqrt(2.)
+        sys.stderr.write("est sigma" + str(sigma_est) + "\n")
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        #lam = glmnet_sigma(X, y)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1./np.sqrt(n)
@@ -69,7 +103,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
         randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma_est)
 
         M_est.solve_map()
         active = M_est._overall
@@ -78,12 +112,12 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
         nactive = np.sum(active)
 
         if nactive > 0:
-            approx_MLE, var, mle_map, _, _ = solve_UMVU(M_est.target_transform,
-                                                  M_est.opt_transform,
-                                                  M_est.target_observed,
-                                                  M_est.feasible_point,
-                                                  M_est.target_cov,
-                                                  M_est.randomizer_precision)
+            approx_MLE, var, mle_map, _, _, _ = solve_UMVU(M_est.target_transform,
+                                                           M_est.opt_transform,
+                                                           M_est.target_observed,
+                                                           M_est.feasible_point,
+                                                           M_est.target_cov,
+                                                           M_est.randomizer_precision)
 
             boot_pivot = np.zeros((B, nactive))
             resid = y - X[:, active].dot(M_est.target_observed)
@@ -141,7 +175,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     pivot_obs_info = []
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=5000, p=4000, s=20, signal=3.5, B=1200)
+        approx = boot_pivot_approx_var(n=2000, p=4000, s=20, signal=3.5, B=1200)
         if approx is not None:
             pivot_boot = approx[3]
             bias += approx[4]
@@ -159,4 +193,4 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
     plt.plot(grid, grid, 'k--')
     #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n5000_p4000_amp3.5_sigma1.png")
\ No newline at end of file
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n2000_p4000_amp3.5_rho_0.2_sigma1.png")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_boot_selective.py b/selection/adjusted_MLE/tests/test_boot_selective.py
index eeb3ff0eb..0659fbc82 100644
--- a/selection/adjusted_MLE/tests/test_boot_selective.py
+++ b/selection/adjusted_MLE/tests/test_boot_selective.py
@@ -9,14 +9,46 @@
 from statsmodels.distributions.empirical_distribution import ECDF
 import selection.constraints.affine as AC
 
+from rpy2.robjects.packages import importr
+from rpy2 import robjects
+from scipy.stats import t as tdist
+
+glmnet = importr('glmnet')
+import rpy2.robjects.numpy2ri
+
+rpy2.robjects.numpy2ri.activate()
+
+def glmnet_sigma(X, y):
+    robjects.r('''
+                glmnet_cv = function(X,y){
+                y = as.matrix(y)
+                X = as.matrix(X)
+
+                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
+                lam_minCV = out$lambda.min
+                return(lam_minCV)
+                }''')
+
+    try:
+        lambda_cv_R = robjects.globalenv['glmnet_cv']
+        n, p = X.shape
+        r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+        r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+
+        lam_minCV = lambda_cv_R(r_X, r_y)
+        return lam_minCV
+    except:
+        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
 
 def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=sigma,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=sigma,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
+        sigma_est = np.std(y) / np.sqrt(2.)
+        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        #lam = glmnet_sigma(X, y)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1./np.sqrt(n)
@@ -25,7 +57,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
         randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma_est)
 
         M_est.solve_map()
         active = M_est._overall
@@ -35,17 +67,17 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
         print("number of variables selected by LASSO", nactive)
 
         if nactive > 0:
-            approx_MLE, var, mle_map, implied_cov, implied_mean = solve_UMVU(M_est.target_transform,
-                                                                             M_est.opt_transform,
-                                                                             M_est.target_observed,
-                                                                             M_est.feasible_point,
-                                                                             M_est.target_cov,
-                                                                             M_est.randomizer_precision)
+            approx_MLE, var, mle_map, implied_cov, implied_mean, _ = solve_UMVU(M_est.target_transform,
+                                                                                M_est.opt_transform,
+                                                                                M_est.target_observed,
+                                                                                M_est.feasible_point,
+                                                                                M_est.target_cov,
+                                                                                M_est.randomizer_precision)
 
             A = np.hstack([np.zeros((nactive, nactive)), -np.identity(nactive)])
             b = np.zeros(nactive)
             con = AC.constraints(A, b, covariance=implied_cov, mean= implied_mean)
-            sample = AC.sample_from_constraints(con, np.ones(2*nactive), ndraw=B, burnin=100)
+            sample = AC.sample_from_constraints(con, np.ones(2*nactive), ndraw=B, burnin=300)
             boot_pivot = np.zeros((B, nactive))
             boot_mle_vec = np.zeros((B, nactive))
             for b in range(B):
@@ -62,13 +94,13 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
 
-    ndraw = 100
+    ndraw = 50
     bias = 0.
     pivot_obs_info = []
     pivot_mle = []
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=5000, p=4000, s=20, signal=3.5, B=2000)
+        approx = boot_pivot_approx_var(n=2000, p=4000, s=20, signal=3.5, B=2000)
         if approx is not None:
             pivot_boot = approx[3]
             mle_boot = approx[4]
@@ -89,5 +121,5 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
     plt.plot(grid, ecdf_mle(grid), c='red', marker='^')
     plt.plot(grid, grid, 'k--')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/selective_Boot_pivot_n1000_p4000_amp3.5_sigma1.png")
\ No newline at end of file
+    #plt.show()
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/selective_Boot_pivot_n2000_p4000_amp3.5_rho_0.2_sigma1.png")
\ No newline at end of file

From ccfb4fb1ebe17f48024adc17df2aa6eeed8a6430 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 6 Dec 2017 14:49:58 -0800
Subject: [PATCH 412/617] compute rel risks

---
 selection/adjusted_MLE/tests/compare_risks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 1d10bf772..85e54e9e7 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -96,8 +96,8 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
 def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.,
-                                                       random_signs=False, equicorrelated=False)
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.7, signal=signal, sigma=1.,
+                                                       random_signs=True, equicorrelated=False)
         n, p = X.shape
 
         if p>n:

From 639c90c5b911e4335008ef3336cec2393c3c85cf Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 6 Dec 2017 17:21:27 -0800
Subject: [PATCH 413/617] changed scale of glmnet

---
 selection/adjusted_MLE/tests/compare_risks.py | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 85e54e9e7..eb434bab4 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -26,8 +26,8 @@ def glmnet_sigma(X, y):
                 X = as.matrix(X)
 
                 out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                lam_minCV = out$lambda.min
-                return(lam_minCV)
+                lam_1se = out$lambda.1se
+                return(lam_1se)
                 }''')
 
     try:
@@ -36,10 +36,10 @@ def glmnet_sigma(X, y):
         r_X = robjects.r.matrix(X, nrow=n, ncol=p)
         r_y = robjects.r.matrix(y, nrow=n, ncol=1)
 
-        lam_minCV = lambda_cv_R(r_X, r_y)
-        return lam_minCV
+        lam_1se = lambda_cv_R(r_X, r_y)
+        return lam_1se*n
     except:
-        return np.array([1.])
+        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
 
 
 def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
@@ -96,7 +96,7 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
 def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.7, signal=signal, sigma=1.,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
 
@@ -112,8 +112,8 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
         snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
         print("snr", snr)
 
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        #lam = glmnet_sigma(X, y)
+        #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        lam = glmnet_sigma(X, y)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1./np.sqrt(n)
@@ -145,10 +145,12 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
     est_Sigma = X[:, active].T.dot(X[:, active])
     ind_est = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
     signal_amp = (true_target.T).dot(est_Sigma).dot(true_target)
-    return (approx_MLE - true_target).sum()/float(nactive),\
-           (approx_MLE-true_target).dot(est_Sigma).dot((approx_MLE-true_target))/ signal_amp, \
-           (M_est.target_observed-true_target).dot(est_Sigma).dot((M_est.target_observed-true_target))/ signal_amp,\
-           (ind_est - true_target).dot(est_Sigma).dot((ind_est - true_target))/ signal_amp
+    target_par = beta[active]
+
+    return (approx_MLE - target_par).sum()/float(nactive),\
+           (approx_MLE-target_par).dot(est_Sigma).dot((approx_MLE-target_par))/ signal_amp, \
+           (M_est.target_observed-target_par).dot(est_Sigma).dot((M_est.target_observed-target_par))/ signal_amp,\
+           (ind_est - target_par).dot(est_Sigma).dot((ind_est - target_par))/ signal_amp
 
 
 # if __name__ == "__main__":
@@ -196,7 +198,7 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
     risk_relLASSO = 0.
     risk_indest = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle(n=500, p=100, s=5, signal=4.)
+        approx = risk_selective_mle(n=500, p=1000, s=5, signal=5.)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From df1b830c19abd2229fdddc35bd3416c1ebeafb9e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 6 Dec 2017 17:31:43 -0800
Subject: [PATCH 414/617] cleaned risk

---
 selection/adjusted_MLE/tests/compare_risks.py | 88 -------------------
 1 file changed, 88 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index eb434bab4..bf84933e6 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -42,57 +42,6 @@ def glmnet_sigma(X, y):
         return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
 
 
-def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
-
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
-                                                       random_signs=False, equicorrelated=False)
-        n, p = X.shape
-
-        if p>n:
-            sigma_est = np.std(y)/2.
-            print("sigma est", sigma_est)
-        else:
-            ols_fit = sm.OLS(y, X).fit()
-            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-            print("sigma est", sigma_est)
-
-        #sigma_est = 1.
-        snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
-        print("snr", snr)
-
-        #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        lam = glmnet_sigma(X, y)
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma = sigma_est)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-        nactive = np.sum(active)
-
-        if nactive > 0:
-            approx_MLE, var, mle_map, _, _, _ = solve_UMVU(M_est.target_transform,
-                                                           M_est.opt_transform,
-                                                           M_est.target_observed,
-                                                           M_est.feasible_point,
-                                                           M_est.target_cov,
-                                                           M_est.randomizer_precision)
-
-            print("approx sd", np.sqrt(np.diag(var)))
-            break
-
-    return np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive),\
-           (approx_MLE-true_target).dot((approx_MLE-true_target)), (M_est.target_observed-true_target).dot((M_est.target_observed-true_target))
-
 def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
 
     while True:
@@ -153,43 +102,6 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
            (ind_est - target_par).dot(est_Sigma).dot((ind_est - target_par))/ signal_amp
 
 
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 100
-#     bias = 0.
-#     risk_selMLE = 0.
-#     risk_relLASSO = 0.
-#     pivot_obs_info = []
-#     for i in range(ndraw):
-#         approx = test_lasso_approx_var(n=5000, p=4000, s=20, signal=3.5)
-#         if approx is not None:
-#             pivot = approx[0]
-#             bias += approx[1]
-#             risk_selMLE += approx[2]
-#             risk_relLASSO += approx[3]
-#             for j in range(pivot.shape[0]):
-#                 pivot_obs_info.append(pivot[j])
-#
-#         sys.stderr.write("iteration completed" + str(i) + "\n")
-#         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
-#         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
-#         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
-#
-#     plt.clf()
-#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-#     grid = np.linspace(0, 1, 101)
-#     print("ecdf", ecdf(grid))
-#     plt.plot(grid, ecdf(grid), c='red', marker='^')
-#     plt.plot(grid, grid, 'k--')
-#     #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
-#     plt.show()
-#
-#     # import pylab
-#     # plt.clf()
-#     # stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=pylab)
-#     # pylab.show()
-
 if __name__ == "__main__":
 
     ndraw = 100

From 3b57a0c3b75e34c511a437fa9f4eeeb0e94fd234 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 6 Dec 2017 20:28:50 -0800
Subject: [PATCH 415/617] added comparison of estimators appended with zeros

---
 selection/adjusted_MLE/selective_MLE.py       |   1 +
 selection/adjusted_MLE/tests/compare_risks.py | 102 +++++++++++++++---
 2 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index d764e743f..cc8215b49 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -27,6 +27,7 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
         X, _ = self.loss.data
         n, p = X.shape
         self.p = p
+
         self.randomizer_precision = (1. / self.randomization_scale) * np.identity(p)
 
         score_cov = np.zeros((p, p))
diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index bf84933e6..897f8a968 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -1,19 +1,14 @@
 from __future__ import print_function
 import numpy as np, sys
-import scipy.stats as stats
 
 import regreg.api as rr
 from selection.tests.instance import gaussian_instance
-from scipy.stats import norm as ndist
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from statsmodels.distributions.empirical_distribution import ECDF
-import statsmodels.api as sm
 from selection.randomized.M_estimator import M_estimator
+import statsmodels.api as sm
 from rpy2.robjects.packages import importr
 from rpy2 import robjects
-from scipy.stats import t as tdist
-
 glmnet = importr('glmnet')
 import rpy2.robjects.numpy2ri
 
@@ -78,7 +73,7 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
 
         true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
         nactive = np.sum(active)
-        print("number of variables selected by LASSO", nactive)
+        print("number of variables selected by randomized LASSO", nactive)
 
         if nactive > 0:
             approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
@@ -93,14 +88,91 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
 
     est_Sigma = X[:, active].T.dot(X[:, active])
     ind_est = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
-    signal_amp = (true_target.T).dot(est_Sigma).dot(true_target)
     target_par = beta[active]
+    signal_amp = (target_par.T).dot(est_Sigma).dot(target_par)
+    Lasso_est = M_est.observed_opt_state[:nactive]
 
     return (approx_MLE - target_par).sum()/float(nactive),\
-           (approx_MLE-target_par).dot(est_Sigma).dot((approx_MLE-target_par))/ signal_amp, \
-           (M_est.target_observed-target_par).dot(est_Sigma).dot((M_est.target_observed-target_par))/ signal_amp,\
-           (ind_est - target_par).dot(est_Sigma).dot((ind_est - target_par))/ signal_amp
+           ((approx_MLE-target_par).T).dot(est_Sigma).dot((approx_MLE-target_par))/ signal_amp, \
+           ((M_est.target_observed-target_par).T).dot(est_Sigma).dot((M_est.target_observed-target_par))/ signal_amp,\
+           ((ind_est - target_par).T).dot(est_Sigma).dot((ind_est - target_par))/ signal_amp,\
+           ((Lasso_est - target_par).T).dot(est_Sigma).dot((Lasso_est - target_par)) / signal_amp
+
+
+def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
+
+    while True:
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
+                                                       random_signs=True, equicorrelated=False)
+        n, p = X.shape
+
+        if p>n:
+            sigma_est = np.std(y)/2.
+            print("sigma est", sigma_est)
+        else:
+            ols_fit = sm.OLS(y, X).fit()
+            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+            print("sigma est", sigma_est)
+
+        #sigma_est = 1.
+        snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
+        print("snr", snr)
+
+        #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        lam = glmnet_sigma(X, y)
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1./np.sqrt(n)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma = sigma_est)
+
+        M_est.solve_map()
+        active = M_est._overall
+
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        nactive = np.sum(active)
+        print("number of variables selected by randomized LASSO", nactive)
+
+        if nactive > 0:
+            approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
+                                                                       M_est.opt_transform,
+                                                                       M_est.target_observed,
+                                                                       M_est.feasible_point,
+                                                                       M_est.target_cov,
+                                                                       M_est.randomizer_precision)
+
+            mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+            break
+
+    est_Sigma = X.T.dot(X)
+    ind_est = np.zeros(p)
+    ind_est[active] = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
+    target_par = beta
+    signal_amp = (target_par.T).dot(est_Sigma).dot(target_par)
+
+    Lasso_est = np.zeros(p)
+    Lasso_est[active] = M_est.observed_opt_state[:nactive]
+    selective_MLE = np.zeros(p)
+    selective_MLE[active] = approx_MLE
+
+    relaxed_Lasso = np.zeros(p)
+    relaxed_Lasso[active] = M_est.target_observed
+
+    M_est_nonrand = M_estimator(loss, epsilon, penalty, randomization.isotropic_gaussian((p,), scale=0.005))
+    M_est_nonrand.solve()
+    Lasso_nonrand = np.zeros(p)
+    Lasso_nonrand[M_est_nonrand._overall] = M_est_nonrand.observed_internal_state[M_est_nonrand._overall.sum()]
 
+    return (selective_MLE - target_par).sum()/float(nactive),\
+           ((selective_MLE-target_par).T).dot(est_Sigma).dot((selective_MLE-target_par))/ signal_amp, \
+           ((relaxed_Lasso-target_par).T).dot(est_Sigma).dot((relaxed_Lasso-target_par))/ signal_amp,\
+           ((ind_est - target_par).T).dot(est_Sigma).dot((ind_est - target_par))/ signal_amp,\
+           ((Lasso_est - target_par).T).dot(est_Sigma).dot((Lasso_est - target_par)) / signal_amp,\
+           ((Lasso_nonrand - target_par).T).dot(est_Sigma).dot((Lasso_nonrand - target_par)) / signal_amp
 
 if __name__ == "__main__":
 
@@ -109,17 +181,23 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
     risk_selMLE = 0.
     risk_relLASSO = 0.
     risk_indest = 0.
+    risk_LASSO = 0.
+    risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle(n=500, p=1000, s=5, signal=5.)
+        approx = risk_selective_mle_full(n=500, p=100, s=5, signal=3.5)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
             risk_relLASSO += approx[2]
             risk_indest += approx[3]
+            risk_LASSO += approx[4]
+            risk_LASSO_nonrand += approx[5]
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
+        sys.stderr.write("overall_relLASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
+        sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
 

From c69f5b1375715c2712c170137c403a29ea4fd017 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Wed, 6 Dec 2017 21:26:04 -0800
Subject: [PATCH 416/617] commit additions of nonrand LASSO

---
 selection/adjusted_MLE/tests/compare_risks.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 897f8a968..562bddc25 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -108,13 +108,13 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
 
         if p>n:
             sigma_est = np.std(y)/2.
+            #sigma_est = 1.
             print("sigma est", sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
             print("sigma est", sigma_est)
 
-        #sigma_est = 1.
         snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
         print("snr", snr)
 
@@ -184,7 +184,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
     risk_LASSO = 0.
     risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=100, s=5, signal=3.5)
+        approx = risk_selective_mle_full(n=500, p=5000, s=5, signal=3.5)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -198,6 +198,6 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
-        sys.stderr.write("overall_relLASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
 

From a3803e1b2c5daaeb1f11a881d108e024083e514d Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Thu, 7 Dec 2017 17:18:58 -0800
Subject: [PATCH 417/617] added risk comparisons

---
 selection/adjusted_MLE/tests/compare_risks.py | 54 ++++++++++++-------
 selection/adjusted_MLE/tests/relaxed_lasso.py |  4 ++
 2 files changed, 38 insertions(+), 20 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/relaxed_lasso.py

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 562bddc25..1f571c797 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -36,8 +36,11 @@ def glmnet_sigma(X, y):
     except:
         return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
 
+def relative_risk(est, truth, Sigma):
 
-def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
+    return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
+
+def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=np.sqrt(0.1)):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
@@ -89,17 +92,20 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
     est_Sigma = X[:, active].T.dot(X[:, active])
     ind_est = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
     target_par = beta[active]
-    signal_amp = (target_par.T).dot(est_Sigma).dot(target_par)
     Lasso_est = M_est.observed_opt_state[:nactive]
 
-    return (approx_MLE - target_par).sum()/float(nactive),\
-           ((approx_MLE-target_par).T).dot(est_Sigma).dot((approx_MLE-target_par))/ signal_amp, \
-           ((M_est.target_observed-target_par).T).dot(est_Sigma).dot((M_est.target_observed-target_par))/ signal_amp,\
-           ((ind_est - target_par).T).dot(est_Sigma).dot((ind_est - target_par))/ signal_amp,\
-           ((Lasso_est - target_par).T).dot(est_Sigma).dot((Lasso_est - target_par)) / signal_amp
+    return (approx_MLE - target_par).sum()/float(nactive), \
+           relative_risk(approx_MLE, target_par, est_Sigma),\
+           relative_risk(M_est.target_observed, target_par, est_Sigma),\
+           relative_risk(ind_est, target_par, est_Sigma),\
+           relative_risk(Lasso_est, target_par, est_Sigma)
 
+def AR1(rho, p):
+    idx = np.arange(p)
+    cov = rho ** np.abs(np.subtract.outer(idx, idx))
+    return cov, np.linalg.cholesky(cov)
 
-def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
+def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=np.sqrt(0.1)):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
@@ -148,11 +154,11 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
             mle_target_lin, mle_soln_lin, mle_offset = mle_transform
             break
 
-    est_Sigma = X.T.dot(X)
+    #est_Sigma = X.T.dot(X)
+    Sigma, _ = AR1(rho=0.35, p=p)
     ind_est = np.zeros(p)
     ind_est[active] = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
     target_par = beta
-    signal_amp = (target_par.T).dot(est_Sigma).dot(target_par)
 
     Lasso_est = np.zeros(p)
     Lasso_est[active] = M_est.observed_opt_state[:nactive]
@@ -164,15 +170,20 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
 
     M_est_nonrand = M_estimator(loss, epsilon, penalty, randomization.isotropic_gaussian((p,), scale=0.005))
     M_est_nonrand.solve()
+    rel_Lasso_nonrand = np.zeros(p)
+    rel_Lasso_nonrand[M_est_nonrand._overall] = M_est_nonrand.observed_internal_state[M_est_nonrand._overall.sum()]
     Lasso_nonrand = np.zeros(p)
-    Lasso_nonrand[M_est_nonrand._overall] = M_est_nonrand.observed_internal_state[M_est_nonrand._overall.sum()]
+    Lasso_nonrand[M_est_nonrand._overall] = M_est_nonrand.observed_opt_state[:M_est_nonrand._overall.sum()]
+
+    print("number of variables selected by non-randomized LASSO", M_est_nonrand._overall.sum())
 
-    return (selective_MLE - target_par).sum()/float(nactive),\
-           ((selective_MLE-target_par).T).dot(est_Sigma).dot((selective_MLE-target_par))/ signal_amp, \
-           ((relaxed_Lasso-target_par).T).dot(est_Sigma).dot((relaxed_Lasso-target_par))/ signal_amp,\
-           ((ind_est - target_par).T).dot(est_Sigma).dot((ind_est - target_par))/ signal_amp,\
-           ((Lasso_est - target_par).T).dot(est_Sigma).dot((Lasso_est - target_par)) / signal_amp,\
-           ((Lasso_nonrand - target_par).T).dot(est_Sigma).dot((Lasso_nonrand - target_par)) / signal_amp
+    return (selective_MLE - target_par).sum()/float(nactive), \
+           relative_risk(selective_MLE, target_par, Sigma), \
+           relative_risk(relaxed_Lasso, target_par, Sigma), \
+           relative_risk(ind_est, target_par, Sigma), \
+           relative_risk(Lasso_est, target_par, Sigma), \
+           relative_risk(rel_Lasso_nonrand, target_par, Sigma),\
+           relative_risk(Lasso_nonrand, target_par, Sigma)
 
 if __name__ == "__main__":
 
@@ -182,16 +193,18 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
     risk_relLASSO = 0.
     risk_indest = 0.
     risk_LASSO = 0.
+    risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=5000, s=5, signal=3.5)
+        approx = risk_selective_mle_full(n=300, p=1000, s=5, signal=3.)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
             risk_relLASSO += approx[2]
             risk_indest += approx[3]
             risk_LASSO += approx[4]
-            risk_LASSO_nonrand += approx[5]
+            risk_relLASSO_nonrand += approx[5]
+            risk_LASSO_nonrand += approx[6]
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
@@ -199,5 +212,6 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
 
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
new file mode 100644
index 000000000..1b978af81
--- /dev/null
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -0,0 +1,4 @@
+from rpy2.robjects.packages import importr
+from rpy2 import robjects
+
+import rpy2.robjects.numpy2ri

From 377248ae452cf2cbb3dc499968cc22f86966b6bb Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN51slri.SUNet>
Date: Thu, 7 Dec 2017 22:53:05 -0800
Subject: [PATCH 418/617] added coverage

---
 selection/adjusted_MLE/tests/compare_risks.py | 10 ++--
 selection/adjusted_MLE/tests/test_MLE.py      | 60 ++++++++++++-------
 selection/adjusted_MLE/tests/test_MLE_boot.py | 54 +++++++++++------
 3 files changed, 77 insertions(+), 47 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 1f571c797..5b8b7a7fb 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -40,6 +40,11 @@ def relative_risk(est, truth, Sigma):
 
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
+def AR1(rho, p):
+    idx = np.arange(p)
+    cov = rho ** np.abs(np.subtract.outer(idx, idx))
+    return cov, np.linalg.cholesky(cov)
+
 def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=np.sqrt(0.1)):
 
     while True:
@@ -100,11 +105,6 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
            relative_risk(ind_est, target_par, est_Sigma),\
            relative_risk(Lasso_est, target_par, est_Sigma)
 
-def AR1(rho, p):
-    idx = np.arange(p)
-    cov = rho ** np.abs(np.subtract.outer(idx, idx))
-    return cov, np.linalg.cholesky(cov)
-
 def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=np.sqrt(0.1)):
 
     while True:
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index ed6552155..16d16f2b3 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -61,7 +61,7 @@ def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., random
 def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=1.):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
@@ -80,20 +80,26 @@ def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomizatio
 
         true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
         nactive = np.sum(active)
+        coverage = np.zeros(nactive)
 
         if nactive > 0:
-            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                  M_est.opt_transform,
-                                                  M_est.target_observed,
-                                                  M_est.feasible_point,
-                                                  M_est.target_cov,
-                                                  M_est.randomizer_precision)
+            approx_MLE, var, mle_map, _, _, _ = solve_UMVU(M_est.target_transform,
+                                                           M_est.opt_transform,
+                                                           M_est.target_observed,
+                                                           M_est.feasible_point,
+                                                           M_est.target_cov,
+                                                           M_est.randomizer_precision)
 
-            #print("approx_MLE and sd", approx_MLE, np.sqrt(np.diag(var)))
             print("approx sd", np.sqrt(np.diag(var)))
+            approx_sd = np.sqrt(np.diag(var))
+            print("approx sd", approx_sd)
+            for j in range(nactive):
+                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and true_target[j]<= (approx_MLE[j] + (1.65 * approx_sd[j])):
+                    coverage[j] += 1
             break
 
-    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum()/float(nactive)
+    return np.true_divide((approx_MLE - true_target),approx_sd), (approx_MLE - true_target).sum()/float(nactive), \
+           coverage.sum()/float(nactive)
 
 def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomization_scale=1., sigma = 1.):
 
@@ -125,6 +131,7 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
 
         nactive = np.sum(active)
         print('nactive', nactive)
+        coverage = np.zeros(nactive)
         if nactive >0:
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
             print("true_target", true_target)
@@ -135,11 +142,15 @@ def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomizatio
                                                   M_est.target_cov,
                                                   M_est.randomizer_precision)
 
-            print("approx sd", np.sqrt(np.diag(var)))
+            approx_sd = np.sqrt(np.diag(var))
+            print("approx sd", approx_sd)
+            for j in range(nactive):
+                if (approx_MLE[j]-(1.65*approx_sd[j]))<= true_target[j] and (approx_MLE[j] + (1.65*approx_sd[j])) >= true_target[j]:
+                    coverage[j] += 1
             break
 
-    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
-
+    return np.true_divide((approx_MLE - true_target),approx_sd), (approx_MLE - true_target).sum()/float(nactive), \
+           coverage.sum()/float(nactive)
 
 def test_bias_lasso(nsim=2000):
     bias = 0
@@ -179,26 +190,29 @@ def test_bias_lasso(nsim=2000):
     ndraw = 500
     bias = 0.
     pivot_obs_info= []
+    coverage = 0.
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=5000, p=4000, s=20, signal=3.5)
+        approx = test_lasso_approx_var(n=500, p=100, s=5, signal=3.5)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]
-            for j in range(pivot.shape[0]):
-                pivot_obs_info.append(pivot[j])
+            coverage += approx[2]
+            #for j in range(pivot.shape[0]):
+            #    pivot_obs_info.append(pivot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
+        sys.stderr.write("coverage so far" + str(coverage / float(i + 1)) + "\n")
 
     #if i % 10 == 0:
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot(grid, grid, 'k--')
-    #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
+    # plt.clf()
+    # ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    # grid = np.linspace(0, 1, 101)
+    # print("ecdf", ecdf(grid))
+    # plt.plot(grid, ecdf(grid), c='red', marker='^')
+    # plt.plot(grid, grid, 'k--')
+    # #plt.show()
+    # plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
 
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index b78a1842a..5c092e95b 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -10,6 +10,7 @@
 from rpy2.robjects.packages import importr
 from rpy2 import robjects
 from scipy.stats import t as tdist
+import statsmodels.api as sm
 
 glmnet = importr('glmnet')
 import rpy2.robjects.numpy2ri
@@ -23,8 +24,8 @@ def glmnet_sigma(X, y):
                 X = as.matrix(X)
 
                 out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                lam_minCV = out$lambda.min
-                return(lam_minCV)
+                lam_1se = out$lambda.1se
+                return(lam_1se)
                 }''')
 
     try:
@@ -33,8 +34,8 @@ def glmnet_sigma(X, y):
         r_X = robjects.r.matrix(X, nrow=n, ncol=p)
         r_y = robjects.r.matrix(y, nrow=n, ncol=1)
 
-        lam_minCV = lambda_cv_R(r_X, r_y)
-        return lam_minCV
+        lam_1se = lambda_cv_R(r_X, r_y)
+        return lam_1se*n
     except:
         return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
 
@@ -91,10 +92,16 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
 
-        sigma_est = np.std(y) / np.sqrt(2.)
-        sys.stderr.write("est sigma" + str(sigma_est) + "\n")
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        #lam = glmnet_sigma(X, y)
+        if p>n:
+            sigma_est = np.std(y)/2.
+            print("sigma est", sigma_est)
+        else:
+            ols_fit = sm.OLS(y, X).fit()
+            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+            print("sigma est", sigma_est)
+
+        #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        lam = glmnet_sigma(X, y)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1./np.sqrt(n)
@@ -110,6 +117,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 
         true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
         nactive = np.sum(active)
+        coverage = np.zeros(nactive)
 
         if nactive > 0:
             approx_MLE, var, mle_map, _, _, _ = solve_UMVU(M_est.target_transform,
@@ -129,10 +137,15 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
                 boot_pivot[b, :] = np.true_divide(boot_mle[0]- approx_MLE, np.sqrt(np.diag(boot_mle[1])))
                 #sys.stderr.write("bootstrap sample" + str(b) + "\n")
 
+            boot_std = boot_pivot.std(0)
+            for j in range(nactive):
+                if (approx_MLE[j] - (1.65 * boot_std[j])) <= true_target[j] and true_target[j] <= (approx_MLE[j] + (1.65 * boot_std[j])):
+                    coverage[j] += 1
             break
 
     return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0), \
-           np.true_divide(approx_MLE - true_target, boot_pivot.std(0)), (approx_MLE - true_target).sum() / float(nactive)
+           np.true_divide(approx_MLE - true_target, boot_pivot.std(0)), (approx_MLE - true_target).sum() / float(nactive),\
+           coverage.sum() / float(nactive)
 
 # if __name__ == "__main__":
 #     import matplotlib.pyplot as plt
@@ -173,24 +186,27 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     ndraw = 100
     bias = 0.
     pivot_obs_info = []
+    coverage = 0.
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=2000, p=4000, s=20, signal=3.5, B=1200)
+        approx = boot_pivot_approx_var(n=500, p=100, s=5, signal=3., B=1200)
         if approx is not None:
             pivot_boot = approx[3]
             bias += approx[4]
+            coverage += approx[5]
 
             for j in range(pivot_boot.shape[0]):
                 pivot_obs_info.append(pivot_boot[j])
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
-
-    plt.clf()
-    ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf_boot(grid))
-    plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
-    plt.plot(grid, grid, 'k--')
-    #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n2000_p4000_amp3.5_rho_0.2_sigma1.png")
\ No newline at end of file
+        sys.stderr.write("overall coverage" + str(coverage / float(i + 1)) + "\n")
+
+    # plt.clf()
+    # ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
+    # grid = np.linspace(0, 1, 101)
+    # print("ecdf", ecdf_boot(grid))
+    # plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
+    # plt.plot(grid, grid, 'k--')
+    # #plt.show()
+    # plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n2000_p4000_amp3.5_rho_0.2_sigma1.png")
\ No newline at end of file

From 7ccf87b2f8bc1d25a4f8468962f15915ee5b6a9f Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c177.SUNet>
Date: Fri, 8 Dec 2017 11:48:47 -0800
Subject: [PATCH 419/617] commit all changes

---
 selection/adjusted_MLE/tests/test_MLE_boot.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index 5c092e95b..b8582c70f 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -39,7 +39,7 @@ def glmnet_sigma(X, y):
     except:
         return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
 
-def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
+def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=0.7, sigma= 1.):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=sigma,
@@ -85,10 +85,10 @@ def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 
             break
 
-def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
+def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=0.7, sigma= 1.):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=sigma,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=sigma,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
 
@@ -117,6 +117,8 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 
         true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
         nactive = np.sum(active)
+        print("number of variables selected by randomized LASSO", nactive)
+
         coverage = np.zeros(nactive)
 
         if nactive > 0:
@@ -189,7 +191,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     coverage = 0.
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=500, p=100, s=5, signal=3., B=1200)
+        approx = boot_pivot_approx_var(n=10000, p=2000, s=20, signal=5., B=1200)
         if approx is not None:
             pivot_boot = approx[3]
             bias += approx[4]

From d6dd7cb4b2bd8361bf3576908edac1f512742a74 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <snigdhapanigrahi@DN0a22c177.SUNet>
Date: Fri, 8 Dec 2017 12:41:58 -0800
Subject: [PATCH 420/617] commit changes

---
 selection/adjusted_MLE/tests/compare_risks.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 5b8b7a7fb..caab253a9 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -139,7 +139,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
         M_est.solve_map()
         active = M_est._overall
 
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
+        #true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
         nactive = np.sum(active)
         print("number of variables selected by randomized LASSO", nactive)
 
@@ -154,7 +154,6 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
             mle_target_lin, mle_soln_lin, mle_offset = mle_transform
             break
 
-    #est_Sigma = X.T.dot(X)
     Sigma, _ = AR1(rho=0.35, p=p)
     ind_est = np.zeros(p)
     ind_est[active] = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset

From 399d4bd87e46741cd1fbcd89349b4b089fea8cc6 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 9 Dec 2017 04:11:56 -0800
Subject: [PATCH 421/617] commit change

---
 selection/adjusted_MLE/tests/compare_risks.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index caab253a9..737f85c55 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -105,7 +105,7 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
            relative_risk(ind_est, target_par, est_Sigma),\
            relative_risk(Lasso_est, target_par, est_Sigma)
 
-def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=np.sqrt(0.1)):
+def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
@@ -128,7 +128,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
         lam = glmnet_sigma(X, y)
 
         loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
+        epsilon = 1. /np.sqrt(n)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
@@ -195,7 +195,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
     risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=300, p=1000, s=5, signal=3.)
+        approx = risk_selective_mle_full(n=200, p=1000, s=10, signal=3.)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 475269c994e18f855257a64c4e34e7d4386d9a01 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 9 Dec 2017 18:20:15 -0800
Subject: [PATCH 422/617] called sim.xy

---
 selection/adjusted_MLE/tests/compare_risks.py |  6 +++---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 11 +++++++++++
 selection/adjusted_MLE/tests/test_MLE_boot.py |  2 +-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index caab253a9..737f85c55 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -105,7 +105,7 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
            relative_risk(ind_est, target_par, est_Sigma),\
            relative_risk(Lasso_est, target_par, est_Sigma)
 
-def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=np.sqrt(0.1)):
+def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
@@ -128,7 +128,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
         lam = glmnet_sigma(X, y)
 
         loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
+        epsilon = 1. /np.sqrt(n)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
@@ -195,7 +195,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
     risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=300, p=1000, s=5, signal=3.)
+        approx = risk_selective_mle_full(n=200, p=1000, s=10, signal=3.)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 1b978af81..c8c376a0d 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -2,3 +2,14 @@
 from rpy2 import robjects
 
 import rpy2.robjects.numpy2ri
+rpy2.robjects.numpy2ri.activate()
+
+def sim_xy(n, p, nval, rho=0, s=5):
+    robjects.r('''
+    source('~/best-subset/bestsubset/R/sim.R')
+    ''')
+
+    r_simulate = robjects.globalenv['sim.xy']
+    print(r_simulate(n, p, nval, rho=rho, s=s))
+
+sim_xy(n=50, p=10, nval=50)
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index b8582c70f..d1c3a75e6 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -191,7 +191,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     coverage = 0.
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=10000, p=2000, s=20, signal=5., B=1200)
+        approx = boot_pivot_approx_var(n=4000, p=2000, s=20, signal=5., B=1200)
         if approx is not None:
             pivot_boot = approx[3]
             bias += approx[4]

From 3165c901709ca289c9c01aac06a4bb7bd37f59fb Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 9 Dec 2017 19:18:44 -0800
Subject: [PATCH 423/617] getting coefs for all possible combinations of lambda
 and gamma

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 26 ++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index c8c376a0d..023c4c6ac 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -4,6 +4,8 @@
 import rpy2.robjects.numpy2ri
 rpy2.robjects.numpy2ri.activate()
 
+import numpy as np
+
 def sim_xy(n, p, nval, rho=0, s=5):
     robjects.r('''
     source('~/best-subset/bestsubset/R/sim.R')
@@ -12,4 +14,26 @@ def sim_xy(n, p, nval, rho=0, s=5):
     r_simulate = robjects.globalenv['sim.xy']
     print(r_simulate(n, p, nval, rho=rho, s=s))
 
-sim_xy(n=50, p=10, nval=50)
\ No newline at end of file
+#sim_xy(n=50, p=10, nval=50)
+
+def tuned_lasso(X, Y):
+    robjects.r('''
+        source('~/best-subset/bestsubset/R/lasso.R')
+        tuned_lasso_estimator = function(X,Y){
+        Y = as.matrix(Y)
+        X = as.matrix(X)
+        rel.lasso = lasso(X,Y,intercept=FALSE, nrelax=5, nlam=50)
+        beta.hat = as.matrix(coef(rel.lasso))
+        return(beta.hat)
+        }''')
+
+    r_lasso = robjects.globalenv['tuned_lasso_estimator']
+
+    n, p = X.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(Y, nrow=n, ncol=1)
+
+    estimator = r_lasso(r_X, r_y)
+    return (estimator)
+
+print(tuned_lasso(np.random.standard_normal((50,10)), np.random.standard_normal(50)))
\ No newline at end of file

From decb1e242caa751c47a1cdee7686d11b54228e60 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 9 Dec 2017 20:21:29 -0800
Subject: [PATCH 424/617] tuned version of relaxed lasso

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 22 ++++++++++++++-----
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 023c4c6ac..20d3c607e 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -16,24 +16,34 @@ def sim_xy(n, p, nval, rho=0, s=5):
 
 #sim_xy(n=50, p=10, nval=50)
 
-def tuned_lasso(X, Y):
+def tuned_lasso(X, Y, X_val,Y_val):
     robjects.r('''
         source('~/best-subset/bestsubset/R/lasso.R')
-        tuned_lasso_estimator = function(X,Y){
+        tuned_lasso_estimator = function(X,Y,X.val,Y.val){
         Y = as.matrix(Y)
         X = as.matrix(X)
-        rel.lasso = lasso(X,Y,intercept=FALSE, nrelax=5, nlam=50)
+        Y.val = as.vector(Y.val)
+        X.val = as.matrix(X.val)
+
+        rel.lasso = lasso(X,Y,intercept=TRUE, nrelax=5, nlam=50)
         beta.hat = as.matrix(coef(rel.lasso))
-        return(beta.hat)
+
+        muhat.val = as.matrix(predict(rel.lasso, X.val))
+        err.val = colMeans((muhat.val - Y.val)^2)
+        return(beta.hat[,which.min(err.val)])
         }''')
 
     r_lasso = robjects.globalenv['tuned_lasso_estimator']
 
     n, p = X.shape
+    nval, _ = X_val.shape
     r_X = robjects.r.matrix(X, nrow=n, ncol=p)
     r_y = robjects.r.matrix(Y, nrow=n, ncol=1)
 
-    estimator = r_lasso(r_X, r_y)
+    r_X_val = robjects.r.matrix(X_val, nrow=nval, ncol=p)
+    r_y_val = robjects.r.matrix(Y_val, nrow=nval, ncol=1)
+    estimator = r_lasso(r_X, r_y, r_X_val, r_y_val)
     return (estimator)
 
-print(tuned_lasso(np.random.standard_normal((50,10)), np.random.standard_normal(50)))
\ No newline at end of file
+print(tuned_lasso(np.random.standard_normal((50,10)), np.random.standard_normal(50),
+                  np.random.standard_normal((50,10)), np.random.standard_normal(50)))
\ No newline at end of file

From 0333f137e2d1e39f5d754806830c0c25243b51e5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 9 Dec 2017 23:08:04 -0800
Subject: [PATCH 425/617] fix sigma and change coef in dgp

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 20d3c607e..0d8e9e399 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -6,15 +6,24 @@
 
 import numpy as np
 
-def sim_xy(n, p, nval, rho=0, s=5):
+def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
     source('~/best-subset/bestsubset/R/sim.R')
     ''')
 
     r_simulate = robjects.globalenv['sim.xy']
-    print(r_simulate(n, p, nval, rho=rho, s=s))
+    sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
+    X = np.array(sim.rx2('x'))
+    y = np.array(sim.rx2('y'))
+    X_val = np.array(sim.rx2('xval'))
+    y_val = np.array(sim.rx2('yval'))
+    Sigma = np.array(sim.rx2('Sigma'))
+    beta = np.array(sim.rx2('beta'))
+    sigma = np.array(sim.rx2('sigma'))
 
-#sim_xy(n=50, p=10, nval=50)
+    return X, y, X_val, y_val, Sigma, beta, sigma
+
+sim_xy(n=50, p=10, nval=50)
 
 def tuned_lasso(X, Y, X_val,Y_val):
     robjects.r('''
@@ -45,5 +54,5 @@ def tuned_lasso(X, Y, X_val,Y_val):
     estimator = r_lasso(r_X, r_y, r_X_val, r_y_val)
     return (estimator)
 
-print(tuned_lasso(np.random.standard_normal((50,10)), np.random.standard_normal(50),
-                  np.random.standard_normal((50,10)), np.random.standard_normal(50)))
\ No newline at end of file
+#print(tuned_lasso(np.random.standard_normal((50,10)), np.random.standard_normal(50),
+#                  np.random.standard_normal((50,10)), np.random.standard_normal(50)))
\ No newline at end of file

From e931db63664cce3d788042a2c032ed47ce77b986 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 9 Dec 2017 23:41:27 -0800
Subject: [PATCH 426/617] return tuned relaxed lasso est

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 0d8e9e399..362689c91 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -23,9 +23,7 @@ def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
 
     return X, y, X_val, y_val, Sigma, beta, sigma
 
-sim_xy(n=50, p=10, nval=50)
-
-def tuned_lasso(X, Y, X_val,Y_val):
+def tuned_lasso(X, y, X_val,y_val):
     robjects.r('''
         source('~/best-subset/bestsubset/R/lasso.R')
         tuned_lasso_estimator = function(X,Y,X.val,Y.val){
@@ -34,7 +32,7 @@ def tuned_lasso(X, Y, X_val,Y_val):
         Y.val = as.vector(Y.val)
         X.val = as.matrix(X.val)
 
-        rel.lasso = lasso(X,Y,intercept=TRUE, nrelax=5, nlam=50)
+        rel.lasso = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
         beta.hat = as.matrix(coef(rel.lasso))
 
         muhat.val = as.matrix(predict(rel.lasso, X.val))
@@ -47,12 +45,13 @@ def tuned_lasso(X, Y, X_val,Y_val):
     n, p = X.shape
     nval, _ = X_val.shape
     r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-    r_y = robjects.r.matrix(Y, nrow=n, ncol=1)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
 
     r_X_val = robjects.r.matrix(X_val, nrow=nval, ncol=p)
-    r_y_val = robjects.r.matrix(Y_val, nrow=nval, ncol=1)
-    estimator = r_lasso(r_X, r_y, r_X_val, r_y_val)
-    return (estimator)
+    r_y_val = robjects.r.matrix(y_val, nrow=nval, ncol=1)
+    estimator = np.array(r_lasso(r_X, r_y, r_X_val, r_y_val))
+    return estimator
 
-#print(tuned_lasso(np.random.standard_normal((50,10)), np.random.standard_normal(50),
-#                  np.random.standard_normal((50,10)), np.random.standard_normal(50)))
\ No newline at end of file
+X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2)
+rel_LASSO = tuned_lasso(X, y, X_val,y_val)
+print("relaxed LASSO", rel_LASSO)

From b40619b971521eab35acf04fc2ae3bcf9b61c690 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sun, 10 Dec 2017 00:27:42 -0800
Subject: [PATCH 427/617] add relative-risk comparison of selective MLE vs tuned relaxed LASSO

---
 selection/adjusted_MLE/tests/compare_risks.py |   2 +-
 selection/adjusted_MLE/tests/relaxed_lasso.py | 134 +++++++++++++++++-
 2 files changed, 131 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 737f85c55..7dd1470ce 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -195,7 +195,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
     risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=200, p=1000, s=10, signal=3.)
+        approx = risk_selective_mle_full(n=500, p=100, s=5, signal=3.)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 362689c91..3efe1cace 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -1,10 +1,37 @@
+from __future__ import print_function
 from rpy2.robjects.packages import importr
 from rpy2 import robjects
 
 import rpy2.robjects.numpy2ri
 rpy2.robjects.numpy2ri.activate()
 
-import numpy as np
+import statsmodels.api as sm
+import numpy as np, sys
+import regreg.api as rr
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
+
+def glmnet_sigma(X, y):
+    robjects.r('''
+                glmnet_cv = function(X,y){
+                y = as.matrix(y)
+                X = as.matrix(X)
+
+                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
+                lam_1se = out$lambda.1se
+                return(lam_1se)
+                }''')
+
+    try:
+        lambda_cv_R = robjects.globalenv['glmnet_cv']
+        n, p = X.shape
+        r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+        r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+
+        lam_1se = lambda_cv_R(r_X, r_y)
+        return lam_1se*n
+    except:
+        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
 
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
@@ -52,6 +79,105 @@ def tuned_lasso(X, y, X_val,y_val):
     estimator = np.array(r_lasso(r_X, r_y, r_X_val, r_y_val))
     return estimator
 
-X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2)
-rel_LASSO = tuned_lasso(X, y, X_val,y_val)
-print("relaxed LASSO", rel_LASSO)
+def relative_risk(est, truth, Sigma):
+
+    return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
+
+def risk_selective_mle_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+                            lam_frac=1., randomization_scale=np.sqrt(0.5)):
+
+    X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
+    rel_LASSO = tuned_lasso(X, y, X_val, y_val)
+
+    X -= X.mean(0)[None, :]
+    X/= (X.std(0)[None, :] * np.sqrt(n))
+    if p > n:
+        sigma_est = np.std(y) / 2.
+        print("sigma est", sigma_est)
+    else:
+        ols_fit = sm.OLS(y, X).fit()
+        sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+        print("sigma est", sigma_est)
+
+    lam = glmnet_sigma(X, y)
+
+    loss = rr.glm.gaussian(X, y)
+    epsilon = 1. / np.sqrt(n)
+    W = np.ones(p) * lam
+    penalty = rr.group_lasso(np.arange(p),
+                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
+                            sigma=sigma_est)
+
+    M_est.solve_map()
+    active = M_est._overall
+
+    nactive = np.sum(active)
+    print("number of variables selected by randomized LASSO", nactive)
+
+    if nactive > 0:
+        approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
+                                                                   M_est.opt_transform,
+                                                                   M_est.target_observed,
+                                                                   M_est.feasible_point,
+                                                                   M_est.target_cov,
+                                                                   M_est.randomizer_precision)
+
+        mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+
+    ind_est = np.zeros(p)
+    ind_est[active] = mle_target_lin.dot(M_est.target_observed) +\
+                      mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
+    ind_est/= np.sqrt(n)
+    target_par = beta
+
+    Lasso_est = np.zeros(p)
+    Lasso_est[active] = M_est.observed_opt_state[:nactive]/np.sqrt(n)
+    selective_MLE = np.zeros(p)
+    selective_MLE[active] = approx_MLE/np.sqrt(n)
+    relaxed_Lasso = np.zeros(p)
+    relaxed_Lasso[active] = M_est.target_observed/np.sqrt(n)
+
+    return (selective_MLE - target_par).sum() / float(nactive), \
+           relative_risk(selective_MLE, target_par, Sigma), \
+           relative_risk(relaxed_Lasso, target_par, Sigma), \
+           relative_risk(ind_est, target_par, Sigma),\
+           relative_risk(Lasso_est, target_par, Sigma),\
+           relative_risk(rel_LASSO, target_par, Sigma)
+
+if __name__ == "__main__":
+
+    ndraw = 100
+    bias = 0.
+    risk_selMLE = 0.
+    risk_relLASSO = 0.
+    risk_indest = 0.
+    risk_LASSO = 0.
+    risk_relLASSO_nonrand = 0.
+    for i in range(ndraw):
+        approx = risk_selective_mle_full(n=500, p=100, nval=100, rho=0., s=5, beta_type=2, snr=0.1)
+        if approx is not None:
+            bias += approx[0]
+            risk_selMLE += approx[1]
+            risk_relLASSO += approx[2]
+            risk_indest += approx[3]
+            risk_LASSO += approx[4]
+            risk_relLASSO_nonrand += approx[5]
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
+        sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
+        sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
+        sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
+        sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+
+
+
+
+
+
+
+

From 3ed5da22e44b285a9a0518120b3d0f07643b7469 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 11 Dec 2017 12:11:48 -0800
Subject: [PATCH 428/617] corrected glmnet

---
 selection/adjusted_MLE/tests/compare_risks.py | 25 ++++++++---------
 selection/adjusted_MLE/tests/relaxed_lasso.py | 28 +++++++++++++------
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 7dd1470ce..b25f492d1 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -19,22 +19,21 @@ def glmnet_sigma(X, y):
                 glmnet_cv = function(X,y){
                 y = as.matrix(y)
                 X = as.matrix(X)
-
+                n = nrow(X)
                 out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                lam_1se = out$lambda.1se
-                return(lam_1se)
+                #lam_1se = out$lambda.1se
+                lam_min = out$lambda.min
+                return(n * as.numeric(lam_min))
                 }''')
 
-    try:
-        lambda_cv_R = robjects.globalenv['glmnet_cv']
-        n, p = X.shape
-        r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-        r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+    lambda_cv_R = robjects.globalenv['glmnet_cv']
+    n, p = X.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
 
-        lam_1se = lambda_cv_R(r_X, r_y)
-        return lam_1se*n
-    except:
-        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+    lam_1se = lambda_cv_R(r_X, r_y)
+    print("lambda", lam_1se)
+    return lam_1se
 
 def relative_risk(est, truth, Sigma):
 
@@ -195,7 +194,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
     risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=100, s=5, signal=3.)
+        approx = risk_selective_mle_full(n=200, p=1000, s=5, signal=3.13)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 3efe1cace..4b1bcb91c 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -19,7 +19,9 @@ def glmnet_sigma(X, y):
 
                 out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
                 lam_1se = out$lambda.1se
-                return(lam_1se)
+                active = which(coef(out, s="lambda.1se") != 0)
+                print(active)
+                return(list(lambda=lam_1se, active = active, lasso_est = as.vector(coef(out, s = "lambda.1se")[active])))
                 }''')
 
     try:
@@ -28,10 +30,14 @@ def glmnet_sigma(X, y):
         r_X = robjects.r.matrix(X, nrow=n, ncol=p)
         r_y = robjects.r.matrix(y, nrow=n, ncol=1)
 
-        lam_1se = lambda_cv_R(r_X, r_y)
-        return lam_1se*n
+        out = lambda_cv_R(r_X, r_y)
+        lam_1se = out.rx2('lambda')
+        lasso_est = np.array(out.rx2('lasso_est'))
+        active = np.array(out.rx2('active'))
+        print("lasso est", lasso_est, active, lam_1se)
+        return lam_1se*n, lasso_est, active
     except:
-        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)), 0, 0
 
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
@@ -83,11 +89,12 @@ def relative_risk(est, truth, Sigma):
 
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
-def risk_selective_mle_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
                             lam_frac=1., randomization_scale=np.sqrt(0.5)):
 
     X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
     rel_LASSO = tuned_lasso(X, y, X_val, y_val)
+    #print("beta", beta, X.std(0), X.mean(0))
 
     X -= X.mean(0)[None, :]
     X/= (X.std(0)[None, :] * np.sqrt(n))
@@ -99,10 +106,12 @@ def risk_selective_mle_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2,
         sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
         print("sigma est", sigma_est)
 
-    lam = glmnet_sigma(X, y)
-
     loss = rr.glm.gaussian(X, y)
     epsilon = 1. / np.sqrt(n)
+
+    lam, lasso_est, lasso_active = glmnet_sigma(X, y)
+    print("lambda from glmnet", lam, lasso_est, lasso_active)
+
     W = np.ones(p) * lam
     penalty = rr.group_lasso(np.arange(p),
                              weights=dict(zip(np.arange(p), W)), lagrange=1.)
@@ -140,6 +149,7 @@ def risk_selective_mle_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2,
     relaxed_Lasso = np.zeros(p)
     relaxed_Lasso[active] = M_est.target_observed/np.sqrt(n)
 
+    #print("target", target_par, Sigma)
     return (selective_MLE - target_par).sum() / float(nactive), \
            relative_risk(selective_MLE, target_par, Sigma), \
            relative_risk(relaxed_Lasso, target_par, Sigma), \
@@ -149,7 +159,7 @@ def risk_selective_mle_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2,
 
 if __name__ == "__main__":
 
-    ndraw = 100
+    ndraw = 1
     bias = 0.
     risk_selMLE = 0.
     risk_relLASSO = 0.
@@ -157,7 +167,7 @@ def risk_selective_mle_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2,
     risk_LASSO = 0.
     risk_relLASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=100, nval=100, rho=0., s=5, beta_type=2, snr=0.1)
+        approx = risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.1)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 5136ee997195031167a266c57de626a78a641ecb Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 11 Dec 2017 12:56:44 -0800
Subject: [PATCH 429/617] added both lambda min and 1se in glmnet

---
 selection/adjusted_MLE/tests/compare_risks.py | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index b25f492d1..7b88a682e 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -21,9 +21,9 @@ def glmnet_sigma(X, y):
                 X = as.matrix(X)
                 n = nrow(X)
                 out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                #lam_1se = out$lambda.1se
+                lam_1se = out$lambda.1se
                 lam_min = out$lambda.min
-                return(n * as.numeric(lam_min))
+                return(list(lam_min = n * as.numeric(lam_min), lam_1se = n* as.numeric(lam_1se)))
                 }''')
 
     lambda_cv_R = robjects.globalenv['glmnet_cv']
@@ -31,9 +31,10 @@ def glmnet_sigma(X, y):
     r_X = robjects.r.matrix(X, nrow=n, ncol=p)
     r_y = robjects.r.matrix(y, nrow=n, ncol=1)
 
-    lam_1se = lambda_cv_R(r_X, r_y)
-    print("lambda", lam_1se)
-    return lam_1se
+    lam = lambda_cv_R(r_X, r_y)
+    lam_min = np.array(lam.rx2('lam_min'))
+    lam_1se = np.array(lam.rx2('lam_1se'))
+    return lam_min, lam_1se
 
 def relative_risk(est, truth, Sigma):
 
@@ -64,7 +65,10 @@ def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_
         print("snr", snr)
 
         #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        lam = glmnet_sigma(X, y)
+        lam_min, lam_1se = glmnet_sigma(X, y)
+        print(" here lambda")
+        lam = lam_1se[0]
+        print(" here lambda", lam)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1./np.sqrt(n)
@@ -124,7 +128,8 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
         print("snr", snr)
 
         #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        lam = glmnet_sigma(X, y)
+        lam_min, lam_1se = glmnet_sigma(X, y)
+        lam = lam_1se[0]
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. /np.sqrt(n)
@@ -194,7 +199,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
     risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=200, p=1000, s=5, signal=3.13)
+        approx = risk_selective_mle_full(n=500, p=100, s=5, signal=3.13)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From ac6b93eafaac79efa1bf02ba34d3a08c8bf2ad62 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 11 Dec 2017 13:10:15 -0800
Subject: [PATCH 430/617] comparisons for tuned estimator against sel MLE

---
 selection/adjusted_MLE/tests/compare_risks.py |   2 +-
 selection/adjusted_MLE/tests/relaxed_lasso.py | 113 +++++++++---------
 2 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 7b88a682e..7ded1b63c 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -199,7 +199,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
     risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=100, s=5, signal=3.13)
+        approx = risk_selective_mle_full(n=500, p=100, s=5, signal=5.)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 4b1bcb91c..4cbeb512f 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -16,28 +16,23 @@ def glmnet_sigma(X, y):
                 glmnet_cv = function(X,y){
                 y = as.matrix(y)
                 X = as.matrix(X)
-
+                n = nrow(X)
                 out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
                 lam_1se = out$lambda.1se
-                active = which(coef(out, s="lambda.1se") != 0)
-                print(active)
-                return(list(lambda=lam_1se, active = active, lasso_est = as.vector(coef(out, s = "lambda.1se")[active])))
+                lam_min = out$lambda.min
+                return(list(lam_min = n * as.numeric(lam_min), lam_1se = n* as.numeric(lam_1se)))
                 }''')
 
-    try:
-        lambda_cv_R = robjects.globalenv['glmnet_cv']
-        n, p = X.shape
-        r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-        r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-
-        out = lambda_cv_R(r_X, r_y)
-        lam_1se = out.rx2('lambda')
-        lasso_est = np.array(out.rx2('lasso_est'))
-        active = np.array(out.rx2('active'))
-        print("lasso est", lasso_est, active, lam_1se)
-        return lam_1se*n, lasso_est, active
-    except:
-        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)), 0, 0
+    lambda_cv_R = robjects.globalenv['glmnet_cv']
+    n, p = X.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+
+    lam = lambda_cv_R(r_X, r_y)
+    lam_min = np.array(lam.rx2('lam_min'))
+    lam_1se = np.array(lam.rx2('lam_1se'))
+    return lam_min, lam_1se
+
 
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
@@ -91,63 +86,65 @@ def relative_risk(est, truth, Sigma):
 
 def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
                             lam_frac=1., randomization_scale=np.sqrt(0.5)):
+    while True:
+        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
+        rel_LASSO = tuned_lasso(X, y, X_val, y_val)
+        # print("beta", beta, X.std(0), X.mean(0))
 
-    X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
-    rel_LASSO = tuned_lasso(X, y, X_val, y_val)
-    #print("beta", beta, X.std(0), X.mean(0))
+        X -= X.mean(0)[None, :]
+        X /= (X.std(0)[None, :] * np.sqrt(n))
+        if p > n:
+            sigma_est = np.std(y) / 2.
+            print("sigma est", sigma_est)
+        else:
+            ols_fit = sm.OLS(y, X).fit()
+            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+            print("sigma est", sigma_est)
 
-    X -= X.mean(0)[None, :]
-    X/= (X.std(0)[None, :] * np.sqrt(n))
-    if p > n:
-        sigma_est = np.std(y) / 2.
-        print("sigma est", sigma_est)
-    else:
-        ols_fit = sm.OLS(y, X).fit()
-        sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-        print("sigma est", sigma_est)
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1. / np.sqrt(n)
 
-    loss = rr.glm.gaussian(X, y)
-    epsilon = 1. / np.sqrt(n)
+        lam_min, lam_1se = glmnet_sigma(X, y)
+        lam = lam_1se[0]
 
-    lam, lasso_est, lasso_active = glmnet_sigma(X, y)
-    print("lambda from glmnet", lam, lasso_est, lasso_active)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p),
+                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
+                                sigma=sigma_est)
 
-    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
-                            sigma=sigma_est)
+        M_est.solve_map()
+        active = M_est._overall
 
-    M_est.solve_map()
-    active = M_est._overall
+        nactive = np.sum(active)
+        print("number of variables selected by randomized LASSO", nactive)
 
-    nactive = np.sum(active)
-    print("number of variables selected by randomized LASSO", nactive)
+        if nactive > 0:
+            approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
+                                                                       M_est.opt_transform,
+                                                                       M_est.target_observed,
+                                                                       M_est.feasible_point,
+                                                                       M_est.target_cov,
+                                                                       M_est.randomizer_precision)
 
-    if nactive > 0:
-        approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
-                                                                   M_est.opt_transform,
-                                                                   M_est.target_observed,
-                                                                   M_est.feasible_point,
-                                                                   M_est.target_cov,
-                                                                   M_est.randomizer_precision)
+            mle_target_lin, mle_soln_lin, mle_offset = mle_transform
 
-        mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+            break
 
     ind_est = np.zeros(p)
-    ind_est[active] = mle_target_lin.dot(M_est.target_observed) +\
+    ind_est[active] = mle_target_lin.dot(M_est.target_observed) + \
                       mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
-    ind_est/= np.sqrt(n)
+    ind_est /= np.sqrt(n)
     target_par = beta
 
     Lasso_est = np.zeros(p)
-    Lasso_est[active] = M_est.observed_opt_state[:nactive]/np.sqrt(n)
+    Lasso_est[active] = M_est.observed_opt_state[:nactive] / np.sqrt(n)
     selective_MLE = np.zeros(p)
-    selective_MLE[active] = approx_MLE/np.sqrt(n)
+    selective_MLE[active] = approx_MLE / np.sqrt(n)
     relaxed_Lasso = np.zeros(p)
-    relaxed_Lasso[active] = M_est.target_observed/np.sqrt(n)
+    relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
 
     #print("target", target_par, Sigma)
     return (selective_MLE - target_par).sum() / float(nactive), \
@@ -159,7 +156,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
 
 if __name__ == "__main__":
 
-    ndraw = 1
+    ndraw = 100
     bias = 0.
     risk_selMLE = 0.
     risk_relLASSO = 0.

From be3f7014b05647dc937bde145e81bc704df3cbef Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 11 Dec 2017 14:05:41 -0800
Subject: [PATCH 431/617] extract the tuned lambda in best subset and use it
 for rand LASSO

---
 selection/adjusted_MLE/tests/compare_risks.py |  1 +
 selection/adjusted_MLE/tests/relaxed_lasso.py | 25 +++++++++++++------
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
index 7ded1b63c..3c089bfea 100644
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ b/selection/adjusted_MLE/tests/compare_risks.py
@@ -130,6 +130,7 @@ def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomiza
         #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
         lam_min, lam_1se = glmnet_sigma(X, y)
         lam = lam_1se[0]
+        print("lambda from glmnet", lam)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. /np.sqrt(n)
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 4cbeb512f..b086ddf8f 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -62,10 +62,17 @@ def tuned_lasso(X, y, X_val,y_val):
 
         rel.lasso = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
         beta.hat = as.matrix(coef(rel.lasso))
+        print(dim(beta.hat))
+
+        min.lam = min(rel.lasso$lambda)
+        max.lam = max(rel.lasso$lambda)
+        lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.lasso$nlambda))
 
         muhat.val = as.matrix(predict(rel.lasso, X.val))
         err.val = colMeans((muhat.val - Y.val)^2)
-        return(beta.hat[,which.min(err.val)])
+        opt_lam = ceiling(which.min(err.val)/10)
+        lambda.tuned = lam.seq[opt_lam]
+        return(list(beta.hat = beta.hat[,which.min(err.val)], lambda.tuned = lambda.tuned))
         }''')
 
     r_lasso = robjects.globalenv['tuned_lasso_estimator']
@@ -77,8 +84,10 @@ def tuned_lasso(X, y, X_val,y_val):
 
     r_X_val = robjects.r.matrix(X_val, nrow=nval, ncol=p)
     r_y_val = robjects.r.matrix(y_val, nrow=nval, ncol=1)
-    estimator = np.array(r_lasso(r_X, r_y, r_X_val, r_y_val))
-    return estimator
+    tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
+    estimator = np.array(tuned_est.rx2('beta.hat'))
+    lam_tuned = np.array(tuned_est.rx2('lambda.tuned'))
+    return estimator, lam_tuned
 
 def relative_risk(est, truth, Sigma):
 
@@ -88,8 +97,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
                             lam_frac=1., randomization_scale=np.sqrt(0.5)):
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
-        rel_LASSO = tuned_lasso(X, y, X_val, y_val)
-        # print("beta", beta, X.std(0), X.mean(0))
+        rel_LASSO, lam_tuned = tuned_lasso(X, y, X_val, y_val)
 
         X -= X.mean(0)[None, :]
         X /= (X.std(0)[None, :] * np.sqrt(n))
@@ -104,8 +112,11 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
 
-        lam_min, lam_1se = glmnet_sigma(X, y)
-        lam = lam_1se[0]
+        #lam_min, lam_1se = glmnet_sigma(X, y)
+        #lam = lam_1se[0]
+        lam = np.sqrt(n)*lam_tuned[0]
+
+        #print("lam_tuned", np.sqrt(n)*lam_tuned, lam)
 
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),

From d1a8202e6d4d93476170e12a474433c04a57bd57 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 11 Dec 2017 15:05:48 -0800
Subject: [PATCH 432/617] tried tuning randomized LASSO

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 45 ++++++++++++++-----
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index b086ddf8f..61c781cb0 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -62,7 +62,6 @@ def tuned_lasso(X, y, X_val,y_val):
 
         rel.lasso = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
         beta.hat = as.matrix(coef(rel.lasso))
-        print(dim(beta.hat))
 
         min.lam = min(rel.lasso$lambda)
         max.lam = max(rel.lasso$lambda)
@@ -72,7 +71,7 @@ def tuned_lasso(X, y, X_val,y_val):
         err.val = colMeans((muhat.val - Y.val)^2)
         opt_lam = ceiling(which.min(err.val)/10)
         lambda.tuned = lam.seq[opt_lam]
-        return(list(beta.hat = beta.hat[,which.min(err.val)], lambda.tuned = lambda.tuned))
+        return(list(beta.hat = beta.hat[,which.min(err.val)], lambda.tuned = lambda.tuned, lambda.seq = lam.seq))
         }''')
 
     r_lasso = robjects.globalenv['tuned_lasso_estimator']
@@ -87,20 +86,24 @@ def tuned_lasso(X, y, X_val,y_val):
     tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
     estimator = np.array(tuned_est.rx2('beta.hat'))
     lam_tuned = np.array(tuned_est.rx2('lambda.tuned'))
-    return estimator, lam_tuned
+    lam_seq = np.array(tuned_est.rx2('lambda.seq'))
+    return estimator, lam_tuned, lam_seq
 
 def relative_risk(est, truth, Sigma):
 
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
 def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                            lam_frac=1., randomization_scale=np.sqrt(0.5)):
+                            lam_frac=1., randomization_scale=np.sqrt(0.25)):
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
-        rel_LASSO, lam_tuned = tuned_lasso(X, y, X_val, y_val)
+        rel_LASSO, lam_tuned, lam_seq = tuned_lasso(X, y, X_val, y_val)
 
         X -= X.mean(0)[None, :]
         X /= (X.std(0)[None, :] * np.sqrt(n))
+
+        X_val -= X_val.mean(0)[None, :]
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(n))
         if p > n:
             sigma_est = np.std(y) / 2.
             print("sigma est", sigma_est)
@@ -112,17 +115,39 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
 
-        #lam_min, lam_1se = glmnet_sigma(X, y)
-        #lam = lam_1se[0]
-        lam = np.sqrt(n)*lam_tuned[0]
+        lam_min, lam_1se = glmnet_sigma(X, y)
+        lam = lam_1se[0]
+        #lam = np.sqrt(n)*lam_tuned[0]
+
+        lam_seq = np.linspace(0.5* lam_1se, lam_1se, num=50)
+        print("lam seq", lam_seq)
 
         #print("lam_tuned", np.sqrt(n)*lam_tuned, lam)
+        err = np.zeros(50)
+        for k in range(50):
+            lam = lam_seq[k]
+            W = np.ones(p) * lam
+            penalty = rr.group_lasso(np.arange(p),
+                                     weights=dict(zip(np.arange(p), W)), lagrange=1.)
+
+            randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
+                                    sigma=sigma_est)
+
+            M_est.solve_map()
+            active = M_est._overall
+            nactive = np.sum(active)
+            Lasso_est = np.zeros(p)
+            Lasso_est[active] = M_est.observed_opt_state[:nactive]
+            err[k] = np.mean((y-X.dot(Lasso_est))**2.)
+
+        lam = lam_seq[np.argmin(err)]
+        print("err seq", err, lam)
 
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
                                 sigma=sigma_est)
 

From 70e162506c9bd9521c1a4a1d96d23f76f2e6adc7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 11 Dec 2017 17:46:35 -0800
Subject: [PATCH 433/617] use external validation set to tune lambda

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 34 ++++++++++---------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 61c781cb0..43ec6961e 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -66,12 +66,13 @@ def tuned_lasso(X, y, X_val,y_val):
         min.lam = min(rel.lasso$lambda)
         max.lam = max(rel.lasso$lambda)
         lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.lasso$nlambda))
-
+        ext.lam.seq = exp(seq(1.25*log(max.lam),log(min.lam),length=100))
         muhat.val = as.matrix(predict(rel.lasso, X.val))
         err.val = colMeans((muhat.val - Y.val)^2)
         opt_lam = ceiling(which.min(err.val)/10)
         lambda.tuned = lam.seq[opt_lam]
-        return(list(beta.hat = beta.hat[,which.min(err.val)], lambda.tuned = lambda.tuned, lambda.seq = lam.seq))
+        return(list(beta.hat = beta.hat[,which.min(err.val)], lambda.tuned = lambda.tuned, lambda.seq = lam.seq,
+        ext.lambda.seq = ext.lam.seq))
         }''')
 
     r_lasso = robjects.globalenv['tuned_lasso_estimator']
@@ -87,7 +88,8 @@ def tuned_lasso(X, y, X_val,y_val):
     estimator = np.array(tuned_est.rx2('beta.hat'))
     lam_tuned = np.array(tuned_est.rx2('lambda.tuned'))
     lam_seq = np.array(tuned_est.rx2('lambda.seq'))
-    return estimator, lam_tuned, lam_seq
+    ext_lam_seq = np.array(tuned_est.rx2('ext.lambda.seq'))
+    return estimator, lam_tuned, lam_seq, ext_lam_seq
 
 def relative_risk(est, truth, Sigma):
 
@@ -97,7 +99,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
                             lam_frac=1., randomization_scale=np.sqrt(0.25)):
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
-        rel_LASSO, lam_tuned, lam_seq = tuned_lasso(X, y, X_val, y_val)
+        rel_LASSO, lam_tuned, lam_seq, ext_lam_seq = tuned_lasso(X, y, X_val, y_val)
 
         X -= X.mean(0)[None, :]
         X /= (X.std(0)[None, :] * np.sqrt(n))
@@ -115,16 +117,17 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
 
-        lam_min, lam_1se = glmnet_sigma(X, y)
-        lam = lam_1se[0]
-        #lam = np.sqrt(n)*lam_tuned[0]
+        #lam_min, lam_1se = glmnet_sigma(X, y)
+        #lam = lam_1se[0]
 
-        lam_seq = np.linspace(0.5* lam_1se, lam_1se, num=50)
-        print("lam seq", lam_seq)
+        #lam_seq = np.linspace(0.5* lam_1se, lam_1se, num=50)
+        #lam_seq = np.sqrt(n)* ext_lam_seq
+        #print("lam seq", lam_seq)
 
-        #print("lam_tuned", np.sqrt(n)*lam_tuned, lam)
-        err = np.zeros(50)
-        for k in range(50):
+        lam_seq = np.linspace(0.75, 2.5, num= 100)\
+                  *np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        err = np.zeros(100)
+        for k in range(100):
             lam = lam_seq[k]
             W = np.ones(p) * lam
             penalty = rr.group_lasso(np.arange(p),
@@ -139,11 +142,10 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
             nactive = np.sum(active)
             Lasso_est = np.zeros(p)
             Lasso_est[active] = M_est.observed_opt_state[:nactive]
-            err[k] = np.mean((y-X.dot(Lasso_est))**2.)
+            err[k] = np.mean((y_val-X_val.dot(Lasso_est))**2.)
 
         lam = lam_seq[np.argmin(err)]
         print("err seq", err, lam)
-
         randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
@@ -156,6 +158,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
 
         nactive = np.sum(active)
         print("number of variables selected by randomized LASSO", nactive)
+        print("number of variables selected by tuned LASSO", (rel_LASSO!=0).sum())
 
         if nactive > 0:
             approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
@@ -182,7 +185,6 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
     relaxed_Lasso = np.zeros(p)
     relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
 
-    #print("target", target_par, Sigma)
     return (selective_MLE - target_par).sum() / float(nactive), \
            relative_risk(selective_MLE, target_par, Sigma), \
            relative_risk(relaxed_Lasso, target_par, Sigma), \
@@ -200,7 +202,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
     risk_LASSO = 0.
     risk_relLASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.1)
+        approx = risk_selective_mle_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.15)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 6268010859d16ccc3c2a65f437e5b9666d51dae7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 11 Dec 2017 23:36:38 -0800
Subject: [PATCH 434/617] added tuned LASSO est

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 50 ++++++++++++-------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 43ec6961e..4f3312627 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -60,19 +60,27 @@ def tuned_lasso(X, y, X_val,y_val):
         Y.val = as.vector(Y.val)
         X.val = as.matrix(X.val)
 
-        rel.lasso = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
-        beta.hat = as.matrix(coef(rel.lasso))
-
-        min.lam = min(rel.lasso$lambda)
-        max.lam = max(rel.lasso$lambda)
-        lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.lasso$nlambda))
-        ext.lam.seq = exp(seq(1.25*log(max.lam),log(min.lam),length=100))
-        muhat.val = as.matrix(predict(rel.lasso, X.val))
-        err.val = colMeans((muhat.val - Y.val)^2)
-        opt_lam = ceiling(which.min(err.val)/10)
+        rel.LASSO = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
+        LASSO = lasso(X,Y,intercept=FALSE,nlam=50)
+        beta.hat.rellasso = as.matrix(coef(rel.LASSO))
+        beta.hat.lasso = as.matrix(coef(LASSO))
+
+        min.lam = min(rel.LASSO$lambda)
+        max.lam = max(rel.LASSO$lambda)
+        lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
+
+        muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
+        muhat.val.lasso = as.matrix(predict(LASSO, X.val))
+
+        err.val.rellasso = colMeans((muhat.val.rellasso - Y.val)^2)
+        err.val.lasso = colMeans((muhat.val.lasso - Y.val)^2)
+
+        opt_lam = ceiling(which.min(err.val.rellasso)/10)
         lambda.tuned = lam.seq[opt_lam]
-        return(list(beta.hat = beta.hat[,which.min(err.val)], lambda.tuned = lambda.tuned, lambda.seq = lam.seq,
-        ext.lambda.seq = ext.lam.seq))
+
+        return(list(beta.hat.rellasso = beta.hat.rellasso[,which.min(err.val.rellasso)],
+        beta.hat.lasso = beta.hat.lasso[,which.min(err.val.lasso)],
+        lambda.tuned = lambda.tuned, lambda.seq = lam.seq))
         }''')
 
     r_lasso = robjects.globalenv['tuned_lasso_estimator']
@@ -81,15 +89,15 @@ def tuned_lasso(X, y, X_val,y_val):
     nval, _ = X_val.shape
     r_X = robjects.r.matrix(X, nrow=n, ncol=p)
     r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-
     r_X_val = robjects.r.matrix(X_val, nrow=nval, ncol=p)
     r_y_val = robjects.r.matrix(y_val, nrow=nval, ncol=1)
+
     tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
-    estimator = np.array(tuned_est.rx2('beta.hat'))
+    estimator_rellasso = np.array(tuned_est.rx2('beta.hat.rellasso'))
+    estimator_lasso = np.array(tuned_est.rx2('beta.hat.lasso'))
     lam_tuned = np.array(tuned_est.rx2('lambda.tuned'))
     lam_seq = np.array(tuned_est.rx2('lambda.seq'))
-    ext_lam_seq = np.array(tuned_est.rx2('ext.lambda.seq'))
-    return estimator, lam_tuned, lam_seq, ext_lam_seq
+    return estimator_rellasso, estimator_lasso, lam_tuned, lam_seq
 
 def relative_risk(est, truth, Sigma):
 
@@ -99,7 +107,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
                             lam_frac=1., randomization_scale=np.sqrt(0.25)):
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
-        rel_LASSO, lam_tuned, lam_seq, ext_lam_seq = tuned_lasso(X, y, X_val, y_val)
+        rel_LASSO, est_LASSO, lam_tuned, lam_seq = tuned_lasso(X, y, X_val, y_val)
 
         X -= X.mean(0)[None, :]
         X /= (X.std(0)[None, :] * np.sqrt(n))
@@ -190,7 +198,8 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
            relative_risk(relaxed_Lasso, target_par, Sigma), \
            relative_risk(ind_est, target_par, Sigma),\
            relative_risk(Lasso_est, target_par, Sigma),\
-           relative_risk(rel_LASSO, target_par, Sigma)
+           relative_risk(rel_LASSO, target_par, Sigma),\
+           relative_risk(est_LASSO, target_par, Sigma)
 
 if __name__ == "__main__":
 
@@ -201,8 +210,9 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
     risk_indest = 0.
     risk_LASSO = 0.
     risk_relLASSO_nonrand = 0.
+    risk_LASSO_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.15)
+        approx = risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.1)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -210,6 +220,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
             risk_indest += approx[3]
             risk_LASSO += approx[4]
             risk_relLASSO_nonrand += approx[5]
+            risk_LASSO_nonrand += approx[6]
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
@@ -218,6 +229,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
         sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
 
 
 

From 31c33619c054c3f7eb419ede1346d4ad0794791e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 12 Dec 2017 11:06:52 -0800
Subject: [PATCH 435/617] added screening power

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 29 ++++++++++++-------
 selection/adjusted_MLE/tests/test_MLE_boot.py |  5 ++--
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 4f3312627..6407c32b2 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -125,14 +125,7 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
 
-        #lam_min, lam_1se = glmnet_sigma(X, y)
-        #lam = lam_1se[0]
-
-        #lam_seq = np.linspace(0.5* lam_1se, lam_1se, num=50)
-        #lam_seq = np.sqrt(n)* ext_lam_seq
-        #print("lam seq", lam_seq)
-
-        lam_seq = np.linspace(0.75, 2.5, num= 100)\
+        lam_seq = np.linspace(0.75, 2.75, num= 100)\
                   *np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
         err = np.zeros(100)
         for k in range(100):
@@ -193,17 +186,24 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
     relaxed_Lasso = np.zeros(p)
     relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
 
+    true_signals = np.zeros(p, np.bool)
+    true_signals[beta!=0] = 1
+    screened_randomized = np.logical_and(active, true_signals).sum()/5.
+    screened_nonrandomized = np.logical_and(rel_LASSO!=0, true_signals).sum()/5.
+
     return (selective_MLE - target_par).sum() / float(nactive), \
            relative_risk(selective_MLE, target_par, Sigma), \
            relative_risk(relaxed_Lasso, target_par, Sigma), \
            relative_risk(ind_est, target_par, Sigma),\
            relative_risk(Lasso_est, target_par, Sigma),\
            relative_risk(rel_LASSO, target_par, Sigma),\
-           relative_risk(est_LASSO, target_par, Sigma)
+           relative_risk(est_LASSO, target_par, Sigma), \
+           screened_randomized,\
+           screened_nonrandomized
 
 if __name__ == "__main__":
 
-    ndraw = 100
+    ndraw = 50
     bias = 0.
     risk_selMLE = 0.
     risk_relLASSO = 0.
@@ -211,8 +211,11 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
     risk_LASSO = 0.
     risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
+    spower_rand = 0.
+    spower_nonrand = 0.
     for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.1)
+        np.random.seed(i)
+        approx = risk_selective_mle_full(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.20)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -221,6 +224,8 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
             risk_LASSO += approx[4]
             risk_relLASSO_nonrand += approx[5]
             risk_LASSO_nonrand += approx[6]
+            spower_rand += approx[7]
+            spower_nonrand += approx[8]
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
@@ -230,6 +235,8 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
         sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / float(i + 1)) + "\n")
 
 
 
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index d1c3a75e6..73131cd81 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -85,7 +85,8 @@ def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
 
             break
 
-def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=0.7, sigma= 1.):
+def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=np.sqrt(0.25),
+                          sigma= 1.):
 
     while True:
         X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=sigma,
@@ -191,7 +192,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     coverage = 0.
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=4000, p=2000, s=20, signal=5., B=1200)
+        approx = boot_pivot_approx_var(n=500, p=100, s=5, signal=3., B=1200)
         if approx is not None:
             pivot_boot = approx[3]
             bias += approx[4]

From b3e22461b072ebd096c1b040c9920b241c122723 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 12 Dec 2017 12:39:00 -0800
Subject: [PATCH 436/617] added coverages

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 75 ++++++++++++++-----
 selection/adjusted_MLE/tests/test_MLE.py      |  4 +-
 2 files changed, 59 insertions(+), 20 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 6407c32b2..edfde6691 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -103,28 +103,31 @@ def relative_risk(est, truth, Sigma):
 
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
-def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                            lam_frac=1., randomization_scale=np.sqrt(0.25)):
+def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2, randomization_scale=np.sqrt(0.25)):
+
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
+        true_mean = X.dot(beta)
         rel_LASSO, est_LASSO, lam_tuned, lam_seq = tuned_lasso(X, y, X_val, y_val)
+        active_nonrand = (rel_LASSO != 0)
+        nactive_nonrand = active_nonrand.sum()
 
         X -= X.mean(0)[None, :]
         X /= (X.std(0)[None, :] * np.sqrt(n))
 
         X_val -= X_val.mean(0)[None, :]
         X_val /= (X_val.std(0)[None, :] * np.sqrt(n))
+
         if p > n:
             sigma_est = np.std(y) / 2.
-            print("sigma est", sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-            print("sigma est", sigma_est)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
 
+
         lam_seq = np.linspace(0.75, 2.75, num= 100)\
                   *np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
         err = np.zeros(100)
@@ -146,20 +149,36 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
             err[k] = np.mean((y_val-X_val.dot(Lasso_est))**2.)
 
         lam = lam_seq[np.argmin(err)]
-        print("err seq", err, lam)
         randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
         M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
                                 sigma=sigma_est)
-
         M_est.solve_map()
         active = M_est._overall
-
         nactive = np.sum(active)
+
         print("number of variables selected by randomized LASSO", nactive)
         print("number of variables selected by tuned LASSO", (rel_LASSO!=0).sum())
+        true_signals = np.zeros(p, np.bool)
+        true_signals[beta != 0] = 1
+        screened_randomized = np.logical_and(active, true_signals).sum() / float(s)
+        screened_nonrandomized = np.logical_and(active_nonrand, true_signals).sum() / float(s)
+
+        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
+        unad_sd = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
+        true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])).\
+            dot(X[:, active_nonrand].T).dot(true_mean)
+        unad_sd_nonrand = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
+        coverage_sel = 0.
+        coverage_rand = 0.
+        coverage_nonrand = 0.
+
+        for k in range(nactive_nonrand):
+            if (rel_LASSO[k]-(1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
+                    and (rel_LASSO[k]+(1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
+                coverage_nonrand += 1
 
         if nactive > 0:
             approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
@@ -170,26 +189,30 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
                                                                        M_est.randomizer_precision)
 
             mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+            approx_sd = np.sqrt(np.diag(var))
 
+            for j in range(nactive):
+                if (approx_MLE[j]-(1.65*approx_sd[j]))<= true_target[j] and (approx_MLE[j] + (1.65*approx_sd[j])) >= true_target[j]:
+                    coverage_sel += 1
+                if (M_est.target_observed[j]-(1.65*unad_sd[j]))<= true_target[j] and (M_est.target_observed[j]+(1.65*unad_sd[j])) >= true_target[j]:
+                    coverage_rand += 1
             break
 
+    target_par = beta
+
     ind_est = np.zeros(p)
     ind_est[active] = mle_target_lin.dot(M_est.target_observed) + \
                       mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
     ind_est /= np.sqrt(n)
-    target_par = beta
+
+    relaxed_Lasso = np.zeros(p)
+    relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
 
     Lasso_est = np.zeros(p)
     Lasso_est[active] = M_est.observed_opt_state[:nactive] / np.sqrt(n)
+
     selective_MLE = np.zeros(p)
     selective_MLE[active] = approx_MLE / np.sqrt(n)
-    relaxed_Lasso = np.zeros(p)
-    relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
-
-    true_signals = np.zeros(p, np.bool)
-    true_signals[beta!=0] = 1
-    screened_randomized = np.logical_and(active, true_signals).sum()/5.
-    screened_nonrandomized = np.logical_and(rel_LASSO!=0, true_signals).sum()/5.
 
     return (selective_MLE - target_par).sum() / float(nactive), \
            relative_risk(selective_MLE, target_par, Sigma), \
@@ -199,7 +222,10 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
            relative_risk(rel_LASSO, target_par, Sigma),\
            relative_risk(est_LASSO, target_par, Sigma), \
            screened_randomized,\
-           screened_nonrandomized
+           screened_nonrandomized,\
+           coverage_sel/float(nactive),\
+           coverage_rand/float(nactive), \
+           coverage_nonrand/float(nactive_nonrand)
 
 if __name__ == "__main__":
 
@@ -213,9 +239,13 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
     risk_LASSO_nonrand = 0.
     spower_rand = 0.
     spower_nonrand = 0.
+    coverage_sel = 0.
+    coverage_rand = 0.
+    coverage_nonrand = 0.
+
     for i in range(ndraw):
         np.random.seed(i)
-        approx = risk_selective_mle_full(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.20)
+        approx = inference_approx(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.40)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -226,18 +256,27 @@ def risk_selective_mle_full(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2,
             risk_LASSO_nonrand += approx[6]
             spower_rand += approx[7]
             spower_nonrand += approx[8]
+            coverage_sel += approx[9]
+            coverage_rand += approx[10]
+            coverage_nonrand += approx[11]
 
-        sys.stderr.write("iteration completed" + str(i) + "\n")
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+
         sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / float(i + 1)) + "\n")
 
+        sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
+        sys.stderr.write("randomized coverage" + str(coverage_rand / float(i + 1)) + "\n")
+        sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / float(i + 1)) + "\n")
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
+
 
 
 
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
index 16d16f2b3..678d61c87 100644
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ b/selection/adjusted_MLE/tests/test_MLE.py
@@ -61,7 +61,7 @@ def test_lasso(n=100, p=50, s=5, signal=5., B=500, seed_n=0, lam_frac=1., random
 def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=1.):
 
     while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
+        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.70, signal=signal, sigma=1.,
                                                        random_signs=True, equicorrelated=False)
         n, p = X.shape
         lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
@@ -192,7 +192,7 @@ def test_bias_lasso(nsim=2000):
     pivot_obs_info= []
     coverage = 0.
     for i in range(ndraw):
-        approx = test_lasso_approx_var(n=500, p=100, s=5, signal=3.5)
+        approx = test_lasso_approx_var(n=500, p=100, s=5, signal=3.)
         if approx is not None:
             pivot = approx[0]
             bias += approx[1]

From b711f27c4a0c27b105368ac15f8cc3198704a004 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 12 Dec 2017 12:55:12 -0800
Subject: [PATCH 437/617] added false positives to screening attributes

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index edfde6691..c30b641bb 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -165,6 +165,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         true_signals[beta != 0] = 1
         screened_randomized = np.logical_and(active, true_signals).sum() / float(s)
         screened_nonrandomized = np.logical_and(active_nonrand, true_signals).sum() / float(s)
+        false_positive_randomized = np.logical_and(active, ~true_signals).sum()/float(nactive)
+        false_positive_nonrandomized = np.logical_and(active_nonrand, ~true_signals).sum()/float(nactive_nonrand)
 
         true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
         unad_sd = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
@@ -223,6 +225,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
            relative_risk(est_LASSO, target_par, Sigma), \
            screened_randomized,\
            screened_nonrandomized,\
+           false_positive_randomized, \
+           false_positive_nonrandomized,\
            coverage_sel/float(nactive),\
            coverage_rand/float(nactive), \
            coverage_nonrand/float(nactive_nonrand)
@@ -239,6 +243,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     risk_LASSO_nonrand = 0.
     spower_rand = 0.
     spower_nonrand = 0.
+    false_positive_randomized = 0.
+    false_positive_nonrandomized = 0.
     coverage_sel = 0.
     coverage_rand = 0.
     coverage_nonrand = 0.
@@ -256,9 +262,11 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             risk_LASSO_nonrand += approx[6]
             spower_rand += approx[7]
             spower_nonrand += approx[8]
-            coverage_sel += approx[9]
-            coverage_rand += approx[10]
-            coverage_nonrand += approx[11]
+            false_positive_randomized += approx[9]
+            false_positive_nonrandomized += approx[10]
+            coverage_sel += approx[11]
+            coverage_rand += approx[12]
+            coverage_nonrand += approx[13]
 
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
@@ -266,10 +274,12 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-
         sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
+
         sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSO_rand_falsepositives" + str(false_positive_randomized / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSO_norand_falsepositives" + str(false_positive_nonrandomized / float(i + 1)) + "\n")
 
         sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
         sys.stderr.write("randomized coverage" + str(coverage_rand / float(i + 1)) + "\n")

From 7b81d39a58d48a835df68d96d1fe21b5d67d7862 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 12 Dec 2017 14:28:59 -0800
Subject: [PATCH 438/617] added inferential power

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 40 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index c30b641bb..e61c1706e 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -168,19 +168,37 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         false_positive_randomized = np.logical_and(active, ~true_signals).sum()/float(nactive)
         false_positive_nonrandomized = np.logical_and(active_nonrand, ~true_signals).sum()/float(nactive_nonrand)
 
+        true_set = np.asarray([u for u in range(p) if true_signals[u]])
+        active_set = np.asarray([t for t in range(p) if active[t]])
+        active_set_nonrand = np.asarray([s for s in range(p) if active_nonrand[s]])
+        active_bool = np.zeros(nactive, np.bool)
+        for x in range(nactive):
+            active_bool[x] = (np.in1d(active_set[x],true_set).sum()>0)
+        active_bool_nonrand= np.zeros(nactive_nonrand, np.bool)
+        for y in range(nactive_nonrand):
+            active_bool_nonrand[y] = (np.in1d(active_set_nonrand[y],true_set).sum()>0)
+
         true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
         unad_sd = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
         true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])).\
             dot(X[:, active_nonrand].T).dot(true_mean)
         unad_sd_nonrand = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
+
         coverage_sel = 0.
         coverage_rand = 0.
         coverage_nonrand = 0.
+        power_sel = 0.
+        power_rand = 0.
+        power_nonrand = 0.
 
         for k in range(nactive_nonrand):
             if (rel_LASSO[k]-(1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
                     and (rel_LASSO[k]+(1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
                 coverage_nonrand += 1
+            #print("non randomized intervals", rel_LASSO[k]-(1.65 * unad_sd_nonrand[k]),rel_LASSO[k]+(1.65 * unad_sd_nonrand[k]))
+            if active_bool_nonrand[k] == True and ((rel_LASSO[k]-(1.65 * unad_sd_nonrand[k])) > 0.
+                                                   or (rel_LASSO[k]+(1.65 * unad_sd_nonrand[k])) <0.):
+                power_nonrand += 1
 
         if nactive > 0:
             approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
@@ -196,8 +214,13 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             for j in range(nactive):
                 if (approx_MLE[j]-(1.65*approx_sd[j]))<= true_target[j] and (approx_MLE[j] + (1.65*approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
+                #print("randomized intervals", (approx_MLE[j]-(1.65*approx_sd[j])),(approx_MLE[j] + (1.65 * approx_sd[j])))
+                if active_bool[j]==True and ((approx_MLE[j]-(1.65*approx_sd[j]))> 0. or (approx_MLE[j] + (1.65*approx_sd[j])) < 0.):
+                    power_sel += 1
                 if (M_est.target_observed[j]-(1.65*unad_sd[j]))<= true_target[j] and (M_est.target_observed[j]+(1.65*unad_sd[j])) >= true_target[j]:
                     coverage_rand += 1
+                if active_bool[j]==True and ((M_est.target_observed[j]-(1.65*unad_sd[j]))>0. or (M_est.target_observed[j]+(1.65*unad_sd[j]))<0.):
+                    power_rand += 1
             break
 
     target_par = beta
@@ -229,7 +252,10 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
            false_positive_nonrandomized,\
            coverage_sel/float(nactive),\
            coverage_rand/float(nactive), \
-           coverage_nonrand/float(nactive_nonrand)
+           coverage_nonrand/float(nactive_nonrand), \
+           power_sel/float(s), \
+           power_rand/float(s), \
+           power_nonrand/float(s)
 
 if __name__ == "__main__":
 
@@ -248,10 +274,13 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     coverage_sel = 0.
     coverage_rand = 0.
     coverage_nonrand = 0.
+    power_sel = 0.
+    power_rand = 0.
+    power_nonrand = 0.
 
     for i in range(ndraw):
         np.random.seed(i)
-        approx = inference_approx(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.40)
+        approx = inference_approx(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.50)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -267,6 +296,9 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             coverage_sel += approx[11]
             coverage_rand += approx[12]
             coverage_nonrand += approx[13]
+            power_sel += approx[14]
+            power_rand += approx[15]
+            power_nonrand += approx[16]
 
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
@@ -285,6 +317,10 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         sys.stderr.write("randomized coverage" + str(coverage_rand / float(i + 1)) + "\n")
         sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / float(i + 1)) + "\n")
 
+        sys.stderr.write("selective power" + str(power_sel / float(i + 1)) + "\n")
+        sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
+        sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n")
+
         sys.stderr.write("iteration completed" + str(i) + "\n")
 
 

From b5bdf1065692dc8a08930c7e9009b8d0c63842e0 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 12 Dec 2017 17:36:46 -0800
Subject: [PATCH 439/617] corrected power

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index e61c1706e..14e661223 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -170,7 +170,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
         true_set = np.asarray([u for u in range(p) if true_signals[u]])
         active_set = np.asarray([t for t in range(p) if active[t]])
-        active_set_nonrand = np.asarray([s for s in range(p) if active_nonrand[s]])
+        active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
         active_bool = np.zeros(nactive, np.bool)
         for x in range(nactive):
             active_bool[x] = (np.in1d(active_set[x],true_set).sum()>0)
@@ -214,7 +214,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             for j in range(nactive):
                 if (approx_MLE[j]-(1.65*approx_sd[j]))<= true_target[j] and (approx_MLE[j] + (1.65*approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
-                #print("randomized intervals", (approx_MLE[j]-(1.65*approx_sd[j])),(approx_MLE[j] + (1.65 * approx_sd[j])))
+                print("randomized intervals", (approx_MLE[j]-(1.65*approx_sd[j])),(approx_MLE[j] + (1.65 * approx_sd[j])))
                 if active_bool[j]==True and ((approx_MLE[j]-(1.65*approx_sd[j]))> 0. or (approx_MLE[j] + (1.65*approx_sd[j])) < 0.):
                     power_sel += 1
                 if (M_est.target_observed[j]-(1.65*unad_sd[j]))<= true_target[j] and (M_est.target_observed[j]+(1.65*unad_sd[j])) >= true_target[j]:
@@ -280,7 +280,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
     for i in range(ndraw):
         np.random.seed(i)
-        approx = inference_approx(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.50)
+        approx = inference_approx(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.20)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 4307d32d74e44b9aa0301afa65f9d26d8c3749d8 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 12 Dec 2017 17:57:07 -0800
Subject: [PATCH 440/617] commit all changes

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 27 ++++++++++---------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 14e661223..eb7718a42 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -103,7 +103,8 @@ def relative_risk(est, truth, Sigma):
 
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
-def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2, randomization_scale=np.sqrt(0.25)):
+def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
+                     randomization_scale=np.sqrt(0.25)):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
@@ -163,10 +164,10 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         print("number of variables selected by tuned LASSO", (rel_LASSO!=0).sum())
         true_signals = np.zeros(p, np.bool)
         true_signals[beta != 0] = 1
-        screened_randomized = np.logical_and(active, true_signals).sum() / float(s)
-        screened_nonrandomized = np.logical_and(active_nonrand, true_signals).sum() / float(s)
-        false_positive_randomized = np.logical_and(active, ~true_signals).sum()/float(nactive)
-        false_positive_nonrandomized = np.logical_and(active_nonrand, ~true_signals).sum()/float(nactive_nonrand)
+        screened_randomized = np.logical_and(active, true_signals).sum() /float(s)
+        screened_nonrandomized = np.logical_and(active_nonrand, true_signals).sum() /float(s)
+        false_positive_randomized = np.logical_and(active, ~true_signals).sum()/max(float(nactive), 1.)
+        false_positive_nonrandomized = np.logical_and(active_nonrand, ~true_signals).sum()/max(float(nactive_nonrand),1.)
 
         true_set = np.asarray([u for u in range(p) if true_signals[u]])
         active_set = np.asarray([t for t in range(p) if active[t]])
@@ -195,7 +196,6 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             if (rel_LASSO[k]-(1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
                     and (rel_LASSO[k]+(1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
                 coverage_nonrand += 1
-            #print("non randomized intervals", rel_LASSO[k]-(1.65 * unad_sd_nonrand[k]),rel_LASSO[k]+(1.65 * unad_sd_nonrand[k]))
             if active_bool_nonrand[k] == True and ((rel_LASSO[k]-(1.65 * unad_sd_nonrand[k])) > 0.
                                                    or (rel_LASSO[k]+(1.65 * unad_sd_nonrand[k])) <0.):
                 power_nonrand += 1
@@ -211,10 +211,13 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             mle_target_lin, mle_soln_lin, mle_offset = mle_transform
             approx_sd = np.sqrt(np.diag(var))
 
+            if nactive == 1:
+                approx_MLE = np.array([approx_MLE])
+                approx_sd = np.array([approx_sd])
+
             for j in range(nactive):
                 if (approx_MLE[j]-(1.65*approx_sd[j]))<= true_target[j] and (approx_MLE[j] + (1.65*approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
-                print("randomized intervals", (approx_MLE[j]-(1.65*approx_sd[j])),(approx_MLE[j] + (1.65 * approx_sd[j])))
                 if active_bool[j]==True and ((approx_MLE[j]-(1.65*approx_sd[j]))> 0. or (approx_MLE[j] + (1.65*approx_sd[j])) < 0.):
                     power_sel += 1
                 if (M_est.target_observed[j]-(1.65*unad_sd[j]))<= true_target[j] and (M_est.target_observed[j]+(1.65*unad_sd[j])) >= true_target[j]:
@@ -250,16 +253,16 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
            screened_nonrandomized,\
            false_positive_randomized, \
            false_positive_nonrandomized,\
-           coverage_sel/float(nactive),\
-           coverage_rand/float(nactive), \
-           coverage_nonrand/float(nactive_nonrand), \
+           coverage_sel/max(float(nactive),1.),\
+           coverage_rand/max(float(nactive),1.), \
+           coverage_nonrand/max(float(nactive_nonrand),1.), \
            power_sel/float(s), \
            power_rand/float(s), \
            power_nonrand/float(s)
 
 if __name__ == "__main__":
 
-    ndraw = 50
+    ndraw = 100
     bias = 0.
     risk_selMLE = 0.
     risk_relLASSO = 0.
@@ -280,7 +283,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
     for i in range(ndraw):
         np.random.seed(i)
-        approx = inference_approx(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.20)
+        approx = inference_approx(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.05)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 627a7179dff61c0037e2a1ccb248fd2f262393cc Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 13 Dec 2017 09:00:47 -0800
Subject: [PATCH 441/617] a small correction

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index eb7718a42..b1900382c 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -117,7 +117,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         X /= (X.std(0)[None, :] * np.sqrt(n))
 
         X_val -= X_val.mean(0)[None, :]
-        X_val /= (X_val.std(0)[None, :] * np.sqrt(n))
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
         if p > n:
             sigma_est = np.std(y) / 2.
@@ -262,7 +262,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
 if __name__ == "__main__":
 
-    ndraw = 100
+    ndraw = 150
     bias = 0.
     risk_selMLE = 0.
     risk_relLASSO = 0.
@@ -282,8 +282,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     power_nonrand = 0.
 
     for i in range(ndraw):
-        np.random.seed(i)
-        approx = inference_approx(n=500, p=100, nval=100, rho=0.70, s=5, beta_type=2, snr=0.05)
+        approx = inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.10)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 7554f0a31412242a5b3066c232db8c17f02e03b1 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 15 Dec 2017 00:14:14 -0800
Subject: [PATCH 442/617] centered y and added partial risks

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 62 +++++++++++++++----
 1 file changed, 50 insertions(+), 12 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index b1900382c..cbe58e0cc 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -119,11 +119,15 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         X_val -= X_val.mean(0)[None, :]
         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
+        y -= y.mean()
+        y_val -= y_val.mean()
+
         if p > n:
             sigma_est = np.std(y) / 2.
         else:
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+            print("sigma est", sigma_est)
 
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
@@ -218,10 +222,15 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             for j in range(nactive):
                 if (approx_MLE[j]-(1.65*approx_sd[j]))<= true_target[j] and (approx_MLE[j] + (1.65*approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
+                    #print("selective intervals", (approx_MLE[j]-(1.65*approx_sd[j])),
+                    #      (approx_MLE[j] + (1.65 * approx_sd[j])),
+                    #      true_target[j])
                 if active_bool[j]==True and ((approx_MLE[j]-(1.65*approx_sd[j]))> 0. or (approx_MLE[j] + (1.65*approx_sd[j])) < 0.):
                     power_sel += 1
                 if (M_est.target_observed[j]-(1.65*unad_sd[j]))<= true_target[j] and (M_est.target_observed[j]+(1.65*unad_sd[j])) >= true_target[j]:
                     coverage_rand += 1
+                    #print("randomized intervals", (M_est.target_observed[j]-(1.65*unad_sd[j])), (M_est.target_observed[j]+(1.65*unad_sd[j])),
+                    #      true_target[j])
                 if active_bool[j]==True and ((M_est.target_observed[j]-(1.65*unad_sd[j]))>0. or (M_est.target_observed[j]+(1.65*unad_sd[j]))<0.):
                     power_rand += 1
             break
@@ -229,18 +238,20 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     target_par = beta
 
     ind_est = np.zeros(p)
-    ind_est[active] = mle_target_lin.dot(M_est.target_observed) + \
-                      mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
-    ind_est /= np.sqrt(n)
+    partial_ind_est = ind_est[active] = (mle_target_lin.dot(M_est.target_observed) +
+                                         mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset)/ np.sqrt(n)
 
     relaxed_Lasso = np.zeros(p)
-    relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
+    partial_relaxed_Lasso = relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
 
     Lasso_est = np.zeros(p)
-    Lasso_est[active] = M_est.observed_opt_state[:nactive] / np.sqrt(n)
+    partial_Lasso_est = Lasso_est[active] = M_est.observed_opt_state[:nactive] / np.sqrt(n)
 
     selective_MLE = np.zeros(p)
-    selective_MLE[active] = approx_MLE / np.sqrt(n)
+    partial_selective_MLE = selective_MLE[active] = approx_MLE / np.sqrt(n)
+
+    partial_Sigma = (Sigma[:, active])[active,:]
+    partial_Sigma_nonrand = (Sigma[:, active_nonrand])[active_nonrand,:]
 
     return (selective_MLE - target_par).sum() / float(nactive), \
            relative_risk(selective_MLE, target_par, Sigma), \
@@ -258,7 +269,14 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
            coverage_nonrand/max(float(nactive_nonrand),1.), \
            power_sel/float(s), \
            power_rand/float(s), \
-           power_nonrand/float(s)
+           power_nonrand/float(s),\
+           relative_risk(partial_selective_MLE, true_target, partial_Sigma),\
+           relative_risk(partial_relaxed_Lasso, true_target, partial_Sigma), \
+           relative_risk(partial_ind_est, true_target, partial_Sigma),\
+           relative_risk(partial_Lasso_est, true_target, partial_Sigma),\
+           relative_risk(rel_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand),\
+           relative_risk(est_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand),
+
 
 if __name__ == "__main__":
 
@@ -280,9 +298,15 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     power_sel = 0.
     power_rand = 0.
     power_nonrand = 0.
+    partial_risk_selMLE = 0.
+    partial_risk_relLASSO = 0.
+    partial_risk_indest = 0.
+    partial_risk_LASSO = 0.
+    partial_risk_relLASSO_nonrand = 0.
+    partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.10)
+        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.05)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -302,26 +326,40 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             power_rand += approx[15]
             power_nonrand += approx[16]
 
+            partial_risk_selMLE += approx[17]
+            partial_risk_relLASSO += approx[18]
+            partial_risk_indest += approx[19]
+            partial_risk_LASSO += approx[20]
+            partial_risk_relLASSO_nonrand += approx[21]
+            partial_risk_LASSO_nonrand += approx[22]
+
         sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
         sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n"+"\n")
 
         sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSO_rand_falsepositives" + str(false_positive_randomized / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSO_norand_falsepositives" + str(false_positive_nonrandomized / float(i + 1)) + "\n")
+        sys.stderr.write("overall_LASSO_norand_falsepositives" + str(false_positive_nonrandomized / float(i + 1)) + "\n"+"\n")
 
         sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
         sys.stderr.write("randomized coverage" + str(coverage_rand / float(i + 1)) + "\n")
-        sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / float(i + 1)) + "\n"+"\n")
 
         sys.stderr.write("selective power" + str(power_sel / float(i + 1)) + "\n")
         sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
-        sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n"+"\n")
+
+        sys.stderr.write("overall_partial_selrisk" + str(partial_risk_selMLE / float(i + 1)) + "\n")
+        sys.stderr.write("overall_partial_relLASSOrisk" + str(partial_risk_relLASSO / float(i + 1)) + "\n")
+        sys.stderr.write("overall_partial_indepestrisk" + str(partial_risk_indest / float(i + 1)) + "\n")
+        sys.stderr.write("overall_partial_LASSOrisk" + str(partial_risk_LASSO / float(i + 1)) + "\n")
+        sys.stderr.write("overall_partial_relLASSOrisk_norand" + str(partial_risk_relLASSO_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_partial_LASSOrisk_norand" + str(partial_risk_LASSO_nonrand / float(i + 1)) + "\n")
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
 

From 5762b7398ed0bd767298e9b6f34fa92b157badaf Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 15 Dec 2017 01:42:09 -0800
Subject: [PATCH 443/617] fixed scales in partial risk

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index cbe58e0cc..f9c6f935d 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -238,17 +238,22 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     target_par = beta
 
     ind_est = np.zeros(p)
-    partial_ind_est = ind_est[active] = (mle_target_lin.dot(M_est.target_observed) +
-                                         mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset)/ np.sqrt(n)
+    ind_est[active] = (mle_target_lin.dot(M_est.target_observed) +
+                                         mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset)
+    partial_ind_est = ind_est[active]
+    ind_est /= np.sqrt(n)
 
     relaxed_Lasso = np.zeros(p)
-    partial_relaxed_Lasso = relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
+    relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
+    partial_relaxed_Lasso = M_est.target_observed
 
     Lasso_est = np.zeros(p)
-    partial_Lasso_est = Lasso_est[active] = M_est.observed_opt_state[:nactive] / np.sqrt(n)
+    Lasso_est[active] = M_est.observed_opt_state[:nactive] / np.sqrt(n)
+    partial_Lasso_est = M_est.observed_opt_state[:nactive]
 
     selective_MLE = np.zeros(p)
-    partial_selective_MLE = selective_MLE[active] = approx_MLE / np.sqrt(n)
+    selective_MLE[active] = approx_MLE / np.sqrt(n)
+    partial_selective_MLE = approx_MLE
 
     partial_Sigma = (Sigma[:, active])[active,:]
     partial_Sigma_nonrand = (Sigma[:, active_nonrand])[active_nonrand,:]
@@ -274,8 +279,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
            relative_risk(partial_relaxed_Lasso, true_target, partial_Sigma), \
            relative_risk(partial_ind_est, true_target, partial_Sigma),\
            relative_risk(partial_Lasso_est, true_target, partial_Sigma),\
-           relative_risk(rel_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand),\
-           relative_risk(est_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand),
+           relative_risk(np.sqrt(n)*rel_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand),\
+           relative_risk(np.sqrt(n)*est_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand),
 
 
 if __name__ == "__main__":
@@ -306,7 +311,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.05)
+        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.42)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From f24b70a60fbf65bc9c5413abf8d20f2c11831b07 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 16 Dec 2017 14:20:39 -0800
Subject: [PATCH 444/617] tuning the selective MLE rather than lasso

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 113 ++++++++++--------
 1 file changed, 64 insertions(+), 49 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index b1900382c..0e0c07a44 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+from __future__ import print_function, division
 from rpy2.robjects.packages import importr
 from rpy2 import robjects
 
@@ -36,10 +36,11 @@ def glmnet_sigma(X, y):
 
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
-    source('~/best-subset/bestsubset/R/sim.R')
+    library(bestsubset) # source('~/best-subset/bestsubset/R/sim.R')
+    sim_xy = bestsubset::sim.xy
     ''')
 
-    r_simulate = robjects.globalenv['sim.xy']
+    r_simulate = robjects.globalenv['sim_xy']
     sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
     X = np.array(sim.rx2('x'))
     y = np.array(sim.rx2('y'))
@@ -53,7 +54,7 @@ def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
 
 def tuned_lasso(X, y, X_val,y_val):
     robjects.r('''
-        source('~/best-subset/bestsubset/R/lasso.R')
+        #source('~/best-subset/bestsubset/R/lasso.R')
         tuned_lasso_estimator = function(X,Y,X.val,Y.val){
         Y = as.matrix(Y)
         X = as.matrix(X)
@@ -104,7 +105,7 @@ def relative_risk(est, truth, Sigma):
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
 def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                     randomization_scale=np.sqrt(0.25)):
+                     randomization_scale=np.sqrt(0.1)):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
@@ -125,20 +126,22 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
 
+        y = y - y.mean()
+        y_val = y_val - y_val.mean()
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
 
 
         lam_seq = np.linspace(0.75, 2.75, num= 100)\
-                  *np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+                  * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
         err = np.zeros(100)
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         for k in range(100):
             lam = lam_seq[k]
             W = np.ones(p) * lam
             penalty = rr.group_lasso(np.arange(p),
                                      weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-            randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
             M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
                                     sigma=sigma_est)
 
@@ -146,10 +149,19 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             active = M_est._overall
             nactive = np.sum(active)
             Lasso_est = np.zeros(p)
-            Lasso_est[active] = M_est.observed_opt_state[:nactive]
+            
+            approx_MLE = solve_UMVU(M_est.target_transform,
+                                    M_est.opt_transform,
+                                    M_est.target_observed,
+                                    M_est.feasible_point,
+                                    M_est.target_cov,
+                                    M_est.randomizer_precision)[0]
+            Lasso_est[active] = approx_MLE
+            
             err[k] = np.mean((y_val-X_val.dot(Lasso_est))**2.)
 
         lam = lam_seq[np.argmin(err)]
+        print('lambda', lam)
         randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
@@ -242,23 +254,24 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     selective_MLE = np.zeros(p)
     selective_MLE[active] = approx_MLE / np.sqrt(n)
 
-    return (selective_MLE - target_par).sum() / float(nactive), \
-           relative_risk(selective_MLE, target_par, Sigma), \
-           relative_risk(relaxed_Lasso, target_par, Sigma), \
-           relative_risk(ind_est, target_par, Sigma),\
-           relative_risk(Lasso_est, target_par, Sigma),\
-           relative_risk(rel_LASSO, target_par, Sigma),\
-           relative_risk(est_LASSO, target_par, Sigma), \
-           screened_randomized,\
-           screened_nonrandomized,\
-           false_positive_randomized, \
-           false_positive_nonrandomized,\
-           coverage_sel/max(float(nactive),1.),\
-           coverage_rand/max(float(nactive),1.), \
-           coverage_nonrand/max(float(nactive_nonrand),1.), \
-           power_sel/float(s), \
-           power_rand/float(s), \
-           power_nonrand/float(s)
+    if screened_randomized == 1.:
+        return (selective_MLE - target_par).sum() / float(nactive), \
+               relative_risk(selective_MLE, target_par, Sigma), \
+               relative_risk(relaxed_Lasso, target_par, Sigma), \
+               relative_risk(ind_est, target_par, Sigma),\
+               relative_risk(Lasso_est, target_par, Sigma),\
+               relative_risk(rel_LASSO, target_par, Sigma),\
+               relative_risk(est_LASSO, target_par, Sigma), \
+               screened_randomized,\
+               screened_nonrandomized,\
+               false_positive_randomized, \
+               false_positive_nonrandomized,\
+               coverage_sel/max(float(nactive),1.),\
+               coverage_rand/max(float(nactive),1.), \
+               coverage_nonrand/max(float(nactive_nonrand),1.), \
+               power_sel/float(s), \
+               power_rand/float(s), \
+               power_nonrand/float(s)
 
 if __name__ == "__main__":
 
@@ -281,8 +294,9 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     power_rand = 0.
     power_nonrand = 0.
 
+    count = 0
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.10)
+        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=.1)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -301,29 +315,30 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             power_sel += approx[14]
             power_rand += approx[15]
             power_nonrand += approx[16]
-
-        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
-        sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
-        sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
-
-        sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSO_rand_falsepositives" + str(false_positive_randomized / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSO_norand_falsepositives" + str(false_positive_nonrandomized / float(i + 1)) + "\n")
-
-        sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
-        sys.stderr.write("randomized coverage" + str(coverage_rand / float(i + 1)) + "\n")
-        sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / float(i + 1)) + "\n")
-
-        sys.stderr.write("selective power" + str(power_sel / float(i + 1)) + "\n")
-        sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
-        sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n")
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
+            count += 1
+
+            sys.stderr.write("overall_bias" + str(bias / count) + "\n")
+            sys.stderr.write("overall_selrisk" + str(risk_selMLE / count) + "\n")
+            sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / count) + "\n")
+            sys.stderr.write("overall_indepestrisk" + str(risk_indest / count) + "\n")
+            sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / count) + "\n")
+            sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / count) + "\n")
+            sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / count) + "\n")
+
+            sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / count) + "\n")
+            sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / count) + "\n")
+            sys.stderr.write("overall_LASSO_rand_falsepositives" + str(false_positive_randomized / count) + "\n")
+            sys.stderr.write("overall_LASSO_norand_falsepositives" + str(false_positive_nonrandomized / count) + "\n")
+
+            sys.stderr.write("selective coverage" + str(coverage_sel / count) + "\n")
+            sys.stderr.write("randomized coverage" + str(coverage_rand / count) + "\n")
+            sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / count) + "\n")
+
+            sys.stderr.write("selective power" + str(power_sel / count) + "\n")
+            sys.stderr.write("randomized power" + str(power_rand / count) + "\n")
+            sys.stderr.write("nonrandomized power" + str(power_nonrand / count) + "\n")
+
+            sys.stderr.write("iteration completed, count" + str((i + 1, count)) + "\n")
 
 
 

From 7fdd58ebd62362abe16135939b29a7c8e8ab2198 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 16 Dec 2017 14:56:21 -0800
Subject: [PATCH 445/617] using full target

---
 selection/adjusted_MLE/selective_MLE.py       | 19 +++++++++++++++----
 selection/adjusted_MLE/tests/relaxed_lasso.py |  6 +++---
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index cc8215b49..28eaf1265 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -24,7 +24,7 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
             (_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
         self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
 
-        X, _ = self.loss.data
+        X, y = self.loss.data
         n, p = X.shape
         self.p = p
 
@@ -38,9 +38,20 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
         self.score_cov = (sigma**2.) * score_cov
 
         self.observed_score_state = self.observed_internal_state
-        self.target_observed = self.observed_internal_state[:self.nactive]
-        self.score_target_cov = self.score_cov[:, :self.nactive]
-        self.target_cov = self.score_cov[:self.nactive, :self.nactive]
+
+        target = 'full'
+        if target == "partial":
+            self.target_observed = self.observed_internal_state[:self.nactive]
+            self.score_target_cov = self.score_cov[:, :self.nactive]
+            self.target_cov = self.score_cov[:self.nactive, :self.nactive]
+        elif target == 'full':
+            X_full_inv = np.linalg.pinv(X)[self._overall]
+            self.target_observed = X_full_inv.dot(y)       # unique to OLS!!!!
+            self.target_cov = (sigma**2) * X_full_inv.dot(X_full_inv.T)
+            self.score_target_cov = np.zeros((p, self.nactive))
+            self.score_target_cov[:self.nactive] = np.linalg.pinv(X[:,self._overall]).dot(X_full_inv.T)
+            self.score_target_cov[self.nactive:] = X[:, ~self._overall].T.dot(projection_perp.dot(X_full_inv.T))
+            self.score_target_cov *= sigma**2
 
     def solve_map(self):
         #self.feasible_point = np.abs(self.initial_soln[self._overall])
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 0e0c07a44..f8689c7a4 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -105,7 +105,7 @@ def relative_risk(est, truth, Sigma):
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
 def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                     randomization_scale=np.sqrt(0.1)):
+                     randomization_scale=np.sqrt(0.25)):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
@@ -254,7 +254,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     selective_MLE = np.zeros(p)
     selective_MLE[active] = approx_MLE / np.sqrt(n)
 
-    if screened_randomized == 1.:
+    if True: # screened_randomized == 1.:
         return (selective_MLE - target_par).sum() / float(nactive), \
                relative_risk(selective_MLE, target_par, Sigma), \
                relative_risk(relaxed_Lasso, target_par, Sigma), \
@@ -296,7 +296,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
     count = 0
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=.1)
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.2)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 6a1668f7a2ed561d8fb31dd0a41abbd136bd6f01 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 16 Dec 2017 16:07:37 -0800
Subject: [PATCH 446/617] updated changes of target and tuning

---
 selection/adjusted_MLE/selective_MLE.py       |  23 ++-
 selection/adjusted_MLE/tests/relaxed_lasso.py | 141 +++++++++---------
 2 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index cc8215b49..53e6dfb26 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -24,7 +24,7 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
             (_score_linear_term[self._overall, :], _score_linear_term[~self._overall, :]), 0)
         self.score_transform = (self._score_linear_term, np.zeros(self._score_linear_term.shape[0]))
 
-        X, _ = self.loss.data
+        X, y = self.loss.data
         n, p = X.shape
         self.p = p
 
@@ -38,19 +38,29 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
         self.score_cov = (sigma**2.) * score_cov
 
         self.observed_score_state = self.observed_internal_state
-        self.target_observed = self.observed_internal_state[:self.nactive]
-        self.score_target_cov = self.score_cov[:, :self.nactive]
-        self.target_cov = self.score_cov[:self.nactive, :self.nactive]
+
+        target = 'full'
+        if self.nactive>0:
+            if target == "partial":
+                self.target_observed = self.observed_internal_state[:self.nactive]
+                self.score_target_cov = self.score_cov[:, :self.nactive]
+                self.target_cov = self.score_cov[:self.nactive, :self.nactive]
+            elif target == 'full':
+                X_full_inv = np.linalg.pinv(X)[self._overall]
+                self.target_observed = X_full_inv.dot(y)  # unique to OLS!!!!
+                self.target_cov = (sigma ** 2) * X_full_inv.dot(X_full_inv.T)
+                self.score_target_cov = np.zeros((p, self.nactive))
+                self.score_target_cov[:self.nactive] = np.linalg.pinv(X[:, self._overall]).dot(X_full_inv.T)
+                self.score_target_cov[self.nactive:] = X[:, ~self._overall].T.dot(projection_perp.dot(X_full_inv.T))
+                self.score_target_cov *= sigma ** 2
 
     def solve_map(self):
-        #self.feasible_point = np.abs(self.initial_soln[self._overall])
         self.feasible_point = np.ones(self._overall.sum())
         self.A = np.dot(self._score_linear_term, self.score_target_cov).dot(np.linalg.inv(self.target_cov))
         self.data_offset = self._score_linear_term.dot(self.observed_score_state)- self.A.dot(self.target_observed)
         self.target_transform = (self.A, self.data_offset)
 
     def solve_map_univariate_target(self, j):
-        #self.feasible_point = np.abs(self.initial_soln[self._overall])[j]
         self.feasible_point = np.ones(self._overall.sum())
         self.A = np.dot(self._score_linear_term, self.score_target_cov[:, j]) / self.target_cov[j, j]
         self.data_offset = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
@@ -228,5 +238,6 @@ def solve_barrier_nonneg(conjugate_arg,
 
 
 
+
 
 
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index f9c6f935d..5f0575421 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -10,6 +10,7 @@
 import regreg.api as rr
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
+from scipy.stats import norm as ndist
 
 def glmnet_sigma(X, y):
     robjects.r('''
@@ -59,25 +60,19 @@ def tuned_lasso(X, y, X_val,y_val):
         X = as.matrix(X)
         Y.val = as.vector(Y.val)
         X.val = as.matrix(X.val)
-
         rel.LASSO = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
         LASSO = lasso(X,Y,intercept=FALSE,nlam=50)
         beta.hat.rellasso = as.matrix(coef(rel.LASSO))
         beta.hat.lasso = as.matrix(coef(LASSO))
-
         min.lam = min(rel.LASSO$lambda)
         max.lam = max(rel.LASSO$lambda)
         lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
-
         muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
         muhat.val.lasso = as.matrix(predict(LASSO, X.val))
-
         err.val.rellasso = colMeans((muhat.val.rellasso - Y.val)^2)
         err.val.lasso = colMeans((muhat.val.lasso - Y.val)^2)
-
         opt_lam = ceiling(which.min(err.val.rellasso)/10)
         lambda.tuned = lam.seq[opt_lam]
-
         return(list(beta.hat.rellasso = beta.hat.rellasso[,which.min(err.val.rellasso)],
         beta.hat.lasso = beta.hat.lasso[,which.min(err.val.lasso)],
         lambda.tuned = lambda.tuned, lambda.seq = lam.seq))
@@ -104,8 +99,7 @@ def relative_risk(est, truth, Sigma):
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
 def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                     randomization_scale=np.sqrt(0.25)):
-
+                         randomization_scale=np.sqrt(0.25)):
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
         true_mean = X.dot(beta)
@@ -119,41 +113,47 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         X_val -= X_val.mean(0)[None, :]
         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
-        y -= y.mean()
-        y_val -= y_val.mean()
-
         if p > n:
             sigma_est = np.std(y) / 2.
         else:
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-            print("sigma est", sigma_est)
 
+        y = y - y.mean()
+        y_val = y_val - y_val.mean()
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
 
-
-        lam_seq = np.linspace(0.75, 2.75, num= 100)\
-                  *np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        lam_seq = np.linspace(0.75, 2.75, num=100) \
+                  * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
         err = np.zeros(100)
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         for k in range(100):
             lam = lam_seq[k]
             W = np.ones(p) * lam
             penalty = rr.group_lasso(np.arange(p),
                                      weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-            randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
             M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
                                     sigma=sigma_est)
 
-            M_est.solve_map()
             active = M_est._overall
-            nactive = np.sum(active)
+            nactive = active.sum()
             Lasso_est = np.zeros(p)
-            Lasso_est[active] = M_est.observed_opt_state[:nactive]
-            err[k] = np.mean((y_val-X_val.dot(Lasso_est))**2.)
+            if nactive>0:
+                M_est.solve_map()
+                approx_MLE = solve_UMVU(M_est.target_transform,
+                                        M_est.opt_transform,
+                                        M_est.target_observed,
+                                        M_est.feasible_point,
+                                        M_est.target_cov,
+                                        M_est.randomizer_precision)[0]
+                Lasso_est[active] = approx_MLE
+
+            err[k] = np.mean((y_val - X_val.dot(Lasso_est)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
+        print('lambda', lam)
         randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
@@ -165,27 +165,28 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         nactive = np.sum(active)
 
         print("number of variables selected by randomized LASSO", nactive)
-        print("number of variables selected by tuned LASSO", (rel_LASSO!=0).sum())
+        print("number of variables selected by tuned LASSO", (rel_LASSO != 0).sum())
         true_signals = np.zeros(p, np.bool)
         true_signals[beta != 0] = 1
-        screened_randomized = np.logical_and(active, true_signals).sum() /float(s)
-        screened_nonrandomized = np.logical_and(active_nonrand, true_signals).sum() /float(s)
-        false_positive_randomized = np.logical_and(active, ~true_signals).sum()/max(float(nactive), 1.)
-        false_positive_nonrandomized = np.logical_and(active_nonrand, ~true_signals).sum()/max(float(nactive_nonrand),1.)
+        screened_randomized = np.logical_and(active, true_signals).sum() / float(s)
+        screened_nonrandomized = np.logical_and(active_nonrand, true_signals).sum() / float(s)
+        false_positive_randomized = np.logical_and(active, ~true_signals).sum() / max(float(nactive), 1.)
+        false_positive_nonrandomized = np.logical_and(active_nonrand, ~true_signals).sum() / max(float(nactive_nonrand),
+                                                                                                 1.)
 
         true_set = np.asarray([u for u in range(p) if true_signals[u]])
         active_set = np.asarray([t for t in range(p) if active[t]])
         active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
         active_bool = np.zeros(nactive, np.bool)
         for x in range(nactive):
-            active_bool[x] = (np.in1d(active_set[x],true_set).sum()>0)
-        active_bool_nonrand= np.zeros(nactive_nonrand, np.bool)
+            active_bool[x] = (np.in1d(active_set[x], true_set).sum() > 0)
+        active_bool_nonrand = np.zeros(nactive_nonrand, np.bool)
         for y in range(nactive_nonrand):
-            active_bool_nonrand[y] = (np.in1d(active_set_nonrand[y],true_set).sum()>0)
+            active_bool_nonrand[y] = (np.in1d(active_set_nonrand[y], true_set).sum() > 0)
 
         true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
         unad_sd = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
-        true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])).\
+        true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])). \
             dot(X[:, active_nonrand].T).dot(true_mean)
         unad_sd_nonrand = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
 
@@ -197,11 +198,11 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         power_nonrand = 0.
 
         for k in range(nactive_nonrand):
-            if (rel_LASSO[k]-(1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
-                    and (rel_LASSO[k]+(1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
+            if (rel_LASSO[k] - (1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
+                    and (rel_LASSO[k] + (1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
                 coverage_nonrand += 1
-            if active_bool_nonrand[k] == True and ((rel_LASSO[k]-(1.65 * unad_sd_nonrand[k])) > 0.
-                                                   or (rel_LASSO[k]+(1.65 * unad_sd_nonrand[k])) <0.):
+            if active_bool_nonrand[k] == True and ((rel_LASSO[k] - (1.65 * unad_sd_nonrand[k])) > 0.
+                                                   or (rel_LASSO[k] + (1.65 * unad_sd_nonrand[k])) < 0.):
                 power_nonrand += 1
 
         if nactive > 0:
@@ -220,19 +221,21 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 approx_sd = np.array([approx_sd])
 
             for j in range(nactive):
-                if (approx_MLE[j]-(1.65*approx_sd[j]))<= true_target[j] and (approx_MLE[j] + (1.65*approx_sd[j])) >= true_target[j]:
+                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and (
+                    approx_MLE[j] + (1.65 * approx_sd[j])) >= \
+                        true_target[j]:
                     coverage_sel += 1
-                    #print("selective intervals", (approx_MLE[j]-(1.65*approx_sd[j])),
-                    #      (approx_MLE[j] + (1.65 * approx_sd[j])),
-                    #      true_target[j])
-                if active_bool[j]==True and ((approx_MLE[j]-(1.65*approx_sd[j]))> 0. or (approx_MLE[j] + (1.65*approx_sd[j])) < 0.):
+                if active_bool[j] == True and (
+                                (approx_MLE[j] - (1.65 * approx_sd[j])) > 0. or (
+                            approx_MLE[j] + (1.65 * approx_sd[j])) < 0.):
                     power_sel += 1
-                if (M_est.target_observed[j]-(1.65*unad_sd[j]))<= true_target[j] and (M_est.target_observed[j]+(1.65*unad_sd[j])) >= true_target[j]:
+                if (M_est.target_observed[j] - (1.65 * unad_sd[j])) <= true_target[j] and (
+                            M_est.target_observed[j] + (1.65 * unad_sd[j])) >= true_target[j]:
                     coverage_rand += 1
-                    #print("randomized intervals", (M_est.target_observed[j]-(1.65*unad_sd[j])), (M_est.target_observed[j]+(1.65*unad_sd[j])),
-                    #      true_target[j])
-                if active_bool[j]==True and ((M_est.target_observed[j]-(1.65*unad_sd[j]))>0. or (M_est.target_observed[j]+(1.65*unad_sd[j]))<0.):
+                if active_bool[j] == True and ((M_est.target_observed[j] - (1.65 * unad_sd[j])) > 0. or (
+                            M_est.target_observed[j] + (1.65 * unad_sd[j])) < 0.):
                     power_rand += 1
+
             break
 
     target_par = beta
@@ -258,29 +261,30 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_Sigma = (Sigma[:, active])[active,:]
     partial_Sigma_nonrand = (Sigma[:, active_nonrand])[active_nonrand,:]
 
-    return (selective_MLE - target_par).sum() / float(nactive), \
-           relative_risk(selective_MLE, target_par, Sigma), \
-           relative_risk(relaxed_Lasso, target_par, Sigma), \
-           relative_risk(ind_est, target_par, Sigma),\
-           relative_risk(Lasso_est, target_par, Sigma),\
-           relative_risk(rel_LASSO, target_par, Sigma),\
-           relative_risk(est_LASSO, target_par, Sigma), \
-           screened_randomized,\
-           screened_nonrandomized,\
-           false_positive_randomized, \
-           false_positive_nonrandomized,\
-           coverage_sel/max(float(nactive),1.),\
-           coverage_rand/max(float(nactive),1.), \
-           coverage_nonrand/max(float(nactive_nonrand),1.), \
-           power_sel/float(s), \
-           power_rand/float(s), \
-           power_nonrand/float(s),\
-           relative_risk(partial_selective_MLE, true_target, partial_Sigma),\
-           relative_risk(partial_relaxed_Lasso, true_target, partial_Sigma), \
-           relative_risk(partial_ind_est, true_target, partial_Sigma),\
-           relative_risk(partial_Lasso_est, true_target, partial_Sigma),\
-           relative_risk(np.sqrt(n)*rel_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand),\
-           relative_risk(np.sqrt(n)*est_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand),
+    if True:
+        return (selective_MLE - target_par).sum() / float(nactive), \
+               relative_risk(selective_MLE, target_par, Sigma), \
+               relative_risk(relaxed_Lasso, target_par, Sigma), \
+               relative_risk(ind_est, target_par, Sigma), \
+               relative_risk(Lasso_est, target_par, Sigma), \
+               relative_risk(rel_LASSO, target_par, Sigma), \
+               relative_risk(est_LASSO, target_par, Sigma), \
+               screened_randomized, \
+               screened_nonrandomized, \
+               false_positive_randomized, \
+               false_positive_nonrandomized, \
+               coverage_sel / max(float(nactive), 1.), \
+               coverage_rand / max(float(nactive), 1.), \
+               coverage_nonrand / max(float(nactive_nonrand), 1.), \
+               power_sel / float(s), \
+               power_rand / float(s), \
+               power_nonrand / float(s), \
+               relative_risk(partial_selective_MLE, true_target, partial_Sigma), \
+               relative_risk(partial_relaxed_Lasso, true_target, partial_Sigma), \
+               relative_risk(partial_ind_est, true_target, partial_Sigma), \
+               relative_risk(partial_Lasso_est, true_target, partial_Sigma), \
+               relative_risk(np.sqrt(n) * rel_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand), \
+               relative_risk(np.sqrt(n) * est_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand)
 
 
 if __name__ == "__main__":
@@ -311,7 +315,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.42)
+        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=10, beta_type=2, snr=0.2)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -320,13 +324,16 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             risk_LASSO += approx[4]
             risk_relLASSO_nonrand += approx[5]
             risk_LASSO_nonrand += approx[6]
+
             spower_rand += approx[7]
             spower_nonrand += approx[8]
             false_positive_randomized += approx[9]
             false_positive_nonrandomized += approx[10]
+
             coverage_sel += approx[11]
             coverage_rand += approx[12]
             coverage_nonrand += approx[13]
+
             power_sel += approx[14]
             power_rand += approx[15]
             power_nonrand += approx[16]
@@ -364,7 +371,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         sys.stderr.write("overall_partial_indepestrisk" + str(partial_risk_indest / float(i + 1)) + "\n")
         sys.stderr.write("overall_partial_LASSOrisk" + str(partial_risk_LASSO / float(i + 1)) + "\n")
         sys.stderr.write("overall_partial_relLASSOrisk_norand" + str(partial_risk_relLASSO_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_partial_LASSOrisk_norand" + str(partial_risk_LASSO_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall_partial_LASSOrisk_norand" + str(partial_risk_LASSO_nonrand / float(i + 1)) + "\n"+ "\n")
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
 

From 39b7a4bb451984932b275f13ac3357a277fe07f4 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 16 Dec 2017 20:49:15 -0800
Subject: [PATCH 447/617] added debiased target

---
 selection/adjusted_MLE/selective_MLE.py       | 12 ++++++--
 selection/adjusted_MLE/tests/relaxed_lasso.py | 28 ++++++++++++-------
 selection/algorithms/debiased_lasso.py        |  2 +-
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 53e6dfb26..a286ba250 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -4,7 +4,7 @@
 
 class M_estimator_map(M_estimator):
 
-    def __init__(self, loss, epsilon, penalty, randomization, randomization_scale = 1., sigma= 1.):
+    def __init__(self, loss, epsilon, penalty, randomization, M, randomization_scale = 1., sigma= 1.):
         M_estimator.__init__(self, loss, epsilon, penalty, randomization)
         self.randomizer = randomization
         self.randomization_scale = randomization_scale
@@ -39,7 +39,7 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
 
         self.observed_score_state = self.observed_internal_state
 
-        target = 'full'
+        target = 'debiased'
         if self.nactive>0:
             if target == "partial":
                 self.target_observed = self.observed_internal_state[:self.nactive]
@@ -53,6 +53,14 @@ def __init__(self, loss, epsilon, penalty, randomization, randomization_scale =
                 self.score_target_cov[:self.nactive] = np.linalg.pinv(X[:, self._overall]).dot(X_full_inv.T)
                 self.score_target_cov[self.nactive:] = X[:, ~self._overall].T.dot(projection_perp.dot(X_full_inv.T))
                 self.score_target_cov *= sigma ** 2
+            elif target == 'debiased':
+                X_full_inv = M.dot(X.T)[self._overall]
+                self.target_observed = X_full_inv.dot(y)  # unique to OLS!!!!
+                self.target_cov = (sigma ** 2) * X_full_inv.dot(X_full_inv.T)
+                self.score_target_cov = np.zeros((p, self.nactive))
+                self.score_target_cov[:self.nactive] = np.linalg.pinv(X[:, self._overall]).dot(X_full_inv.T)
+                self.score_target_cov[self.nactive:] = X[:, ~self._overall].T.dot(projection_perp.dot(X_full_inv.T))
+                self.score_target_cov *= sigma ** 2
 
     def solve_map(self):
         self.feasible_point = np.ones(self._overall.sum())
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 5f0575421..c6f83b6db 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -11,6 +11,7 @@
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from scipy.stats import norm as ndist
+from selection.algorithms.debiased_lasso import _find_row_approx_inverse
 
 def glmnet_sigma(X, y):
     robjects.r('''
@@ -99,7 +100,7 @@ def relative_risk(est, truth, Sigma):
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
 def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                         randomization_scale=np.sqrt(0.25)):
+                         randomization_scale=np.sqrt(0.25), target="debiased"):
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
         true_mean = X.dot(beta)
@@ -119,6 +120,13 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
 
+        if target == "debiased":
+            M = np.zeros((p, p))
+            for var in range(p):
+                M[:, var] = _find_row_approx_inverse(X.T.dot(X), var, delta=0.5)
+        else:
+            M = np.identity(p)
+            
         y = y - y.mean()
         y_val = y_val - y_val.mean()
         loss = rr.glm.gaussian(X, y)
@@ -134,7 +142,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             penalty = rr.group_lasso(np.arange(p),
                                      weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
+            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale,
                                     sigma=sigma_est)
 
             active = M_est._overall
@@ -158,7 +166,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale,
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale,
                                 sigma=sigma_est)
         M_est.solve_map()
         active = M_est._overall
@@ -315,7 +323,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=10, beta_type=2, snr=0.2)
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.2)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -366,12 +374,12 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
         sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n"+"\n")
 
-        sys.stderr.write("overall_partial_selrisk" + str(partial_risk_selMLE / float(i + 1)) + "\n")
-        sys.stderr.write("overall_partial_relLASSOrisk" + str(partial_risk_relLASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_partial_indepestrisk" + str(partial_risk_indest / float(i + 1)) + "\n")
-        sys.stderr.write("overall_partial_LASSOrisk" + str(partial_risk_LASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_partial_relLASSOrisk_norand" + str(partial_risk_relLASSO_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_partial_LASSOrisk_norand" + str(partial_risk_LASSO_nonrand / float(i + 1)) + "\n"+ "\n")
+        # sys.stderr.write("overall_partial_selrisk" + str(partial_risk_selMLE / float(i + 1)) + "\n")
+        # sys.stderr.write("overall_partial_relLASSOrisk" + str(partial_risk_relLASSO / float(i + 1)) + "\n")
+        # sys.stderr.write("overall_partial_indepestrisk" + str(partial_risk_indest / float(i + 1)) + "\n")
+        # sys.stderr.write("overall_partial_LASSOrisk" + str(partial_risk_LASSO / float(i + 1)) + "\n")
+        # sys.stderr.write("overall_partial_relLASSOrisk_norand" + str(partial_risk_relLASSO_nonrand / float(i + 1)) + "\n")
+        # sys.stderr.write("overall_partial_LASSOrisk_norand" + str(partial_risk_LASSO_nonrand / float(i + 1)) + "\n"+ "\n")
 
         sys.stderr.write("iteration completed" + str(i) + "\n")
 
diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index c270b233a..b7976c1d5 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -4,7 +4,7 @@
                         l1norm,
                         simple_problem)
 
-from .debiased_lasso_utils import solve_wide_
+#from .debiased_lasso_utils import solve_wide_
 from ..constraints.affine import constraints
 
 def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):

From bdcad4acb6f15b180c12c1c65f0eab6845217456 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 16 Dec 2017 23:18:23 -0800
Subject: [PATCH 448/617] commit changes so far

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 29 ++++++++++++++-----
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index c6f83b6db..c27031ae0 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -100,7 +100,8 @@ def relative_risk(est, truth, Sigma):
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
 def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                         randomization_scale=np.sqrt(0.25), target="debiased"):
+                         randomization_scale=np.sqrt(0.25), target="partial"):
+
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
         true_mean = X.dot(beta)
@@ -126,7 +127,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 M[:, var] = _find_row_approx_inverse(X.T.dot(X), var, delta=0.5)
         else:
             M = np.identity(p)
-            
+
         y = y - y.mean()
         y_val = y_val - y_val.mean()
         loss = rr.glm.gaussian(X, y)
@@ -192,11 +193,25 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         for y in range(nactive_nonrand):
             active_bool_nonrand[y] = (np.in1d(active_set_nonrand[y], true_set).sum() > 0)
 
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
-        unad_sd = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
-        true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])). \
-            dot(X[:, active_nonrand].T).dot(true_mean)
-        unad_sd_nonrand = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
+        if target == "partial":
+            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
+            unad_sd = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
+            true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])). \
+                dot(X[:, active_nonrand].T).dot(true_mean)
+            unad_sd_nonrand = sigma_est * np.sqrt(
+                np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
+        elif target == "full":
+            X_full_inv = np.linalg.pinv(X)
+            true_target = X_full_inv[active].dot(true_mean)
+            unad_sd = sigma_est * np.sqrt(np.diag(X_full_inv[active].dot(X_full_inv[active].T)))
+            true_target_nonrand = X_full_inv[active_nonrand].dot(true_mean)
+            unad_sd_nonrand = sigma_est * np.sqrt(np.diag(X_full_inv[active_nonrand].dot(X_full_inv[active_nonrand].T)))
+        elif target == "debiased":
+            X_full_inv = M.dot(X.T)
+            true_target = X_full_inv[active].dot(true_mean)
+            unad_sd = sigma_est * np.sqrt(np.diag(X_full_inv[active].dot(X_full_inv[active].T)))
+            true_target_nonrand = X_full_inv[active_nonrand].dot(true_mean)
+            unad_sd_nonrand = sigma_est * np.sqrt(np.diag(X_full_inv[active_nonrand].dot(X_full_inv[active_nonrand].T)))
 
         coverage_sel = 0.
         coverage_rand = 0.

From 45c2a54fcab67ce4ca0ad5fb9b281741db4b91a4 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sun, 17 Dec 2017 01:04:03 -0800
Subject: [PATCH 449/617] reverted back to fixing sigma as per snr in sim.xy

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 30 +++++++++++--------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index c27031ae0..e9db36c6e 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -120,6 +120,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         else:
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+            print("sigma and sigma_est", sigma, sigma_est)
 
         if target == "debiased":
             M = np.zeros((p, p))
@@ -136,14 +137,14 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         lam_seq = np.linspace(0.75, 2.75, num=100) \
                   * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
         err = np.zeros(100)
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale*sigma_est)
         for k in range(100):
             lam = lam_seq[k]
             W = np.ones(p) * lam
             penalty = rr.group_lasso(np.arange(p),
                                      weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
-            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale,
+            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale*sigma_est,
                                     sigma=sigma_est)
 
             active = M_est._overall
@@ -163,13 +164,12 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
         lam = lam_seq[np.argmin(err)]
         print('lambda', lam)
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale*sigma_est)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p),
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale,
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale*sigma_est,
                                 sigma=sigma_est)
-        M_est.solve_map()
         active = M_est._overall
         nactive = np.sum(active)
 
@@ -229,6 +229,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 power_nonrand += 1
 
         if nactive > 0:
+            M_est.solve_map()
             approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
                                                                        M_est.opt_transform,
                                                                        M_est.target_observed,
@@ -248,6 +249,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                     approx_MLE[j] + (1.65 * approx_sd[j])) >= \
                         true_target[j]:
                     coverage_sel += 1
+                #print("selective intervals",(approx_MLE[j] - (1.65 * approx_sd[j])), (approx_MLE[j] + (1.65 * approx_sd[j])))
                 if active_bool[j] == True and (
                                 (approx_MLE[j] - (1.65 * approx_sd[j])) > 0. or (
                             approx_MLE[j] + (1.65 * approx_sd[j])) < 0.):
@@ -255,6 +257,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 if (M_est.target_observed[j] - (1.65 * unad_sd[j])) <= true_target[j] and (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) >= true_target[j]:
                     coverage_rand += 1
+                #print("randomized intervals", (M_est.target_observed[j] - (1.65 * unad_sd[j])),
+                #      (M_est.target_observed[j] + (1.65 * unad_sd[j])))
                 if active_bool[j] == True and ((M_est.target_observed[j] - (1.65 * unad_sd[j])) > 0. or (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) < 0.):
                     power_rand += 1
@@ -338,7 +342,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.2)
+        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.1)
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -376,18 +380,18 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
         sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n"+"\n")
 
-        sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSO_rand_falsepositives" + str(false_positive_randomized / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSO_norand_falsepositives" + str(false_positive_nonrandomized / float(i + 1)) + "\n"+"\n")
+        # sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / float(i + 1)) + "\n")
+        # sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / float(i + 1)) + "\n")
+        # sys.stderr.write("overall_LASSO_rand_falsepositives" + str(false_positive_randomized / float(i + 1)) + "\n")
+        # sys.stderr.write("overall_LASSO_norand_falsepositives" + str(false_positive_nonrandomized / float(i + 1)) + "\n"+"\n")
 
         sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
         sys.stderr.write("randomized coverage" + str(coverage_rand / float(i + 1)) + "\n")
         sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / float(i + 1)) + "\n"+"\n")
 
-        sys.stderr.write("selective power" + str(power_sel / float(i + 1)) + "\n")
-        sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
-        sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n"+"\n")
+        # sys.stderr.write("selective power" + str(power_sel / float(i + 1)) + "\n")
+        # sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
+        # sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n"+"\n")
 
         # sys.stderr.write("overall_partial_selrisk" + str(partial_risk_selMLE / float(i + 1)) + "\n")
         # sys.stderr.write("overall_partial_relLASSOrisk" + str(partial_risk_relLASSO / float(i + 1)) + "\n")

From 30464f75484652f4cb2e32a5aa2aee44cec148a0 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sun, 17 Dec 2017 15:11:09 -0800
Subject: [PATCH 450/617] normalized y's by sigma_est

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 53 +++++++++----------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index e9db36c6e..5dfb59eab 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -100,7 +100,7 @@ def relative_risk(est, truth, Sigma):
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
 def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                         randomization_scale=np.sqrt(0.25), target="partial"):
+                         randomization_scale=np.sqrt(0.10), target="partial"):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
@@ -130,26 +130,25 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             M = np.identity(p)
 
         y = y - y.mean()
+        y /= sigma_est
         y_val = y_val - y_val.mean()
+        y_val /= sigma_est
+        true_mean /= sigma_est
+
         loss = rr.glm.gaussian(X, y)
         epsilon = 1. / np.sqrt(n)
-
-        lam_seq = np.linspace(0.75, 2.75, num=100) \
-                  * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
+        lam_seq = np.linspace(0.75, 2.75, num=100) * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
         err = np.zeros(100)
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale*sigma_est)
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         for k in range(100):
             lam = lam_seq[k]
             W = np.ones(p) * lam
-            penalty = rr.group_lasso(np.arange(p),
-                                     weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale*sigma_est,
-                                    sigma=sigma_est)
+            penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
+            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale, sigma=1.)
 
             active = M_est._overall
             nactive = active.sum()
-            Lasso_est = np.zeros(p)
+            approx_MLE_est = np.zeros(p)
             if nactive>0:
                 M_est.solve_map()
                 approx_MLE = solve_UMVU(M_est.target_transform,
@@ -158,18 +157,16 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                                         M_est.feasible_point,
                                         M_est.target_cov,
                                         M_est.randomizer_precision)[0]
-                Lasso_est[active] = approx_MLE
+                approx_MLE_est[active] = approx_MLE
 
-            err[k] = np.mean((y_val - X_val.dot(Lasso_est)) ** 2.)
+            err[k] = np.mean((y_val - X_val.dot(approx_MLE_est)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
         print('lambda', lam)
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale*sigma_est)
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale*sigma_est,
-                                sigma=sigma_est)
+        penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale,sigma=1.)
         active = M_est._overall
         nactive = np.sum(active)
 
@@ -195,23 +192,23 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
         if target == "partial":
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
-            unad_sd = sigma_est * np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
+            unad_sd =  np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
             true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])). \
                 dot(X[:, active_nonrand].T).dot(true_mean)
-            unad_sd_nonrand = sigma_est * np.sqrt(
+            unad_sd_nonrand = np.sqrt(
                 np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
         elif target == "full":
             X_full_inv = np.linalg.pinv(X)
             true_target = X_full_inv[active].dot(true_mean)
-            unad_sd = sigma_est * np.sqrt(np.diag(X_full_inv[active].dot(X_full_inv[active].T)))
+            unad_sd = np.sqrt(np.diag(X_full_inv[active].dot(X_full_inv[active].T)))
             true_target_nonrand = X_full_inv[active_nonrand].dot(true_mean)
-            unad_sd_nonrand = sigma_est * np.sqrt(np.diag(X_full_inv[active_nonrand].dot(X_full_inv[active_nonrand].T)))
+            unad_sd_nonrand = np.sqrt(np.diag(X_full_inv[active_nonrand].dot(X_full_inv[active_nonrand].T)))
         elif target == "debiased":
             X_full_inv = M.dot(X.T)
             true_target = X_full_inv[active].dot(true_mean)
-            unad_sd = sigma_est * np.sqrt(np.diag(X_full_inv[active].dot(X_full_inv[active].T)))
+            unad_sd = np.sqrt(np.diag(X_full_inv[active].dot(X_full_inv[active].T)))
             true_target_nonrand = X_full_inv[active_nonrand].dot(true_mean)
-            unad_sd_nonrand = sigma_est * np.sqrt(np.diag(X_full_inv[active_nonrand].dot(X_full_inv[active_nonrand].T)))
+            unad_sd_nonrand = np.sqrt(np.diag(X_full_inv[active_nonrand].dot(X_full_inv[active_nonrand].T)))
 
         coverage_sel = 0.
         coverage_rand = 0.
@@ -271,18 +268,18 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     ind_est[active] = (mle_target_lin.dot(M_est.target_observed) +
                                          mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset)
     partial_ind_est = ind_est[active]
-    ind_est /= np.sqrt(n)
+    ind_est /= (np.sqrt(n)*(1./sigma_est))
 
     relaxed_Lasso = np.zeros(p)
-    relaxed_Lasso[active] = M_est.target_observed / np.sqrt(n)
+    relaxed_Lasso[active] = M_est.target_observed / (np.sqrt(n)*(1./sigma_est))
     partial_relaxed_Lasso = M_est.target_observed
 
     Lasso_est = np.zeros(p)
-    Lasso_est[active] = M_est.observed_opt_state[:nactive] / np.sqrt(n)
+    Lasso_est[active] = M_est.observed_opt_state[:nactive] / (np.sqrt(n)*(1./sigma_est))
     partial_Lasso_est = M_est.observed_opt_state[:nactive]
 
     selective_MLE = np.zeros(p)
-    selective_MLE[active] = approx_MLE / np.sqrt(n)
+    selective_MLE[active] = approx_MLE / (np.sqrt(n)*(1./sigma_est))
     partial_selective_MLE = approx_MLE
 
     partial_Sigma = (Sigma[:, active])[active,:]

From b2b54cebf938121926623ef502f536b55b9db703 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sun, 17 Dec 2017 15:31:32 -0800
Subject: [PATCH 451/617] changed scale of relaxed lasso tuned

---
 selection/adjusted_MLE/selective_MLE.py       |  2 +-
 selection/adjusted_MLE/tests/relaxed_lasso.py | 24 +++++++++----------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index a286ba250..1481c50d0 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -39,7 +39,7 @@ def __init__(self, loss, epsilon, penalty, randomization, M, randomization_scale
 
         self.observed_score_state = self.observed_internal_state
 
-        target = 'debiased'
+        target = 'partial'
         if self.nactive>0:
             if target == "partial":
                 self.target_observed = self.observed_internal_state[:self.nactive]
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 5dfb59eab..e31d909e0 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -100,7 +100,7 @@ def relative_risk(est, truth, Sigma):
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
 def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                         randomization_scale=np.sqrt(0.10), target="partial"):
+                         randomization_scale=np.sqrt(0.25), target="partial"):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
@@ -195,8 +195,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             unad_sd =  np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
             true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])). \
                 dot(X[:, active_nonrand].T).dot(true_mean)
-            unad_sd_nonrand = np.sqrt(
-                np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
+            unad_sd_nonrand = np.sqrt(np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
         elif target == "full":
             X_full_inv = np.linalg.pinv(X)
             true_target = X_full_inv[active].dot(true_mean)
@@ -218,11 +217,11 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         power_nonrand = 0.
 
         for k in range(nactive_nonrand):
-            if (rel_LASSO[k] - (1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
-                    and (rel_LASSO[k] + (1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
+            if ((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
+                    and ((np.sqrt(n)*rel_LASSO[k]/sigma_est) + (1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
                 coverage_nonrand += 1
-            if active_bool_nonrand[k] == True and ((rel_LASSO[k] - (1.65 * unad_sd_nonrand[k])) > 0.
-                                                   or (rel_LASSO[k] + (1.65 * unad_sd_nonrand[k])) < 0.):
+            if active_bool_nonrand[k] == True and (((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])) > 0.
+                                                   or ((np.sqrt(n)*rel_LASSO[k]/sigma_est) + (1.65 * unad_sd_nonrand[k])) < 0.):
                 power_nonrand += 1
 
         if nactive > 0:
@@ -242,11 +241,10 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 approx_sd = np.array([approx_sd])
 
             for j in range(nactive):
-                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and (
-                    approx_MLE[j] + (1.65 * approx_sd[j])) >= \
-                        true_target[j]:
+                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and \
+                                (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
-                #print("selective intervals",(approx_MLE[j] - (1.65 * approx_sd[j])), (approx_MLE[j] + (1.65 * approx_sd[j])))
+                print("selective intervals",(approx_MLE[j] - (1.65 * approx_sd[j])), (approx_MLE[j] + (1.65 * approx_sd[j])))
                 if active_bool[j] == True and (
                                 (approx_MLE[j] - (1.65 * approx_sd[j])) > 0. or (
                             approx_MLE[j] + (1.65 * approx_sd[j])) < 0.):
@@ -254,8 +252,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 if (M_est.target_observed[j] - (1.65 * unad_sd[j])) <= true_target[j] and (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) >= true_target[j]:
                     coverage_rand += 1
-                #print("randomized intervals", (M_est.target_observed[j] - (1.65 * unad_sd[j])),
-                #      (M_est.target_observed[j] + (1.65 * unad_sd[j])))
+                print("randomized intervals", (M_est.target_observed[j] - (1.65 * unad_sd[j])),
+                      (M_est.target_observed[j] + (1.65 * unad_sd[j])))
                 if active_bool[j] == True and ((M_est.target_observed[j] - (1.65 * unad_sd[j])) > 0. or (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) < 0.):
                     power_rand += 1

From ab02c39b07ef4ebdfc6d148015ff0781a7c99eda Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sun, 17 Dec 2017 17:27:09 -0800
Subject: [PATCH 452/617] added target in selection map

---
 selection/adjusted_MLE/selective_MLE.py       |  3 +--
 selection/adjusted_MLE/tests/relaxed_lasso.py | 25 +++++++++++--------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index 1481c50d0..c5d635249 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -4,7 +4,7 @@
 
 class M_estimator_map(M_estimator):
 
-    def __init__(self, loss, epsilon, penalty, randomization, M, randomization_scale = 1., sigma= 1.):
+    def __init__(self, loss, epsilon, penalty, randomization, M, target="partial", randomization_scale = 1., sigma= 1.):
         M_estimator.__init__(self, loss, epsilon, penalty, randomization)
         self.randomizer = randomization
         self.randomization_scale = randomization_scale
@@ -39,7 +39,6 @@ def __init__(self, loss, epsilon, penalty, randomization, M, randomization_scale
 
         self.observed_score_state = self.observed_internal_state
 
-        target = 'partial'
         if self.nactive>0:
             if target == "partial":
                 self.target_observed = self.observed_internal_state[:self.nactive]
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index e31d909e0..16da54846 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -117,15 +117,17 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
         if p > n:
             sigma_est = np.std(y) / 2.
+            print("sigma and sigma_est", sigma, sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
             sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
             print("sigma and sigma_est", sigma, sigma_est)
 
         if target == "debiased":
-            M = np.zeros((p, p))
-            for var in range(p):
-                M[:, var] = _find_row_approx_inverse(X.T.dot(X), var, delta=0.5)
+            # M = np.zeros((p, p))
+            # for var in range(p):
+            #     M[:, var] = _find_row_approx_inverse(X.T.dot(X), var, delta=0.5)
+            M = np.linalg.inv(Sigma)
         else:
             M = np.identity(p)
 
@@ -144,7 +146,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             lam = lam_seq[k]
             W = np.ones(p) * lam
             penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale, sigma=1.)
+            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target, randomization_scale=randomization_scale, sigma=1.)
 
             active = M_est._overall
             nactive = active.sum()
@@ -166,7 +168,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, randomization_scale=randomization_scale,sigma=1.)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target, randomization_scale=randomization_scale,sigma=1.)
         active = M_est._overall
         nactive = np.sum(active)
 
@@ -220,6 +222,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             if ((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
                     and ((np.sqrt(n)*rel_LASSO[k]/sigma_est) + (1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
                 coverage_nonrand += 1
+            #print("tuned nonrandomized intervals", ((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])),
+            #      ((np.sqrt(n) * rel_LASSO[k] / sigma_est) + (1.65 * unad_sd_nonrand[k])))
             if active_bool_nonrand[k] == True and (((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])) > 0.
                                                    or ((np.sqrt(n)*rel_LASSO[k]/sigma_est) + (1.65 * unad_sd_nonrand[k])) < 0.):
                 power_nonrand += 1
@@ -252,8 +256,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 if (M_est.target_observed[j] - (1.65 * unad_sd[j])) <= true_target[j] and (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) >= true_target[j]:
                     coverage_rand += 1
-                print("randomized intervals", (M_est.target_observed[j] - (1.65 * unad_sd[j])),
-                      (M_est.target_observed[j] + (1.65 * unad_sd[j])))
+                #print("randomized intervals", (M_est.target_observed[j] - (1.65 * unad_sd[j])),(M_est.target_observed[j] + (1.65 * unad_sd[j])))
                 if active_bool[j] == True and ((M_est.target_observed[j] - (1.65 * unad_sd[j])) > 0. or (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) < 0.):
                     power_rand += 1
@@ -337,7 +340,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.1)
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.10, target="full")
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
@@ -384,9 +387,9 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         sys.stderr.write("randomized coverage" + str(coverage_rand / float(i + 1)) + "\n")
         sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / float(i + 1)) + "\n"+"\n")
 
-        # sys.stderr.write("selective power" + str(power_sel / float(i + 1)) + "\n")
-        # sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
-        # sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n"+"\n")
+        sys.stderr.write("selective power" + str(power_sel / float(i + 1)) + "\n")
+        sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
+        sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n"+"\n")
 
         # sys.stderr.write("overall_partial_selrisk" + str(partial_risk_selMLE / float(i + 1)) + "\n")
         # sys.stderr.write("overall_partial_relLASSOrisk" + str(partial_risk_relLASSO / float(i + 1)) + "\n")

From ef6c9a6c76a1250699102c47b87c3614581b0963 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 18 Dec 2017 12:03:41 -0800
Subject: [PATCH 453/617] bootstrap intervals very long for high dims

---
 .../tests/high_dim_boot_coverage.py           | 174 ++++++++++++++++++
 selection/adjusted_MLE/tests/relaxed_lasso.py |  33 +++-
 selection/adjusted_MLE/tests/test_MLE_boot.py |   6 +-
 3 files changed, 203 insertions(+), 10 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/high_dim_boot_coverage.py

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
new file mode 100644
index 000000000..cf5c3b7bb
--- /dev/null
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -0,0 +1,174 @@
+from __future__ import print_function
+from rpy2.robjects.packages import importr
+from rpy2 import robjects
+
+import rpy2.robjects.numpy2ri
+rpy2.robjects.numpy2ri.activate()
+
+import statsmodels.api as sm
+import numpy as np, sys
+import regreg.api as rr
+from selection.randomized.api import randomization
+from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
+from scipy.stats import norm as ndist
+from selection.algorithms.debiased_lasso import _find_row_approx_inverse
+
+def glmnet_sigma(X, y):
+    robjects.r('''
+                glmnet_cv = function(X,y){
+                y = as.matrix(y)
+                X = as.matrix(X)
+                n = nrow(X)
+                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
+                lam_1se = out$lambda.1se
+                lam_min = out$lambda.min
+                return(list(lam_min = n * as.numeric(lam_min), lam_1se = n* as.numeric(lam_1se)))
+                }''')
+
+    lambda_cv_R = robjects.globalenv['glmnet_cv']
+    n, p = X.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+
+    lam = lambda_cv_R(r_X, r_y)
+    lam_min = np.array(lam.rx2('lam_min'))
+    lam_1se = np.array(lam.rx2('lam_1se'))
+    return lam_min, lam_1se
+
+
+def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
+    robjects.r('''
+    source('~/best-subset/bestsubset/R/sim.R')
+    ''')
+
+    r_simulate = robjects.globalenv['sim.xy']
+    sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
+    X = np.array(sim.rx2('x'))
+    y = np.array(sim.rx2('y'))
+    X_val = np.array(sim.rx2('xval'))
+    y_val = np.array(sim.rx2('yval'))
+    Sigma = np.array(sim.rx2('Sigma'))
+    beta = np.array(sim.rx2('beta'))
+    sigma = np.array(sim.rx2('sigma'))
+
+    return X, y, X_val, y_val, Sigma, beta, sigma
+
+def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
+                     randomization_scale=np.sqrt(0.25), target="partial"):
+    while True:
+        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
+        true_mean = X.dot(beta)
+
+        X -= X.mean(0)[None, :]
+        X /= (X.std(0)[None, :] * np.sqrt(n))
+
+        X_val -= X_val.mean(0)[None, :]
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
+
+        if p > n:
+            #sigma_est = np.std(y) / 2.
+            sigma_est = np.std(y)
+            print("sigma and sigma_est", sigma, sigma_est)
+        else:
+            ols_fit = sm.OLS(y, X).fit()
+            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
+            print("sigma and sigma_est", sigma, sigma_est)
+
+        y = y - y.mean()
+        y /= sigma_est
+        y_val = y_val - y_val.mean()
+        y_val /= sigma_est
+        true_mean /= sigma_est
+
+        loss = rr.glm.gaussian(X, y)
+        epsilon = 1. / np.sqrt(n)
+        lam_seq = np.linspace(0.75, 2.75, num=100) * np.mean(
+            np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+        err = np.zeros(100)
+        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
+        M = np.identity(p)
+        for k in range(100):
+            lam = lam_seq[k]
+            W = np.ones(p) * lam
+            penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
+            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target,
+                                    randomization_scale=randomization_scale, sigma=1.)
+
+            active = M_est._overall
+            nactive = active.sum()
+            approx_MLE_est = np.zeros(p)
+            if nactive > 0:
+                M_est.solve_map()
+                approx_MLE = solve_UMVU(M_est.target_transform,
+                                        M_est.opt_transform,
+                                        M_est.target_observed,
+                                        M_est.feasible_point,
+                                        M_est.target_cov,
+                                        M_est.randomizer_precision)[0]
+                approx_MLE_est[active] = approx_MLE
+
+            err[k] = np.mean((y_val - X_val.dot(approx_MLE_est)) ** 2.)
+
+        lam = lam_seq[np.argmin(err)]
+        print('lambda', lam)
+        W = np.ones(p) * lam
+        penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target,
+                                randomization_scale=randomization_scale, sigma=1.)
+        active = M_est._overall
+        nactive = np.sum(active)
+
+        print("number of variables selected by randomized LASSO", nactive)
+
+        if nactive > 0:
+            M_est.solve_map()
+            approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
+                                                                       M_est.opt_transform,
+                                                                       M_est.target_observed,
+                                                                       M_est.feasible_point,
+                                                                       M_est.target_cov,
+                                                                       M_est.randomizer_precision)
+
+            approx_sd = np.sqrt(np.diag(var))
+            B = 1000
+            boot_pivot = np.zeros((B, nactive))
+            resid = y - X[:, active].dot(M_est.target_observed)
+            for b in range(B):
+                boot_indices = np.random.choice(n, n, replace=True)
+                boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
+                target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
+                boot_mle = mle_map(target_boot)
+                #print("target_boot", boot_mle[0], approx_MLE)
+                boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
+
+            approx_sd = boot_pivot.std(0)
+
+            if nactive == 1:
+                approx_MLE = np.array([approx_MLE])
+                approx_sd = np.array([approx_sd])
+
+            coverage_sel = 0
+            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
+            for j in range(nactive):
+                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and \
+                                (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
+                    coverage_sel += 1
+                print("selective intervals", (approx_MLE[j] - (1.65 * approx_sd[j])),
+                      (approx_MLE[j] + (1.65 * approx_sd[j])))
+
+            break
+
+    if True:
+        return coverage_sel/float(nactive)
+
+if __name__ == "__main__":
+
+    ndraw = 100
+    coverage_sel = 0.
+
+    for i in range(ndraw):
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
+        if approx is not None:
+            coverage_sel += approx
+
+        sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 16da54846..6add0d5e7 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -117,6 +117,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
         if p > n:
             sigma_est = np.std(y) / 2.
+            #sigma_est = np.std(y)
             print("sigma and sigma_est", sigma, sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
@@ -165,10 +166,10 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
         lam = lam_seq[np.argmin(err)]
         print('lambda', lam)
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
         W = np.ones(p) * lam
         penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target, randomization_scale=randomization_scale,sigma=1.)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target,
+                                randomization_scale=randomization_scale,sigma=1.)
         active = M_est._overall
         nactive = np.sum(active)
 
@@ -189,8 +190,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         for x in range(nactive):
             active_bool[x] = (np.in1d(active_set[x], true_set).sum() > 0)
         active_bool_nonrand = np.zeros(nactive_nonrand, np.bool)
-        for y in range(nactive_nonrand):
-            active_bool_nonrand[y] = (np.in1d(active_set_nonrand[y], true_set).sum() > 0)
+        for w in range(nactive_nonrand):
+            active_bool_nonrand[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
 
         if target == "partial":
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
@@ -222,8 +223,6 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             if ((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
                     and ((np.sqrt(n)*rel_LASSO[k]/sigma_est) + (1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
                 coverage_nonrand += 1
-            #print("tuned nonrandomized intervals", ((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])),
-            #      ((np.sqrt(n) * rel_LASSO[k] / sigma_est) + (1.65 * unad_sd_nonrand[k])))
             if active_bool_nonrand[k] == True and (((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])) > 0.
                                                    or ((np.sqrt(n)*rel_LASSO[k]/sigma_est) + (1.65 * unad_sd_nonrand[k])) < 0.):
                 power_nonrand += 1
@@ -240,6 +239,22 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             mle_target_lin, mle_soln_lin, mle_offset = mle_transform
             approx_sd = np.sqrt(np.diag(var))
 
+            if p>n:
+                B = 1000
+                boot_pivot = np.zeros((B, nactive))
+                resid = y - X[:, active].dot(M_est.target_observed)
+                for b in range(B):
+                    boot_indices = np.random.choice(n, n, replace=True)
+                    boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
+                    #target_boot = (np.linalg.inv(X.T.dot(X)).dot(X[boot_indices, :].T))[active].dot(resid[boot_indices]) + M_est.target_observed
+                    target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
+                    #print("check", target_boot, M_est.target_observed)
+                    boot_mle = mle_map(target_boot)
+                    #print("target_boot", boot_mle[0], approx_MLE)
+                    boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
+
+                boot_sd = boot_pivot.std(0)
+
             if nactive == 1:
                 approx_MLE = np.array([approx_MLE])
                 approx_sd = np.array([approx_sd])
@@ -249,6 +264,8 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                                 (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
                 print("selective intervals",(approx_MLE[j] - (1.65 * approx_sd[j])), (approx_MLE[j] + (1.65 * approx_sd[j])))
+                if p>n:
+                    print("boot intervals", (approx_MLE[j] - (1.65 * boot_sd[j])), (approx_MLE[j] + (1.65 * boot_sd[j])))
                 if active_bool[j] == True and (
                                 (approx_MLE[j] - (1.65 * approx_sd[j])) > 0. or (
                             approx_MLE[j] + (1.65 * approx_sd[j])) < 0.):
@@ -256,7 +273,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 if (M_est.target_observed[j] - (1.65 * unad_sd[j])) <= true_target[j] and (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) >= true_target[j]:
                     coverage_rand += 1
-                #print("randomized intervals", (M_est.target_observed[j] - (1.65 * unad_sd[j])),(M_est.target_observed[j] + (1.65 * unad_sd[j])))
+                print("randomized intervals", (M_est.target_observed[j] - (1.65 * unad_sd[j])),(M_est.target_observed[j] + (1.65 * unad_sd[j])))
                 if active_bool[j] == True and ((M_est.target_observed[j] - (1.65 * unad_sd[j])) > 0. or (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) < 0.):
                     power_rand += 1
@@ -340,7 +357,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.10, target="full")
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.10, target="partial")
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
index 73131cd81..ca0d4b825 100644
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ b/selection/adjusted_MLE/tests/test_MLE_boot.py
@@ -111,7 +111,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
                                  weights=dict(zip(np.arange(p), W)), lagrange=1.)
 
         randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma_est)
+        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M= np.identity(p), target="partial", randomization_scale=randomization_scale, sigma=1.)
 
         M_est.solve_map()
         active = M_est._overall
@@ -137,6 +137,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
                 boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
                 target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
                 boot_mle = mle_map(target_boot)
+                #print("boot mle", boot_mle[0], approx_MLE)
                 boot_pivot[b, :] = np.true_divide(boot_mle[0]- approx_MLE, np.sqrt(np.diag(boot_mle[1])))
                 #sys.stderr.write("bootstrap sample" + str(b) + "\n")
 
@@ -144,6 +145,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
             for j in range(nactive):
                 if (approx_MLE[j] - (1.65 * boot_std[j])) <= true_target[j] and true_target[j] <= (approx_MLE[j] + (1.65 * boot_std[j])):
                     coverage[j] += 1
+                print("intervals", (approx_MLE[j] - (1.65 * boot_std[j])), (approx_MLE[j] + (1.65 * boot_std[j])))
             break
 
     return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0), \
@@ -192,7 +194,7 @@ def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., rand
     coverage = 0.
 
     for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=500, p=100, s=5, signal=3., B=1200)
+        approx = boot_pivot_approx_var(n=100, p=1000, s=5, signal=1.42, B=500)
         if approx is not None:
             pivot_boot = approx[3]
             bias += approx[4]

From d0f853ce08a4f373f8ac1740fbc5744f5ab00185 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 18 Dec 2017 12:54:29 -0800
Subject: [PATCH 454/617] commit changes

---
 selection/adjusted_MLE/tests/high_dim_boot_coverage.py | 8 ++++----
 selection/adjusted_MLE/tests/relaxed_lasso.py          | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index cf5c3b7bb..776b5537b 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -66,8 +66,8 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
         if p > n:
-            #sigma_est = np.std(y) / 2.
-            sigma_est = np.std(y)
+            sigma_est = np.std(y) / 2.
+            #sigma_est = np.std(y)
             print("sigma and sigma_est", sigma, sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
@@ -130,7 +130,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
                                                                        M_est.randomizer_precision)
 
             approx_sd = np.sqrt(np.diag(var))
-            B = 1000
+            B = 2000
             boot_pivot = np.zeros((B, nactive))
             resid = y - X[:, active].dot(M_est.target_observed)
             for b in range(B):
@@ -167,7 +167,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
     coverage_sel = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
+        approx = inference_approx(n=300, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
         if approx is not None:
             coverage_sel += approx
 
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 6add0d5e7..48f340744 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -117,7 +117,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
         if p > n:
             sigma_est = np.std(y) / 2.
-            #sigma_est = np.std(y)
+            #sigma_est = sigma
             print("sigma and sigma_est", sigma, sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
@@ -357,7 +357,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.10, target="partial")
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 641d1b6790e66caa4dbce9d01b189f95075838e0 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 18 Dec 2017 14:07:13 -0800
Subject: [PATCH 455/617] installed best subset package-- enlist is changed

---
 selection/adjusted_MLE/tests/high_dim_boot_coverage.py | 7 ++++---
 selection/adjusted_MLE/tests/relaxed_lasso.py          | 8 +++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index 776b5537b..b4e5f4db8 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -153,8 +153,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
                 if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and \
                                 (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
-                print("selective intervals", (approx_MLE[j] - (1.65 * approx_sd[j])),
-                      (approx_MLE[j] + (1.65 * approx_sd[j])))
+                print("selective intervals", (approx_MLE[j] - (1.65 * approx_sd[j])),(approx_MLE[j] + (1.65 * approx_sd[j])))
 
             break
 
@@ -167,8 +166,10 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
     coverage_sel = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=300, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
+        approx = inference_approx(n=1000, p=2000, nval=500, rho=0.35, s=20, beta_type=2, snr=0.10, target="partial")
         if approx is not None:
             coverage_sel += approx
 
         sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
+
+        sys.stderr.write("iteration completed" + str(i) + "\n")
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 48f340744..a05b0bd88 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -38,10 +38,12 @@ def glmnet_sigma(X, y):
 
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
-    source('~/best-subset/bestsubset/R/sim.R')
+    #source('~/best-subset/bestsubset/R/sim.R')
+    library(bestsubset)
+    sim_xy = bestsubset::sim.xy
     ''')
 
-    r_simulate = robjects.globalenv['sim.xy']
+    r_simulate = robjects.globalenv['sim_xy']
     sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
     X = np.array(sim.rx2('x'))
     y = np.array(sim.rx2('y'))
@@ -55,7 +57,7 @@ def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
 
 def tuned_lasso(X, y, X_val,y_val):
     robjects.r('''
-        source('~/best-subset/bestsubset/R/lasso.R')
+        #source('~/best-subset/bestsubset/R/lasso.R')
         tuned_lasso_estimator = function(X,Y,X.val,Y.val){
         Y = as.matrix(Y)
         X = as.matrix(X)

From dc8ad9974a788aa3eea140c8f045dbba3073b4f7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 18 Dec 2017 22:23:08 -0800
Subject: [PATCH 456/617] commit before switch

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index a05b0bd88..afbc8cb5c 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -38,8 +38,7 @@ def glmnet_sigma(X, y):
 
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
-    #source('~/best-subset/bestsubset/R/sim.R')
-    library(bestsubset)
+    library(bestsubset) #source('~/best-subset/bestsubset/R/sim.R')
     sim_xy = bestsubset::sim.xy
     ''')
 
@@ -359,7 +358,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="full")
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 778feaa8f3967abfa4318ab073384e9129ed13ea Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 19 Dec 2017 12:26:53 -0800
Subject: [PATCH 457/617] commit before switch

---
 selection/adjusted_MLE/tests/high_dim_boot_coverage.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index b4e5f4db8..d07b76e7f 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -38,10 +38,11 @@ def glmnet_sigma(X, y):
 
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
-    source('~/best-subset/bestsubset/R/sim.R')
+    library(bestsubset) #source('~/best-subset/bestsubset/R/sim.R')
+    sim_xy = bestsubset::sim.xy
     ''')
 
-    r_simulate = robjects.globalenv['sim.xy']
+    r_simulate = robjects.globalenv['sim_xy']
     sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
     X = np.array(sim.rx2('x'))
     y = np.array(sim.rx2('y'))
@@ -137,6 +138,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
                 boot_indices = np.random.choice(n, n, replace=True)
                 boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
                 target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
+                #print("target_boot", target_boot)
                 boot_mle = mle_map(target_boot)
                 #print("target_boot", boot_mle[0], approx_MLE)
                 boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
@@ -162,11 +164,11 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
 
 if __name__ == "__main__":
 
-    ndraw = 100
+    ndraw = 50
     coverage_sel = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=1000, p=2000, nval=500, rho=0.35, s=20, beta_type=2, snr=0.10, target="partial")
+        approx = inference_approx(n=2000, p=1000, nval=1000, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
         if approx is not None:
             coverage_sel += approx
 

From 18dd4583273e9887673f62df817b22b2494f473f Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 19 Dec 2017 16:43:28 -0800
Subject: [PATCH 458/617] run qq-norm diagnostic

---
 .../tests/high_dim_boot_coverage.py           | 59 +++++++++++--------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index d07b76e7f..9ab61aca8 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -11,7 +11,7 @@
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 from scipy.stats import norm as ndist
-from selection.algorithms.debiased_lasso import _find_row_approx_inverse
+import scipy.stats as stats
 
 def glmnet_sigma(X, y):
     robjects.r('''
@@ -68,7 +68,6 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
 
         if p > n:
             sigma_est = np.std(y) / 2.
-            #sigma_est = np.std(y)
             print("sigma and sigma_est", sigma, sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
@@ -131,23 +130,23 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
                                                                        M_est.randomizer_precision)
 
             approx_sd = np.sqrt(np.diag(var))
-            B = 2000
-            boot_pivot = np.zeros((B, nactive))
-            resid = y - X[:, active].dot(M_est.target_observed)
-            for b in range(B):
-                boot_indices = np.random.choice(n, n, replace=True)
-                boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
-                target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
-                #print("target_boot", target_boot)
-                boot_mle = mle_map(target_boot)
-                #print("target_boot", boot_mle[0], approx_MLE)
-                boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-
-            approx_sd = boot_pivot.std(0)
-
-            if nactive == 1:
-                approx_MLE = np.array([approx_MLE])
-                approx_sd = np.array([approx_sd])
+            # B = 2000
+            # boot_pivot = np.zeros((B, nactive))
+            # resid = y - X[:, active].dot(M_est.target_observed)
+            # for b in range(B):
+            #     boot_indices = np.random.choice(n, n, replace=True)
+            #     boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
+            #     target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
+            #     #print("target_boot", target_boot)
+            #     boot_mle = mle_map(target_boot)
+            #     #print("target_boot", boot_mle[0], approx_MLE)
+            #     boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
+            #
+            # approx_sd = boot_pivot.std(0)
+
+            #if nactive == 1:
+            #    approx_MLE = np.array([approx_MLE])
+            #    approx_sd = np.array([approx_sd])
 
             coverage_sel = 0
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
@@ -160,18 +159,28 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
             break
 
     if True:
-        return coverage_sel/float(nactive)
+        return coverage_sel/float(nactive), np.true_divide(approx_MLE- true_target, approx_sd)
 
 if __name__ == "__main__":
 
-    ndraw = 50
+    import matplotlib.pyplot as plt
+    ndraw = 100
     coverage_sel = 0.
-
+    pivot_obs_info = []
     for i in range(ndraw):
-        approx = inference_approx(n=2000, p=1000, nval=1000, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
         if approx is not None:
-            coverage_sel += approx
+            coverage_sel += approx[0]
+            pivot = approx[1]
+            for j in range(pivot.shape[0]):
+                pivot_obs_info.append(pivot[j])
 
         sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
-
         sys.stderr.write("iteration completed" + str(i) + "\n")
+        sys.stderr.write("pivot" + str(pivot_obs_info) + "\n")
+
+    stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=plt)
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/high_10.png")
+
+
+

From 29f257e5074fd67796bff6d9c7a3b020c3021923 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 20 Dec 2017 15:03:34 -0800
Subject: [PATCH 459/617] commit before switch

---
 selection/adjusted_MLE/tests/high_dim_boot_coverage.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index 9ab61aca8..457fa50fc 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -144,9 +144,9 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
             #
             # approx_sd = boot_pivot.std(0)
 
-            #if nactive == 1:
-            #    approx_MLE = np.array([approx_MLE])
-            #    approx_sd = np.array([approx_sd])
+            if nactive == 1:
+                approx_MLE = np.array([approx_MLE])
+                approx_sd = np.array([approx_sd])
 
             coverage_sel = 0
             true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
@@ -168,7 +168,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
     coverage_sel = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="partial")
+        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.20, target="partial")
         if approx is not None:
             coverage_sel += approx[0]
             pivot = approx[1]
@@ -180,7 +180,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
         sys.stderr.write("pivot" + str(pivot_obs_info) + "\n")
 
     stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=plt)
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/high_10.png")
+    plt.savefig("/Users/snigdhapanigrahi/Desktop/high_10_0.20_.png")
 
 
 

From 7fc650ee02f4b777cb1516c1e4cf0ff5f68a47fe Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Dec 2017 13:04:56 -0800
Subject: [PATCH 460/617] updates to R and C software

---
 C-software | 2 +-
 R-software | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/C-software b/C-software
index 0b35c6ed8..ec6a954d6 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit 0b35c6ed8537cef9aabed526b968b1c63d2f6cb8
+Subproject commit ec6a954d6b335439115e961abde91fa5a07a3669
diff --git a/R-software b/R-software
index 9e7a08192..232760d6a 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit 9e7a081924179ed93469aac41f596ff1dd5b21bb
+Subproject commit 232760d6aef5182e040b82e30555f4af5ad6803c

From a2638641de86c0e2c788c1f14c7de99f083b6ad5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 21 Dec 2017 15:55:45 -0800
Subject: [PATCH 461/617] adding doc to selective MLE code

---
 C-software                                    |   2 +-
 selection/adjusted_MLE/selective_MLE.py       | 195 +++++----------
 selection/adjusted_MLE/tests/relaxed_lasso.py |  44 ++--
 selection/randomized/selective_MLE.py         | 233 ++++++++++++++++++
 selection/randomized/selective_MLE_utils.pyx  |  22 +-
 .../randomized/tests/test_selective_MLE.py    |  92 +++++++
 6 files changed, 427 insertions(+), 161 deletions(-)
 create mode 100644 selection/randomized/selective_MLE.py
 create mode 100644 selection/randomized/tests/test_selective_MLE.py

diff --git a/C-software b/C-software
index ec6a954d6..fc60f471e 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit ec6a954d6b335439115e961abde91fa5a07a3669
+Subproject commit fc60f471ecd5fc40b822ee36d46b1a5aaf7ce7e8
diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index c5d635249..213d499d6 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -1,6 +1,7 @@
 import numpy as np
 import functools
-from selection.randomized.M_estimator import M_estimator
+from ..randomized.M_estimator import M_estimator
+from ..randomized.selective_MLE import solve_barrier_nonneg
 
 class M_estimator_map(M_estimator):
 
@@ -73,158 +74,98 @@ def solve_map_univariate_target(self, j):
         self.data_offset = self._score_linear_term.dot(self.observed_score_state) - self.A * self.target_observed[j]
         self.target_transform = (self.A.reshape((self.A.shape[0],1)),self.data_offset)
 
+# def solve_UMVU(target_transform,
+#                opt_transform,
+#                target_observed,
+#                feasible_point,
+#                target_cov,
+#                randomizer_precision,
+#                step=1,
+#                nstep=30,
+#                tol=1.e-8):
 
-def solve_UMVU(target_transform,
-               opt_transform,
-               target_observed,
-               feasible_point,
-               target_cov,
-               randomizer_precision,
-               step=1,
-               nstep=30,
-               tol=1.e-8):
+#     A, data_offset = target_transform # data_offset = N
+#     B, opt_offset = opt_transform     # opt_offset = u
 
-    A, data_offset = target_transform # data_offset = N
-    B, opt_offset = opt_transform     # opt_offset = u
+#     nopt = B.shape[1]
+#     ntarget = A.shape[1]
 
-    nopt = B.shape[1]
-    ntarget = A.shape[1]
+#     #assert ntarget == 1
 
-    #assert ntarget == 1
+#     # setup joint implied covariance matrix
 
-    # setup joint implied covariance matrix
+#     target_precision = np.linalg.inv(target_cov)
 
-    target_precision = np.linalg.inv(target_cov)
+#     implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
+#     implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
+#     implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
+#     implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
+#     implied_precision[ntarget:,ntarget:] = B.T.dot(randomizer_precision).dot(B)
+#     implied_cov = np.linalg.inv(implied_precision)
 
-    implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
-    implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
-    implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
-    implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
-    implied_precision[ntarget:,ntarget:] = B.T.dot(randomizer_precision).dot(B)
-    implied_cov = np.linalg.inv(implied_precision)
+#     implied_opt = implied_cov[ntarget:,ntarget:]
+#     implied_target = implied_cov[:ntarget,:ntarget]
+#     implied_cross = implied_cov[:ntarget,ntarget:]
 
-    implied_opt = implied_cov[ntarget:,ntarget:]
-    implied_target = implied_cov[:ntarget,:ntarget]
-    implied_cross = implied_cov[:ntarget,ntarget:]
+#     L = implied_cross.dot(np.linalg.inv(implied_opt))
+#     M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
+#     M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
-    L = implied_cross.dot(np.linalg.inv(implied_opt))
-    M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
-    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
+#     #print("check matrices", M_1, M_2, L, data_offset, opt_offset)
 
-    #print("check matrices", M_1, M_2, L, data_offset, opt_offset)
+#     conditioned_value = data_offset + opt_offset
 
-    conditioned_value = data_offset + opt_offset
+#     linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
+#     offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
 
-    linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
-    offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
+#     natparam_transform = (linear_term, offset_term)
+#     conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
 
-    natparam_transform = (linear_term, offset_term)
-    conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
+#     conditional_precision = implied_precision[ntarget:,ntarget:]
 
-    conditional_precision = implied_precision[ntarget:,ntarget:]
+#     M_1_inv = np.linalg.inv(M_1)
+#     mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
+#     mle_transform = (M_1_inv, -M_1_inv.dot(L), mle_offset_term)
+#     var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
+#                      -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
 
-    M_1_inv = np.linalg.inv(M_1)
-    mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
-    mle_transform = (M_1_inv, -M_1_inv.dot(L), mle_offset_term)
-    var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
-                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
+#     cross_covariance = np.linalg.inv(implied_precision[:ntarget, :ntarget]).dot(implied_precision[:ntarget, ntarget:])
+#     var_matrices = (np.linalg.inv(implied_opt), np.linalg.inv(implied_precision[:ntarget,:ntarget]),
+#                     cross_covariance,target_precision)
 
-    cross_covariance = np.linalg.inv(implied_precision[:ntarget, :ntarget]).dot(implied_precision[:ntarget, ntarget:])
-    var_matrices = (np.linalg.inv(implied_opt), np.linalg.inv(implied_precision[:ntarget,:ntarget]),
-                    cross_covariance,target_precision)
+#     def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
+#                 feasible_point, conditional_precision, target_observed):
 
-    def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
-                feasible_point, conditional_precision, target_observed):
+#         param_lin, param_offset = natparam_transform
+#         mle_target_lin, mle_soln_lin, mle_offset = mle_transform
 
-        param_lin, param_offset = natparam_transform
-        mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+#         value, soln, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
+#                                               conditional_precision,
+#                                               max_iter=200)
 
-        soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
-                                              conditional_precision,
-                                              feasible_point=feasible_point)
+#         selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
 
-        selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
+#         var_target_lin, var_offset = var_transform
+#         var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
+#         p = var_precision.shape[0]
+#         grad, opt_val, opt_proposed = np.ones((3, p), np.float)
+#         scaling = np.sqrt(np.diag(conditional_precision))
 
-        var_target_lin, var_offset = var_transform
-        var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
-        _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
-                                          var_precision,
-                                          feasible_point=None,
-                                          step=1,
-                                          nstep=250)
+#         _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
+#                                           var_precision)
 
-        hessian = target_precision.dot(inv_precision_target +
-                                       cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
+#         hessian = target_precision.dot(inv_precision_target +
+#                                        cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
 
-        return selective_MLE, np.linalg.inv(hessian)
+#         return selective_MLE, np.linalg.inv(hessian)
 
-    mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, var_transform, var_matrices,
-                                    feasible_point, conditional_precision)
-    sel_MLE, inv_hessian = mle_partial(target_observed)
+#     mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, var_transform, var_matrices,
+#                                     feasible_point, conditional_precision)
+#     sel_MLE, inv_hessian = mle_partial(target_observed)
 
-    implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value), offset_term])
+#     implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value), offset_term])
 
-    return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
-
-def solve_barrier_nonneg(conjugate_arg,
-                         precision,
-                         feasible_point=None,
-                         step=1,
-                         nstep=150,
-                         tol=1.e-8):
-
-    scaling = np.sqrt(np.diag(precision))
-
-    if feasible_point is None:
-        feasible_point = 1. / scaling
-
-    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
-    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
-    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
-
-    current = feasible_point
-    current_value = np.inf
-
-    for itercount in range(nstep):
-        newton_step = grad(current)
-
-        # make sure proposal is feasible
-
-        count = 0
-        while True:
-            count += 1
-            proposal = current - step * newton_step
-            if np.all(proposal > 0):
-                break
-            step *= 0.5
-            if count >= 40:
-                raise ValueError('not finding a feasible point')
-
-        # make sure proposal is a descent
-
-        count = 0
-        while True:
-            proposal = current - step * newton_step
-            proposed_value = objective(proposal)
-            if proposed_value <= current_value:
-                break
-            step *= 0.5
-
-        # stop if relative decrease is small
-
-        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
-            current = proposal
-            current_value = proposed_value
-            break
-
-        current = proposal
-        current_value = proposed_value
-
-        if itercount % 4 == 0:
-            step *= 2
-
-    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
-    return current, current_value, hess
+#     return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
 
 
 
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index 0c03bbdc4..002aa23ab 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -1,17 +1,20 @@
 from __future__ import print_function, division
+from scipy.stats import norm as ndist
+import numpy as np, sys
+
+import regreg.api as rr
+import statsmodels.api as sm
+
+# rpy2 imports
+
 from rpy2.robjects.packages import importr
 from rpy2 import robjects
-
 import rpy2.robjects.numpy2ri
 rpy2.robjects.numpy2ri.activate()
 
-import statsmodels.api as sm
-import numpy as np, sys
-import regreg.api as rr
 from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from scipy.stats import norm as ndist
-from selection.algorithms.debiased_lasso import _find_row_approx_inverse
+from selection.randomized.selective_MLE import selective_MLE as solve_selective_MLE
+from selection.adjusted_MLE.selective_MLE import M_estimator_map
 
 def glmnet_sigma(X, y):
     robjects.r('''
@@ -127,9 +130,6 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             print("sigma and sigma_est", sigma, sigma_est)
 
         if target == "debiased":
-            # M = np.zeros((p, p))
-            # for var in range(p):
-            #     M[:, var] = _find_row_approx_inverse(X.T.dot(X), var, delta=0.5)
             M = np.linalg.inv(Sigma)
         else:
             M = np.identity(p)
@@ -157,12 +157,12 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             approx_MLE_est = np.zeros(p)
             if nactive>0:
                 M_est.solve_map()
-                approx_MLE = solve_UMVU(M_est.target_transform,
-                                        M_est.opt_transform,
-                                        M_est.target_observed,
-                                        M_est.feasible_point,
-                                        M_est.target_cov,
-                                        M_est.randomizer_precision)[0]
+                approx_MLE = solve_selective_MLE(M_est.target_observed,
+                                                 M_est.target_cov,
+                                                 M_est.target_transform,
+                                                 M_est.opt_transform,
+                                                 M_est.feasible_point,
+                                                 M_est.randomizer_precision)[0]
                 approx_MLE_est[active] = approx_MLE
 
             err[k] = np.mean((y_val - X_val.dot(approx_MLE_est)) ** 2.)
@@ -233,12 +233,12 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
 
         if nactive > 0:
             M_est.solve_map()
-            approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
-                                                                       M_est.opt_transform,
-                                                                       M_est.target_observed,
-                                                                       M_est.feasible_point,
-                                                                       M_est.target_cov,
-                                                                       M_est.randomizer_precision)
+            approx_MLE, var, mle_map, _, _, mle_transform = solve_selective_MLE(M_est.target_observed,
+                                                                                M_est.target_cov,
+                                                                                M_est.target_transform,
+                                                                                M_est.opt_transform,
+                                                                                M_est.feasible_point,
+                                                                                M_est.randomizer_precision)
 
             mle_target_lin, mle_soln_lin, mle_offset = mle_transform
             approx_sd = np.sqrt(np.diag(var))
diff --git a/selection/randomized/selective_MLE.py b/selection/randomized/selective_MLE.py
new file mode 100644
index 000000000..fdd489f78
--- /dev/null
+++ b/selection/randomized/selective_MLE.py
@@ -0,0 +1,233 @@
+from functools import partial
+
+import numpy as np
+
+from regreg.api import power_L
+
+from .selective_MLE_utils import barrier_solve_
+
+def solve_barrier_nonneg(conjugate_arg,
+                         precision,
+                         initial=None,
+                         step=None,
+                         max_iter=150,
+                         value_tol=1.e-6):
+    r"""
+    Solve a smoothed version of the problem
+
+    .. math::
+
+        \text{minimize}_{\beta \geq 0} -u^T\beta + \frac{1}{2} \beta^T\Theta \beta
+
+    with `conjugate_arg` as $u$ and `precision` as $\Theta$. The smoothing
+    is done by adding a barrier function with scale determined
+    by the diagonal of precision.
+
+    Parameters
+    ----------
+
+    conjugate_arg: np.float(p)
+        The value of the problem is a convex conjugate -- this is the
+        argument to that function.
+
+    precision: np.float((p,p))
+        A non-negative definite matrix -- precision meaning the inverse
+        of a covariance matrix.
+
+    initial: np.float(p)
+        Optional warm start.
+
+    step: float
+        An initial step size. Defaults to inverse of
+        (approximate) largest eigenvalue of precision.
+
+    max_iter: int
+        Iteration cap. NOTE: accepted but not forwarded to the solver here.
+
+    value_tol: float
+        Relative decrease in value for stopping.
+
+    Returns
+    -------
+
+    value: float
+        The value of the optimization problem.
+
+    soln: np.float(p)
+        The solution to the optimization problem,
+        also the gradient of the value function.
+
+    hess: np.float((p,p))
+        The Hessian of the value function.
+
+    """
+    p = precision.shape[0]
+    scaling = np.sqrt(np.diag(precision))
+
+    proposed, grad = np.zeros((2, p))  # work buffers; needed even when a warm start is given
+    initial = np.zeros(p) if initial is None else initial
+
+    if step is None:
+        step = 1. / power_L(precision)
+
+    soln, val = barrier_solve_(grad,
+                               initial,
+                               proposed,
+                               conjugate_arg,
+                               precision,
+                               scaling,
+                               step,
+                               value_tol=value_tol)
+
+    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
+    hess = np.linalg.inv(precision + np.diag(barrier_hessian(soln)))
+
+    return val, soln, hess
+
+def selective_MLE(target_observed,
+                  target_cov,
+                  target_transform,
+                  opt_transform,
+                  feasible_point,
+                  randomizer_precision,
+                  step=1,
+                  max_iter=30,
+                  tol=1.e-8):
+
+    r"""
+    Compute the selective MLE of a target and the inverse of its Hessian matrix.
+
+    Parameters
+    ----------
+    target_observed: np.float
+        The observed value of our target estimator.
+
+    target_cov: np.float
+        Covariance matrix of target estimator.
+
+    target_transform: tuple
+        A pair (A, b) consisting of a linear transformation A and an offset b
+        representing an affine transformation $x \mapsto Ax+b$.
+        This transform should be computed as part of a linear decomposition of the
+        score of an optimization problem with respect to a target
+        of interest.
+
+    opt_transform: tuple
+        A pair (A, b) consisting of a linear transformation A and an offset b
+        representing an affine transformation $x \mapsto Ax+b$.
+        This transformation usually comes from the KKT conditions
+        of an appropriate (randomized) optimization problem.
+
+    feasible_point: np.float
+        An appropriate feasible point for the optimization
+        problem in the approximate likelihood. Not used at present.
+
+    randomizer_precision: np.float((p,p))
+        Precision matrix of randomization in the randomized
+        optimization problem.
+
+    step: float
+        An initial step size (default 1). Not used at present: it is
+        not forwarded to the barrier solver.
+
+    max_iter: int
+        When to stop optimization. Not used at present: the first solver call hard-codes max_iter=200.
+
+    tol: float
+        Relative decrease in value for stopping. Not used at present.
+
+    Returns
+    -------
+    tuple
+        (sel_MLE, inv_hessian, mle_partial, implied_cov, implied_mean, mle_transform): the squeezed
+        selective MLE, the inverse Hessian, the map from a target observation to that pair, the joint
+        implied covariance, implied_cov.dot(implied_parameter), and the affine pieces defining the MLE.
+    """
+
+    A, data_offset = target_transform # data_offset = N
+    B, opt_offset = opt_transform     # opt_offset = u
+
+    nopt = B.shape[1]
+    ntarget = A.shape[1]
+
+    # blocks below are ordered (target, optimization variables)
+
+    # setup joint implied covariance matrix
+
+    target_precision = np.linalg.inv(target_cov)
+
+    implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
+    implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
+    implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
+    implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
+    implied_precision[ntarget:,ntarget:] = B.T.dot(randomizer_precision).dot(B)
+    implied_cov = np.linalg.inv(implied_precision)
+
+    implied_opt = implied_cov[ntarget:,ntarget:]
+    implied_target = implied_cov[:ntarget,:ntarget]
+    implied_cross = implied_cov[:ntarget,ntarget:]
+
+    L = implied_cross.dot(np.linalg.inv(implied_opt))
+    M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
+    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
+
+    # the two affine offsets enter only through their sum
+
+    conditioned_value = data_offset + opt_offset
+
+    linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
+    offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
+
+    natparam_transform = (linear_term, offset_term)
+    conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
+
+    conditional_precision = implied_precision[ntarget:,ntarget:]
+
+    M_1_inv = np.linalg.inv(M_1)
+    mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
+    mle_transform = (M_1_inv, -M_1_inv.dot(L), mle_offset_term)
+    var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
+                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
+
+    cross_covariance = np.linalg.inv(implied_precision[:ntarget, :ntarget]).dot(implied_precision[:ntarget, ntarget:])
+    var_matrices = (np.linalg.inv(implied_opt), np.linalg.inv(implied_precision[:ntarget,:ntarget]),
+                    cross_covariance,target_precision)
+
+    def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
+                feasible_point, conditional_precision, target_observed):
+        # map an observed target to (selective MLE, inverse Hessian); feasible_point is unused
+        param_lin, param_offset = natparam_transform
+        mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+
+        # NOTE(review): results are unpacked as (soln, value, hess) -- TODO confirm
+        # that this matches the return order of solve_barrier_nonneg
+        soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
+                                              conditional_precision,
+                                              max_iter=200)
+
+        selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
+
+        var_target_lin, var_offset = var_transform
+        var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
+        # only the third output of this second solve is used
+
+        _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
+                                          var_precision)
+
+        hessian = target_precision.dot(inv_precision_target +
+                                       cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
+
+        return selective_MLE, np.linalg.inv(hessian)
+
+    mle_partial = partial(mle_map,
+                          natparam_transform,
+                          mle_transform,
+                          var_transform,
+                          var_matrices,
+                          feasible_point,
+                          conditional_precision)
+    sel_MLE, inv_hessian = mle_partial(target_observed)
+
+    implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value), offset_term])
+
+    return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
diff --git a/selection/randomized/selective_MLE_utils.pyx b/selection/randomized/selective_MLE_utils.pyx
index 4ce8712db..2b593d480 100644
--- a/selection/randomized/selective_MLE_utils.pyx
+++ b/selection/randomized/selective_MLE_utils.pyx
@@ -11,16 +11,16 @@ ctypedef np.int_t DTYPE_int_t
 
 cdef extern from "randomized_lasso.h":
 
-    void barrier_solve(double *gradient,                   # Gradient vector
-                       double *opt_variable,               # Optimization variable
-                       double *opt_proposed,               # New value of optimization variable
-                       double *conjugate_arg,              # Argument to conjugate of Gaussian
-                       double *precision,                  # Precision matrix of Gaussian
-                       double *scaling,                    # Diagonal scaling matrix for log barrier
-                       int ndim,                           # Dimension of opt_variable
-                       int max_iter,                       # Maximum number of iterations
-                       double value_tol,                   # Tolerance for convergence based on value
-                       double initial_step)                # Initial stepsize 
+    double barrier_solve(double *gradient,                   # Gradient vector
+                         double *opt_variable,               # Optimization variable
+                         double *opt_proposed,               # New value of optimization variable
+                         double *conjugate_arg,              # Argument to conjugate of Gaussian
+                         double *precision,                  # Precision matrix of Gaussian
+                         double *scaling,                    # Diagonal scaling matrix for log barrier
+                         int ndim,                           # Dimension of opt_variable
+                         int max_iter,                       # Maximum number of iterations
+                         double value_tol,                   # Tolerance for convergence based on value
+                         double initial_step)                # Initial stepsize 
 
 def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient vector
                    np.ndarray[DTYPE_float_t, ndim=1] opt_variable,  # Optimization variable
@@ -28,10 +28,10 @@ def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient v
                    np.ndarray[DTYPE_float_t, ndim=1] conjugate_arg, # Argument to conjugate of Gaussian
                    np.ndarray[DTYPE_float_t, ndim=2] precision,     # Precision matrix of Gaussian
                    np.ndarray[DTYPE_float_t, ndim=1] scaling,       # Diagonal scaling matrix for log barrier
+                   double initial_step,
                    int max_iter=100,
                    double value_tol=1.e-6):
    
-    initial_step = power_L(precision)
     ndim = precision.shape[0]
 
     value = barrier_solve(<double *>gradient.data,
diff --git a/selection/randomized/tests/test_selective_MLE.py b/selection/randomized/tests/test_selective_MLE.py
new file mode 100644
index 000000000..09851c8cf
--- /dev/null
+++ b/selection/randomized/tests/test_selective_MLE.py
@@ -0,0 +1,92 @@
+import numpy as np
+import functools
+
+from ...tests.decorators import set_seed_iftrue
+from ..selective_MLE_utils import barrier_solve_
+
+def solve_barrier_nonneg(conjugate_arg,
+                         precision,
+                         feasible_point=None,
+                         step=1,
+                         nstep=150,
+                         tol=1.e-8):
+    """
+    Minimize -u'c + u'Pu/2 + sum(log(1 + s/u)) over u > 0 by gradient descent with
+    backtracking (c = conjugate_arg, P = precision, s = sqrt(diag(P))). Returns
+    (solution, value, inverse Hessian); raises ValueError on infeasibility or NaN.
+    """
+    scaling = np.sqrt(np.diag(precision))
+    if feasible_point is None:
+        feasible_point = 1. / scaling
+
+    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
+    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
+    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
+
+    current = feasible_point
+    current_value = np.inf
+
+    for itercount in range(nstep):
+        cur_grad = grad(current)
+
+        # make sure proposal is feasible
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * cur_grad
+            if np.all(proposal > 0):
+                break
+            step *= 0.5
+            if count >= 40:
+                raise ValueError('not finding a feasible point')
+
+        # make sure proposal is a descent; NaN never compares <=, so raise instead of halving forever
+        while True:
+            proposal = current - step * cur_grad
+            proposed_value = objective(proposal)
+            if np.isnan(proposed_value):
+                raise ValueError('value is NaN')
+            if proposed_value <= current_value:
+                break
+            step *= 0.5
+
+        # stop if relative decrease is small
+        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+            current = proposal
+            current_value = proposed_value
+            break
+
+        current = proposal
+        current_value = proposed_value
+        if itercount % 4 == 0:
+            step *= 2
+
+    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
+    return current, current_value, hess
+
+@set_seed_iftrue(True)
+def test_C_solver():
+    """Check that the C barrier solver agrees with the pure-Python reference solver."""
+    X = np.random.standard_normal((10, 5))
+    precision = X.T.dot(X) / 10
+    conjugate_arg = np.random.standard_normal(5)
+
+    soln1, val1, _ = solve_barrier_nonneg(conjugate_arg,
+                                          precision,
+                                          tol=1.e-12)
+
+    grad, opt_val, opt_proposed = np.ones((3, 5))
+    scaling = np.sqrt(np.diag(precision))
+    step = 1. / np.linalg.eigvalsh(precision).max()  # barrier_solve_ now requires initial_step
+    soln2, val2 = barrier_solve_(grad,
+                                 opt_val,
+                                 opt_proposed,
+                                 conjugate_arg,
+                                 precision,
+                                 scaling,
+                                 step,
+                                 value_tol=1.e-12)
+
+    np.testing.assert_allclose(soln1, soln2, atol=1.e-4, rtol=1.e-4)
+    assert (np.fabs(val1 - val2) < 1.e-4 * np.fabs(val1))
+

From 5fce4ceba8924e55ed33886294dbb5097a239a86 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 27 Dec 2017 23:28:39 -0800
Subject: [PATCH 462/617] coverage wo bootstrap

---
 selection/adjusted_MLE/selective_MLE.py       |  4 +-
 .../tests/high_dim_boot_coverage.py           | 72 ++++++++++++++-----
 2 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index c5d635249..ccf3f2b80 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -151,7 +151,7 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
                                           var_precision,
                                           feasible_point=None,
                                           step=1,
-                                          nstep=250)
+                                          nstep=2000)
 
         hessian = target_precision.dot(inv_precision_target +
                                        cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
@@ -170,7 +170,7 @@ def solve_barrier_nonneg(conjugate_arg,
                          precision,
                          feasible_point=None,
                          step=1,
-                         nstep=150,
+                         nstep=2000,
                          tol=1.e-8):
 
     scaling = np.sqrt(np.diag(precision))
diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index 457fa50fc..f1d3c3520 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -67,7 +67,8 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
         if p > n:
-            sigma_est = np.std(y) / 2.
+            #sigma_est = np.std(y) / 2.
+            sigma_est = np.std(y)
             print("sigma and sigma_est", sigma, sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
@@ -129,58 +130,91 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
                                                                        M_est.target_cov,
                                                                        M_est.randomizer_precision)
 
-            approx_sd = np.sqrt(np.diag(var))
-            # B = 2000
+            approx_sd0 = np.sqrt(np.diag(var))
+            # B = 3000
             # boot_pivot = np.zeros((B, nactive))
+            # boot_mle_vec = np.zeros((B, nactive))
             # resid = y - X[:, active].dot(M_est.target_observed)
             # for b in range(B):
             #     boot_indices = np.random.choice(n, n, replace=True)
             #     boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
             #     target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
-            #     #print("target_boot", target_boot)
+            #     #print("target_boot", target_boot, M_est.target_observed)
             #     boot_mle = mle_map(target_boot)
-            #     #print("target_boot", boot_mle[0], approx_MLE)
+            #     print("target_boot", boot_mle[0], approx_MLE)
             #     boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-            #
-            # approx_sd = boot_pivot.std(0)
+            #     boot_mle_vec[b,:] = boot_mle[0]
+
+            # for b in range(B):
+            #     boot_indices = np.random.choice(n, n, replace=True)
+            #     target_boot = np.linalg.inv(X.T.dot(X)).dot((X[boot_indices, :]).T)[active].dot(resid[boot_indices]) \
+            #                   + M_est.target_observed
+            #     #print("target_boot", target_boot, M_est.target_observed)
+            #     boot_mle = mle_map(target_boot)
+            #     print("target_boot", boot_mle[0], approx_MLE)
+            #     boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
+            #     boot_mle_vec[b,:] = boot_mle[0]
+
+            #approx_sd = boot_pivot.std(0)* approx_sd0
+            # approx_sd_boot = boot_mle_vec.std(0)
+            # lower_q = np.percentile(boot_pivot, 5, axis=0)
+            # upper_q = np.percentile(boot_pivot, 95, axis=0)
 
             if nactive == 1:
                 approx_MLE = np.array([approx_MLE])
-                approx_sd = np.array([approx_sd])
+                approx_sd0 = np.array([approx_sd0])
+                #approx_sd = np.array([approx_sd])
+
+            coverage_sel = 0.
+            coverage_sel0 = 0.
+            #true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
+            true_target = np.linalg.pinv(X)[active].dot(true_mean)
+            print("true target", true_target)
 
-            coverage_sel = 0
-            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
             for j in range(nactive):
-                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and \
-                                (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
-                    coverage_sel += 1
-                print("selective intervals", (approx_MLE[j] - (1.65 * approx_sd[j])),(approx_MLE[j] + (1.65 * approx_sd[j])))
+                # if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and \
+                #                 (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
+                #     coverage_sel += 1
+                if (approx_MLE[j] - (1.65 * approx_sd0[j])) <= true_target[j] and \
+                                (approx_MLE[j] + (1.65 * approx_sd0[j])) >= true_target[j]:
+                    coverage_sel0 += 1
+                coverage_sel = coverage_sel0
+                print("selective intervals wo bootstrap", (approx_MLE[j] - (1.65 * approx_sd0[j])),
+                      (approx_MLE[j] + (1.65 * approx_sd0[j])))
+                # print("selective intervals w boot pivot", (approx_MLE[j] - (1.65 * approx_sd[j])),
+                #       (approx_MLE[j] + (1.65 * approx_sd[j])))
+                # print("selective intervals w boot mle", (approx_MLE[j] - (1.65 * approx_sd_boot[j])),
+                #       (approx_MLE[j] + (1.65 * approx_sd_boot[j])))
 
             break
 
     if True:
-        return coverage_sel/float(nactive), np.true_divide(approx_MLE- true_target, approx_sd)
+        return coverage_sel/float(nactive), coverage_sel0/float(nactive), np.true_divide(approx_MLE- true_target, approx_sd0)
 
 if __name__ == "__main__":
 
     import matplotlib.pyplot as plt
     ndraw = 100
     coverage_sel = 0.
+    coverage_sel0 = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.20, target="partial")
+        approx = inference_approx(n=200, p=1000, nval=200, rho=0.35, s=10, beta_type=2, snr=0.10, target="full")
         if approx is not None:
             coverage_sel += approx[0]
-            pivot = approx[1]
+            coverage_sel0 += approx[1]
+            pivot = approx[2]
             for j in range(pivot.shape[0]):
                 pivot_obs_info.append(pivot[j])
 
+        sys.stderr.write("selective coverage wo boot" + str(coverage_sel0 / float(i + 1)) + "\n")
         sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
         sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("pivot" + str(pivot_obs_info) + "\n")
+        #sys.stderr.write("pivot" + str(pivot_obs_info) + "\n")
 
     stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=plt)
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/high_10_0.20_.png")
+    plt.show()
+    #plt.savefig("/Users/snigdhapanigrahi/Desktop/high_10_0.20_.png")
 
 
 

From 536063ede5ae004e531a2c5c9c740b8e8504814e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 28 Dec 2017 02:23:40 -0800
Subject: [PATCH 463/617] commit changes

---
 selection/adjusted_MLE/tests/high_dim_boot_coverage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index f1d3c3520..6e2f80389 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -199,7 +199,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
     coverage_sel0 = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = inference_approx(n=200, p=1000, nval=200, rho=0.35, s=10, beta_type=2, snr=0.10, target="full")
+        approx = inference_approx(n=200, p=1000, nval=200, rho=0.35, s=10, beta_type=1, snr=0.20, target="full")
         if approx is not None:
             coverage_sel += approx[0]
             coverage_sel0 += approx[1]

From 4f6755a1abcff8f800ad9db13c9e758734b55481 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 28 Dec 2017 16:12:40 -0800
Subject: [PATCH 464/617] selective coverage improving in high dimensions

---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 33 ++++++-------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index afbc8cb5c..f2b872c6e 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -117,8 +117,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
         if p > n:
-            sigma_est = np.std(y) / 2.
-            #sigma_est = sigma
+            sigma_est = np.std(y)
             print("sigma and sigma_est", sigma, sigma_est)
         else:
             ols_fit = sm.OLS(y, X).fit()
@@ -216,6 +215,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
         coverage_sel = 0.
         coverage_rand = 0.
         coverage_nonrand = 0.
+
         power_sel = 0.
         power_rand = 0.
         power_nonrand = 0.
@@ -240,22 +240,6 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
             mle_target_lin, mle_soln_lin, mle_offset = mle_transform
             approx_sd = np.sqrt(np.diag(var))
 
-            if p>n:
-                B = 1000
-                boot_pivot = np.zeros((B, nactive))
-                resid = y - X[:, active].dot(M_est.target_observed)
-                for b in range(B):
-                    boot_indices = np.random.choice(n, n, replace=True)
-                    boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
-                    #target_boot = (np.linalg.inv(X.T.dot(X)).dot(X[boot_indices, :].T))[active].dot(resid[boot_indices]) + M_est.target_observed
-                    target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
-                    #print("check", target_boot, M_est.target_observed)
-                    boot_mle = mle_map(target_boot)
-                    #print("target_boot", boot_mle[0], approx_MLE)
-                    boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-
-                boot_sd = boot_pivot.std(0)
-
             if nactive == 1:
                 approx_MLE = np.array([approx_MLE])
                 approx_sd = np.array([approx_sd])
@@ -264,17 +248,20 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
                 if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and \
                                 (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
-                print("selective intervals",(approx_MLE[j] - (1.65 * approx_sd[j])), (approx_MLE[j] + (1.65 * approx_sd[j])))
-                if p>n:
-                    print("boot intervals", (approx_MLE[j] - (1.65 * boot_sd[j])), (approx_MLE[j] + (1.65 * boot_sd[j])))
+                print("selective intervals",sigma_est* (approx_MLE[j] - (1.65 * approx_sd[j])),
+                      sigma_est* (approx_MLE[j] + (1.65 * approx_sd[j])))
+
                 if active_bool[j] == True and (
                                 (approx_MLE[j] - (1.65 * approx_sd[j])) > 0. or (
                             approx_MLE[j] + (1.65 * approx_sd[j])) < 0.):
                     power_sel += 1
+
                 if (M_est.target_observed[j] - (1.65 * unad_sd[j])) <= true_target[j] and (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) >= true_target[j]:
                     coverage_rand += 1
-                print("randomized intervals", (M_est.target_observed[j] - (1.65 * unad_sd[j])),(M_est.target_observed[j] + (1.65 * unad_sd[j])))
+                print("randomized intervals", sigma_est* (M_est.target_observed[j] - (1.65 * unad_sd[j])),
+                      sigma_est* (M_est.target_observed[j] + (1.65 * unad_sd[j])))
+
                 if active_bool[j] == True and ((M_est.target_observed[j] - (1.65 * unad_sd[j])) > 0. or (
                             M_est.target_observed[j] + (1.65 * unad_sd[j])) < 0.):
                     power_rand += 1
@@ -358,7 +345,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=100, p=1000, nval=100, rho=0.35, s=10, beta_type=2, snr=0.10, target="full")
+        approx = inference_approx(n=200, p=1000, nval=200, rho=0.35, s=10, beta_type=2, snr=0.10, target="full")
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From bbe7e50b636dd632faa2b4c49365e7d2b7c8774a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 28 Dec 2017 17:41:17 -0800
Subject: [PATCH 465/617] clean-up of code

---
 selection/adjusted_MLE/selective_MLE.py       | 16 ++++++----------
 selection/adjusted_MLE/tests/relaxed_lasso.py |  2 +-
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/selection/adjusted_MLE/selective_MLE.py b/selection/adjusted_MLE/selective_MLE.py
index ccf3f2b80..00f32228b 100644
--- a/selection/adjusted_MLE/selective_MLE.py
+++ b/selection/adjusted_MLE/selective_MLE.py
@@ -79,10 +79,7 @@ def solve_UMVU(target_transform,
                target_observed,
                feasible_point,
                target_cov,
-               randomizer_precision,
-               step=1,
-               nstep=30,
-               tol=1.e-8):
+               randomizer_precision):
 
     A, data_offset = target_transform # data_offset = N
     B, opt_offset = opt_transform     # opt_offset = u
@@ -90,8 +87,6 @@ def solve_UMVU(target_transform,
     nopt = B.shape[1]
     ntarget = A.shape[1]
 
-    #assert ntarget == 1
-
     # setup joint implied covariance matrix
 
     target_precision = np.linalg.inv(target_cov)
@@ -111,8 +106,6 @@ def solve_UMVU(target_transform,
     M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
     M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
-    #print("check matrices", M_1, M_2, L, data_offset, opt_offset)
-
     conditioned_value = data_offset + opt_offset
 
     linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
@@ -141,7 +134,10 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
 
         soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
                                               conditional_precision,
-                                              feasible_point=feasible_point)
+                                              feasible_point=feasible_point,
+                                              step=1,
+                                              nstep=2000,
+                                              tol=1.e-8)
 
         selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
 
@@ -170,7 +166,7 @@ def solve_barrier_nonneg(conjugate_arg,
                          precision,
                          feasible_point=None,
                          step=1,
-                         nstep=2000,
+                         nstep=1000,
                          tol=1.e-8):
 
     scaling = np.sqrt(np.diag(precision))
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index f2b872c6e..a7457c019 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -345,7 +345,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=200, p=1000, nval=200, rho=0.35, s=10, beta_type=2, snr=0.10, target="full")
+        approx = inference_approx(n=200, p=2000, nval=200, rho=0.35, s=10, beta_type=2, snr=0.05, target="full")
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From cf1cdb1117c492d7783fa897e108353358e19ba3 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 28 Dec 2017 23:15:21 -0800
Subject: [PATCH 466/617] removed bootstrapping for interval construction

---
 .../tests/high_dim_boot_coverage.py           | 63 +++----------------
 selection/adjusted_MLE/tests/relaxed_lasso.py |  2 +-
 2 files changed, 11 insertions(+), 54 deletions(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index 6e2f80389..228af2034 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -10,7 +10,6 @@
 import regreg.api as rr
 from selection.randomized.api import randomization
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from scipy.stats import norm as ndist
 import scipy.stats as stats
 
 def glmnet_sigma(X, y):
@@ -67,7 +66,6 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
         if p > n:
-            #sigma_est = np.std(y) / 2.
             sigma_est = np.std(y)
             print("sigma and sigma_est", sigma, sigma_est)
         else:
@@ -130,84 +128,43 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
                                                                        M_est.target_cov,
                                                                        M_est.randomizer_precision)
 
-            approx_sd0 = np.sqrt(np.diag(var))
-            # B = 3000
-            # boot_pivot = np.zeros((B, nactive))
-            # boot_mle_vec = np.zeros((B, nactive))
-            # resid = y - X[:, active].dot(M_est.target_observed)
-            # for b in range(B):
-            #     boot_indices = np.random.choice(n, n, replace=True)
-            #     boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
-            #     target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
-            #     #print("target_boot", target_boot, M_est.target_observed)
-            #     boot_mle = mle_map(target_boot)
-            #     print("target_boot", boot_mle[0], approx_MLE)
-            #     boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-            #     boot_mle_vec[b,:] = boot_mle[0]
-
-            # for b in range(B):
-            #     boot_indices = np.random.choice(n, n, replace=True)
-            #     target_boot = np.linalg.inv(X.T.dot(X)).dot((X[boot_indices, :]).T)[active].dot(resid[boot_indices]) \
-            #                   + M_est.target_observed
-            #     #print("target_boot", target_boot, M_est.target_observed)
-            #     boot_mle = mle_map(target_boot)
-            #     print("target_boot", boot_mle[0], approx_MLE)
-            #     boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-            #     boot_mle_vec[b,:] = boot_mle[0]
-
-            #approx_sd = boot_pivot.std(0)* approx_sd0
-            # approx_sd_boot = boot_mle_vec.std(0)
-            # lower_q = np.percentile(boot_pivot, 5, axis=0)
-            # upper_q = np.percentile(boot_pivot, 95, axis=0)
+            approx_sd = np.sqrt(np.diag(var))
 
             if nactive == 1:
                 approx_MLE = np.array([approx_MLE])
-                approx_sd0 = np.array([approx_sd0])
-                #approx_sd = np.array([approx_sd])
+                approx_sd = np.array([approx_sd])
 
             coverage_sel = 0.
-            coverage_sel0 = 0.
             #true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
             true_target = np.linalg.pinv(X)[active].dot(true_mean)
             print("true target", true_target)
 
             for j in range(nactive):
-                # if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and \
-                #                 (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
-                #     coverage_sel += 1
-                if (approx_MLE[j] - (1.65 * approx_sd0[j])) <= true_target[j] and \
-                                (approx_MLE[j] + (1.65 * approx_sd0[j])) >= true_target[j]:
-                    coverage_sel0 += 1
-                coverage_sel = coverage_sel0
-                print("selective intervals wo bootstrap", (approx_MLE[j] - (1.65 * approx_sd0[j])),
-                      (approx_MLE[j] + (1.65 * approx_sd0[j])))
-                # print("selective intervals w boot pivot", (approx_MLE[j] - (1.65 * approx_sd[j])),
-                #       (approx_MLE[j] + (1.65 * approx_sd[j])))
-                # print("selective intervals w boot mle", (approx_MLE[j] - (1.65 * approx_sd_boot[j])),
-                #       (approx_MLE[j] + (1.65 * approx_sd_boot[j])))
+                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
+                    coverage_sel += 1
+
+                print("selective intervals wo bootstrap", sigma_est*(approx_MLE[j] - (1.65 * approx_sd[j])),
+                      sigma_est *(approx_MLE[j] + (1.65 * approx_sd[j])))
 
             break
 
     if True:
-        return coverage_sel/float(nactive), coverage_sel0/float(nactive), np.true_divide(approx_MLE- true_target, approx_sd0)
+        return coverage_sel/float(nactive), np.true_divide(approx_MLE- true_target, approx_sd)
 
 if __name__ == "__main__":
 
     import matplotlib.pyplot as plt
     ndraw = 100
     coverage_sel = 0.
-    coverage_sel0 = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = inference_approx(n=200, p=1000, nval=200, rho=0.35, s=10, beta_type=1, snr=0.20, target="full")
+        approx = inference_approx(n=500, p=4000, nval=500, rho=0.35, s=10, beta_type=1, snr=0.20, target="full")
         if approx is not None:
             coverage_sel += approx[0]
-            coverage_sel0 += approx[1]
-            pivot = approx[2]
+            pivot = approx[1]
             for j in range(pivot.shape[0]):
                 pivot_obs_info.append(pivot[j])
 
-        sys.stderr.write("selective coverage wo boot" + str(coverage_sel0 / float(i + 1)) + "\n")
         sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
         sys.stderr.write("iteration completed" + str(i) + "\n")
         #sys.stderr.write("pivot" + str(pivot_obs_info) + "\n")
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
index a7457c019..e8c439c7f 100644
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ b/selection/adjusted_MLE/tests/relaxed_lasso.py
@@ -345,7 +345,7 @@ def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2
     partial_risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        approx = inference_approx(n=200, p=2000, nval=200, rho=0.35, s=10, beta_type=2, snr=0.05, target="full")
+        approx = inference_approx(n=200, p=1000, nval=200, rho=0.70, s=10, beta_type=2, snr=0.20, target="full")
         if approx is not None:
             bias += approx[0]
             risk_selMLE += approx[1]

From 256061d9696eb057bb8bad297fb7aa282d0b0280 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 29 Dec 2017 12:30:50 -0800
Subject: [PATCH 467/617] more clean up and tests

---
 .../tests/high_dim_boot_coverage.py           | 36 ++++---------------
 1 file changed, 7 insertions(+), 29 deletions(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index 228af2034..4a264408f 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -1,5 +1,4 @@
 from __future__ import print_function
-from rpy2.robjects.packages import importr
 from rpy2 import robjects
 
 import rpy2.robjects.numpy2ri
@@ -12,32 +11,9 @@
 from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
 import scipy.stats as stats
 
-def glmnet_sigma(X, y):
-    robjects.r('''
-                glmnet_cv = function(X,y){
-                y = as.matrix(y)
-                X = as.matrix(X)
-                n = nrow(X)
-                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                lam_1se = out$lambda.1se
-                lam_min = out$lambda.min
-                return(list(lam_min = n * as.numeric(lam_min), lam_1se = n* as.numeric(lam_1se)))
-                }''')
-
-    lambda_cv_R = robjects.globalenv['glmnet_cv']
-    n, p = X.shape
-    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-
-    lam = lambda_cv_R(r_X, r_y)
-    lam_min = np.array(lam.rx2('lam_min'))
-    lam_1se = np.array(lam.rx2('lam_1se'))
-    return lam_min, lam_1se
-
-
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
-    library(bestsubset) #source('~/best-subset/bestsubset/R/sim.R')
+    library(bestsubset)
     sim_xy = bestsubset::sim.xy
     ''')
 
@@ -135,15 +111,17 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
                 approx_sd = np.array([approx_sd])
 
             coverage_sel = 0.
-            #true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
-            true_target = np.linalg.pinv(X)[active].dot(true_mean)
+            if target == "full":
+                true_target = np.linalg.pinv(X)[active].dot(true_mean)
+            if target == "partial":
+                true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
             print("true target", true_target)
 
             for j in range(nactive):
                 if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
                     coverage_sel += 1
 
-                print("selective intervals wo bootstrap", sigma_est*(approx_MLE[j] - (1.65 * approx_sd[j])),
+                print("selective intervals", sigma_est*(approx_MLE[j] - (1.65 * approx_sd[j])),
                       sigma_est *(approx_MLE[j] + (1.65 * approx_sd[j])))
 
             break
@@ -158,7 +136,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
     coverage_sel = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=4000, nval=500, rho=0.35, s=10, beta_type=1, snr=0.20, target="full")
+        approx = inference_approx(n=500, p=2500, nval=500, rho=0.35, s=20, beta_type=1, snr=0.10, target="full")
         if approx is not None:
             coverage_sel += approx[0]
             pivot = approx[1]

From d0bb945981ae57da8947b4a5f0577d565011bf6e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 29 Dec 2017 15:26:29 -0800
Subject: [PATCH 468/617] commit before switch

---
 selection/adjusted_MLE/tests/high_dim_boot_coverage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
index 4a264408f..fb2e1b121 100644
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
@@ -136,7 +136,7 @@ def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.
     coverage_sel = 0.
     pivot_obs_info = []
     for i in range(ndraw):
-        approx = inference_approx(n=500, p=2500, nval=500, rho=0.35, s=20, beta_type=1, snr=0.10, target="full")
+        approx = inference_approx(n=500, p=2500, nval=500, rho=0.35, s=20, beta_type=1, snr=0.20, target="full")
         if approx is not None:
             coverage_sel += approx[0]
             pivot = approx[1]

From 8f79a6a4abf2931159541a84d1c6bb5c37bf3ee3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 6 Feb 2018 17:20:47 -0800
Subject: [PATCH 469/617] moving changes of MLE under randomized

---
 selection/randomized/selective_MLE.py        | 28 ++++++++------------
 selection/randomized/selective_MLE_utils.pyx |  2 +-
 2 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/selection/randomized/selective_MLE.py b/selection/randomized/selective_MLE.py
index fdd489f78..f820ae21a 100644
--- a/selection/randomized/selective_MLE.py
+++ b/selection/randomized/selective_MLE.py
@@ -61,6 +61,7 @@ def solve_barrier_nonneg(conjugate_arg,
         The Hessian of the value function.
 
     """
+
     p = precision.shape[0]
     scaling = np.sqrt(np.diag(precision))
 
@@ -150,8 +151,6 @@ def selective_MLE(target_observed,
     nopt = B.shape[1]
     ntarget = A.shape[1]
 
-    #assert ntarget == 1
-
     # setup joint implied covariance matrix
 
     target_precision = np.linalg.inv(target_cov)
@@ -171,8 +170,6 @@ def selective_MLE(target_observed,
     M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
     M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
 
-    #print("check matrices", M_1, M_2, L, data_offset, opt_offset)
-
     conditioned_value = data_offset + opt_offset
 
     linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
@@ -201,31 +198,28 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
 
         soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
                                               conditional_precision,
-                                              max_iter=200)
+                                              feasible_point=feasible_point,
+                                              step=1,
+                                              nstep=2000,
+                                              tol=1.e-8)
 
         selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
 
         var_target_lin, var_offset = var_transform
         var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
-        p = var_precision.shape[0]
-        grad, opt_val, opt_proposed = np.ones((3, p), np.float)
-        scaling = np.sqrt(np.diag(conditional_precision))
-
         _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
-                                          var_precision)
+                                          var_precision,
+                                          feasible_point=None,
+                                          step=1,
+                                          nstep=2000)
 
         hessian = target_precision.dot(inv_precision_target +
                                        cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
 
         return selective_MLE, np.linalg.inv(hessian)
 
-    mle_partial = partial(mle_map, 
-                          natparam_transform, 
-                          mle_transform, 
-                          var_transform, 
-                          var_matrices,
-                          feasible_point, 
-                          conditional_precision)
+    mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, var_transform, var_matrices,
+                                    feasible_point, conditional_precision)
     sel_MLE, inv_hessian = mle_partial(target_observed)
 
     implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value), offset_term])
diff --git a/selection/randomized/selective_MLE_utils.pyx b/selection/randomized/selective_MLE_utils.pyx
index 2b593d480..5149946df 100644
--- a/selection/randomized/selective_MLE_utils.pyx
+++ b/selection/randomized/selective_MLE_utils.pyx
@@ -29,7 +29,7 @@ def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient v
                    np.ndarray[DTYPE_float_t, ndim=2] precision,     # Precision matrix of Gaussian
                    np.ndarray[DTYPE_float_t, ndim=1] scaling,       # Diagonal scaling matrix for log barrier
                    double initial_step,
-                   int max_iter=100,
+                   int max_iter=1000,
                    double value_tol=1.e-6):
    
     ndim = precision.shape[0]

From 8d916705c5fce740ea76c4c495d81b914b758198 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 8 Feb 2018 06:52:19 -0800
Subject: [PATCH 470/617] refactor lasso so it can use affine gaussian sampler

---
 selection/randomized/M_estimator.py      |   25 +-
 selection/randomized/convenience.py      |  729 -----------
 selection/randomized/glm.py              |   28 -
 selection/randomized/group_lasso.py      |  690 +++++++++++
 selection/randomized/lasso.py            | 1419 ++++++++++++++++++++++
 selection/randomized/query.py            |  313 +++--
 selection/randomized/randomization.py    |   20 +-
 selection/randomized/tests/test_lasso.py |   75 ++
 8 files changed, 2400 insertions(+), 899 deletions(-)
 create mode 100644 selection/randomized/group_lasso.py
 create mode 100644 selection/randomized/lasso.py
 create mode 100644 selection/randomized/tests/test_lasso.py

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
index e4c3dba86..e45424d31 100644
--- a/selection/randomized/M_estimator.py
+++ b/selection/randomized/M_estimator.py
@@ -111,6 +111,7 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
                 unpenalized[group] = True
 
         self.active_penalty = active_penalty
+
         # solve the restricted problem
 
         self._overall = active + unpenalized > 0
@@ -197,7 +198,6 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
 
         Mest_slice = slice(0, overall.sum())
-        # _Mest_hessian = _hessian[:,overall]
         X, y = loss.data
         W = self.loss.saturated_loss.hessian(X.dot(beta_full))
         _Mest_hessian = np.dot(X.T, X[:, overall] * W[:, None])
@@ -217,7 +217,6 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         if len(active_directions)==0:
             _opt_hessian=0
         else:
-            #_opt_hessian = (_hessian + epsilon * np.identity(p)).dot(active_directions)
             _opt_hessian = np.dot(_Mest_hessian, active_directions[overall]) + epsilon * active_directions
         _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling
 
@@ -228,7 +227,6 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
         unpenalized_directions = np.identity(p)[:,unpenalized]
         if unpenalized.sum():
-            #_opt_linear_term[:, unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling
             _opt_linear_term[:, unpenalized_slice] = (np.dot(_Mest_hessian, unpenalized_directions[overall])
                                                       + epsilon * unpenalized_directions) / _sqrt_scaling
         self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling
@@ -287,30 +285,11 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
 
         self.nboot = nboot
 
-
-#         if not self._setup:
-#             raise ValueError('setup_sampler should be called before using this function')
-
-#         if ('subgradient' not in self.selection_variable and 
-#             'scaling' not in self.selection_variable): # have not conditioned on any thing else
-
-#         elif ('subgradient' not in self.selection_variable and
-#               'scaling' in self.selection_variable): # conditioned on the initial scalings
-#                                                      # only the subgradient in opt_state
-#             new_state = self.group_lasso_dual.bound_prox(opt_state)
-#         elif ('subgradient' in self.selection_variable and
-#               'scaling' not in self.selection_variable): # conditioned on the subgradient
-#                                                          # only the scaling in opt_state
-#             new_state = np.maximum(opt_state, 0)
-#         else:
-#             new_state = opt_state
-#         return new_state
-
-
     def get_sampler(self):
         # setup the default optimization sampler
 
         if not hasattr(self, "_sampler"):
+
             def projection(group_lasso_dual, subgrad_slice, scaling_slice, opt_state):
                 """
                 Full projection for Langevin.
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index c0740959a..cd0ec063b 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -18,735 +18,6 @@
 from .query import multiple_queries
 from .M_estimator import restricted_Mest
 
-class lasso(object):
-
-    r"""
-    A class for the LASSO for post-selection inference.
-    The problem solved is
-
-    .. math::
-
-        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
-            \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
-
-    where $\lambda$ is `lam`, $\omega$ is a randomization generated below
-    and the last term is a small ridge penalty.
-
-    """
-
-    def __init__(self, 
-                 loglike, 
-                 feature_weights,
-                 ridge_term,
-                 randomizer_scale,
-                 randomizer='gaussian',
-                 parametric_cov_estimator=False):
-        r"""
-
-        Create a new post-selection object for the LASSO problem
-
-        Parameters
-        ----------
-
-        loglike : `regreg.smooth.glm.glm`
-            A (negative) log-likelihood as implemented in `regreg`.
-
-        feature_weights : np.ndarray
-            Feature weights for L-1 penalty. If a float,
-            it is brodcast to all features.
-
-        ridge_term : float
-            How big a ridge term to add?
-
-        randomizer_scale : float
-            Scale for IID components of randomization.
-
-        randomizer : str (optional)
-            One of ['laplace', 'logistic', 'gaussian']
-
-
-        """
-
-        self.loglike = loglike
-        self.nfeature = p = self.loglike.shape[0]
-
-        if np.asarray(feature_weights).shape == ():
-            feature_weights = np.ones(loglike.shape) * feature_weights
-        self.feature_weights = np.asarray(feature_weights)
-
-        self.parametric_cov_estimator = parametric_cov_estimator
-
-        if randomizer == 'laplace':
-            self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
-        elif randomizer == 'gaussian':
-            self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
-        elif randomizer == 'logistic':
-            self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
-
-        self.ridge_term = ridge_term
-
-        self.penalty = rr.group_lasso(np.arange(p),
-                                      weights=dict(zip(np.arange(p), self.feature_weights)), lagrange=1.)
-
-    def fit(self, 
-            solve_args={'tol':1.e-12, 'min_its':50}, 
-            views=[], 
-            nboot=1000):
-        """
-        Fit the randomized lasso using `regreg`.
-
-        Parameters
-        ----------
-
-        solve_args : keyword args
-             Passed to `regreg.problems.simple_problem.solve`.
-
-        views : list
-             Other views of the data, e.g. cross-validation.
-
-        Returns
-        -------
-
-        sign_beta : np.float
-             Support and non-zero signs of randomized lasso solution.
-             
-        """
-
-        p = self.nfeature
-        if self.parametric_cov_estimator==True:
-            self._view = glm_group_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
-        else:
-            self._view = glm_group_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
-        self._view.solve(nboot=nboot)
-
-        views = copy(views); views.append(self._view)
-        self._queries = multiple_queries(views)
-        self._queries.solve()
-   
-        self.signs = np.sign(self._view.initial_soln)
-        self.selection_variable = self._view.selection_variable
-        return self.signs
-
-    def decompose_subgradient(self,
-                              conditioning_groups=None,
-                              marginalizing_groups=None):
-        """
-
-        Marginalize over some if inactive part of subgradient
-        if applicable.
-
-        Parameters
-        ----------
-
-        conditioning_groups : np.bool
-             Which groups' subgradients should we condition on.
-
-        marginalizing_groups : np.bool
-             Which groups' subgradients should we marginalize over.
-
-        Returns
-        -------
-
-        None
-
-        """
-
-        if not hasattr(self, "_view"):
-            raise ValueError("fit method should be run first")
-        self._view.decompose_subgradient(conditioning_groups=conditioning_groups, 
-                                         marginalizing_groups=marginalizing_groups)
-
-    def summary(self,
-                selected_features,
-                parameter=None,
-                level=0.9,
-                ndraw=10000, 
-                burnin=2000,
-                compute_intervals=False,
-                bootstrap_sampler=False):
-        """
-        Produce p-values and confidence intervals for targets
-        of model including selected features
-
-        Parameters
-        ----------
-
-        selected_features : np.bool
-            Binary encoding of which features to use in final
-            model and targets.
-
-        parameter : np.array
-            Hypothesized value for parameter -- defaults to 0.
-
-        level : float
-            Confidence level.
-
-        ndraw : int (optional)
-            Defaults to 1000.
-
-        burnin : int (optional)
-            Defaults to 1000.
-
-        bootstrap : bool
-            Use wild bootstrap instead of Gaussian plugin.
-
-        """
-        if not hasattr(self, "_queries"):
-            raise ValueError('run `fit` method before producing summary.')
-
-        if parameter is None:
-            parameter = np.zeros(self.loglike.shape[0])
-
-        unpenalized_mle = restricted_Mest(self.loglike, selected_features)
-
-        if self.parametric_cov_estimator == False:
-            n = self.loglike.data[0].shape[0]
-            form_covariances = glm_nonparametric_bootstrap(n, n)
-            boot_target, boot_target_observed = pairs_bootstrap_glm(self.loglike, selected_features, inactive=None)
-            target_info = boot_target
-        else:
-            target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
-            form_covariances = glm_parametric_covariance(self.loglike)
-
-        opt_samplers = []
-        for q in self._queries.objectives:
-            cov_info = q.setup_sampler()
-            if self.parametric_cov_estimator == False:
-                target_cov, score_cov = form_covariances(target_info,  
-                                                         cross_terms=[cov_info],
-                                                         nsample=q.nboot)
-            else:
-                target_cov, score_cov = form_covariances(target_info,  
-                                                         cross_terms=[cov_info])
-
-            opt_samplers.append(q.sampler)
-
-        opt_samples = [opt_sampler.sample(ndraw,
-                                          burnin) for opt_sampler in opt_samplers]
-
-        ### TODO -- this only uses one view -- what about other queries?
-
-        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter, sample=opt_samples[0])
-        if not np.all(parameter == 0):
-            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=np.zeros_like(parameter), sample=opt_samples[0])
-        else:
-            pvalues = pivots
-
-        intervals = None
-        if compute_intervals:
-            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_samples[0])
-
-        return pivots, pvalues, intervals
-
-    @staticmethod
-    def gaussian(X, 
-                 Y, 
-                 feature_weights, 
-                 sigma=1.,
-                 parametric_cov_estimator=False,
-                 quadratic=None,
-                 ridge_term=None,
-                 randomizer_scale=None,
-                 randomizer='gaussian'):
-        r"""
-        Squared-error LASSO with feature weights.
-
-        Objective function (before randomizer) is 
-        $$
-        \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-
-        where $\lambda$ is `feature_weights`. The ridge term
-        is determined by the Hessian and `np.std(Y)` by default,
-        as is the randomizer scale.
-
-        Parameters
-        ----------
-
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-
-        Y : ndarray
-            Shape (n,) -- the response.
-
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
-            a float, then all parameters are penalized equally.
-
-        sigma : float (optional)
-            Noise variance. Set to 1 if `covariance_estimator` is not None.
-            This scales the loglikelihood by `sigma**(-2)`.
-
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
-            coefficient to 0.
-
-        ridge_term : float
-            How big a ridge term to add?
-
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-
-        Returns
-        -------
-
-        L : `selection.randomized.convenience.lasso`
-        
-
-        """
-
-        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
-        n, p = X.shape
-
-        mean_diag = np.mean((X**2).sum(0))
-        if ridge_term is None:
-            ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
-
-        return lasso(loglike, np.asarray(feature_weights) / sigma**2,
-                     ridge_term, randomizer_scale, randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator)
-
-    @staticmethod
-    def logistic(X, 
-                 successes, 
-                 feature_weights, 
-                 trials=None,
-                 parametric_cov_estimator=False,
-                 quadratic=None,
-                 ridge_term=None,
-                 randomizer='gaussian',
-                 randomizer_scale=None):
-        r"""
-        Logistic LASSO with feature weights.
-
-        Objective function is 
-        $$
-        \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-
-        where $\ell$ is the negative of the logistic 
-        log-likelihood (half the logistic deviance)
-        and $\lambda$ is `feature_weights`.
-
-        Parameters
-        ----------
-
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-
-        successes : ndarray
-            Shape (n,) -- response vector. An integer number of successes.
-            For data that is proportions, multiply the proportions
-            by the number of trials first.
-
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
-            a float, then all parameters are penalized equally.
-
-        trials : ndarray (optional)
-            Number of trials per response, defaults to
-            ones the same shape as Y. 
-
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
-            coefficient to 0.
-
-        ridge_term : float
-            How big a ridge term to add?
-
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-
-        Returns
-        -------
-
-        L : `selection.randomized.convenience.lasso`
-        
-
-        """
-        n, p = X.shape
-
-        loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
-
-        mean_diag = np.mean((X**2).sum(0))
-
-        if ridge_term is None:
-            ridge_term = mean_diag / np.sqrt(n)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 
-
-        return lasso(loglike, feature_weights, 
-                     ridge_term, 
-                     randomizer_scale,
-                     parametric_cov_estimator=parametric_cov_estimator,
-                     randomizer=randomizer)
-
-    @staticmethod
-    def coxph(X, 
-              times, 
-              status, 
-              feature_weights,
-              parametric_cov_estimator=False,
-              quadratic=None,
-              ridge_term=None,
-              randomizer='gaussian',
-              randomizer_scale=None):
-        r"""
-        Cox proportional hazards LASSO with feature weights.
-
-        Objective function is 
-        $$
-        \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-
-        where $\ell^{\text{Cox}}$ is the 
-        negative of the log of the Cox partial
-        likelihood and $\lambda$ is `feature_weights`.
-
-        Uses Efron's tie breaking method.
-
-        Parameters
-        ----------
-
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-
-        times : ndarray
-            Shape (n,) -- the survival times.
-
-        status : ndarray
-            Shape (n,) -- the censoring status.
-
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
-            a float, then all parameters are penalized equally.
-
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
-            coefficient to 0.
-
-        ridge_term : float
-            How big a ridge term to add?
-
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-
-        Returns
-        -------
-
-        L : `selection.randomized.convenience.lasso`
-        
-
-        """
-        loglike = coxph_obj(X, times, status, quadratic=quadratic)
-
-        # scale for randomization seems kind of meaningless here...
-
-        mean_diag = np.mean((X**2).sum(0))
-
-        if ridge_term is None:
-            ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
-
-        return lasso(loglike, 
-                     feature_weights, 
-                     ridge_term,
-                     randomizer_scale, 
-                     randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator)
-
-    @staticmethod
-    def poisson(X, 
-                counts, 
-                feature_weights,
-                parametric_cov_estimator=False,
-                quadratic=None,
-                ridge_term=None,
-                randomizer_scale=None,
-                randomizer='gaussian'):
-        r"""
-        Poisson log-linear LASSO with feature weights.
-
-        Objective function is 
-        $$
-        \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-
-        where $\ell^{\text{Poisson}}$ is the negative
-        of the log of the Poisson likelihood (half the deviance)
-        and $\lambda$ is `feature_weights`.
-
-        Parameters
-        ----------
-
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-
-        counts : ndarray
-            Shape (n,) -- the response.
-
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
-            a float, then all parameters are penalized equally.
-
-
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
-            coefficient to 0.
-
-        ridge_term : float
-            How big a ridge term to add?
-
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-
-        Returns
-        -------
-
-        L : `selection.randomized.convenience.lasso`
-        
-
-        """
-        n, p = X.shape
-        loglike = rr.glm.poisson(X, counts, quadratic=quadratic)
-
-        # scale for randomizer seems kind of meaningless here...
-
-        mean_diag = np.mean((X**2).sum(0))
-
-        if ridge_term is None:
-            ridge_term = np.std(counts)**2 * mean_diag / np.sqrt(n)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts)
-
-        return lasso(loglike, 
-                     feature_weights, 
-                     ridge_term,
-                     randomizer_scale, 
-                     randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator)
-
-    @staticmethod
-    def sqrt_lasso(X, 
-                   Y, 
-                   feature_weights, 
-                   quadratic=None,
-                   parametric_cov_estimator=False,
-                   sigma_estimate='truncated',
-                   solve_args={'min_its':200},
-                   randomizer_scale=None,
-                   randomizer='gaussian'):
-        r"""
-        Use sqrt-LASSO to choose variables.
-
-        Objective function is 
-        $$
-        \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-
-        where $\lambda$ is `feature_weights`. After solving the problem
-        treat as if `gaussian` with implied variance and choice of 
-        multiplier. See arxiv.org/abs/1504.08031 for details.
-
-        Parameters
-        ----------
-
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-
-        Y : ndarray
-            Shape (n,) -- the response.
-
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
-            a float, then all parameters are penalized equally.
-
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
-            coefficient to 0.
-
-        covariance : str
-            One of 'parametric' or 'sandwich'. Method
-            used to estimate covariance for inference
-            in second stage.
-
-        sigma_estimate : str
-            One of 'truncated' or 'OLS'. Method
-            used to estimate $\sigma$ when using
-            parametric covariance.
-
-        solve_args : dict
-            Arguments passed to solver.
-
-        ridge_term : float
-            How big a ridge term to add?
-
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-
-        Returns
-        -------
-
-        L : `selection.randomized.convenience.lasso`
-        
-        Notes
-        -----
-
-        Unlike other variants of LASSO, this
-        solves the problem on construction as the active
-        set is needed to find equivalent gaussian LASSO.
-
-        Assumes parametric model is correct for inference,
-        i.e. does not accept a covariance estimator.
-
-        """
-
-        raise NotImplementedError
-
-        n, p = X.shape
-
-        # scale for randomization seems kind of meaningless here...
-
-        mean_diag = np.mean((X**2).sum(0))
-        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
-        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
-
-        if np.asarray(feature_weights).shape == ():
-            feature_weights = np.ones(p) * feature_weights
-        feature_weights = np.asarray(feature_weights)
-
-        # TODO: refits sqrt lasso more than once -- make an override for avoiding refitting?
-
-        soln = solve_sqrt_lasso(X, Y, weights=feature_weights, quadratic=quadratic, solve_args=solve_args)[0]
-
-        # find active set, and estimate of sigma
-
-        active = (soln != 0)
-        nactive = active.sum()
-
-        if nactive:
-
-            subgrad = np.sign(soln[active]) * feature_weights[active]
-            X_E = X[:,active]
-            X_Ei = np.linalg.pinv(X_E)
-            sigma_E = np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)
-            multiplier = np.sqrt((n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))
-
-            # check truncation interval for sigma_E
-
-            # the KKT conditions imply an inequality like
-            # \hat{\sigma}_E \cdot LHS \leq RHS
-
-            penalized = feature_weights[active] != 0
-
-            if penalized.sum():
-                D_E = np.sign(soln[active][penalized]) # diagonal matrix of signs
-                LHS = D_E * np.linalg.solve(X_E.T.dot(X_E), subgrad)[penalized]
-                RHS = D_E * X_Ei.dot(Y)[penalized] 
-
-                ratio = RHS / LHS
-
-                group1 = LHS > 0
-                upper_bound = np.inf
-                if group1.sum():
-                    upper_bound = min(upper_bound, np.min(ratio[group1])) # necessarily these will have RHS > 0
-
-                group2 = (LHS <= 0) * (RHS <= 0) # we can ignore the other possibility since this gives a lower bound of 0
-                lower_bound = 0
-                if group2.sum():
-                    lower_bound = max(lower_bound, np.max(ratio[group2]))
-
-                upper_bound /= multiplier
-                lower_bound /= multiplier
-
-            else:
-                lower_bound = 0
-                upper_bound = np.inf
-
-            _sigma_estimator_args = (sigma_E, 
-                                     n - nactive,
-                                     lower_bound, 
-                                     upper_bound)
-
-            if sigma_estimate == 'truncated':
-                _sigma_hat = estimate_sigma(*_sigma_estimator_args)
-            elif sigma_estimate == 'OLS':
-                _sigma_hat = sigma_E
-            else:
-                raise ValueError('sigma_estimate must be one of ["truncated", "OLS"]')
-        else:
-            _sigma_hat = np.linalg.norm(Y) / np.sqrt(n)
-            multiplier = np.sqrt(n)
-            sigma_E = _sigma_hat
-
-        # XXX how should quadratic be changed?
-        # multiply everything by sigma_E?
-
-        if quadratic is not None:
-            qc = quadratic.collapsed()
-            qc.coef *= np.sqrt(n - nactive) / sigma_E
-            qc.linear_term *= np.sqrt(n - nactive) / sigma_E
-            quadratic = qc
-
-        loglike = rr.glm.gaussian(X, Y, quadratic=quadratic)
-
-        L = lasso(loglike, feature_weights * multiplier * sigma_E,
-                  parametric_cov_estimator=parametric_cov_estimator,
-                  ignore_inactive_constraints=True)
-
-        # these arguments are reused for data carving
-
-        if nactive:
-            L._sigma_hat = _sigma_hat
-            L._sigma_estimator_args = _sigma_estimator_args
-            L._weight_multiplier = multiplier * sigma_E
-            L._multiplier = multiplier
-            L.lasso_solution = soln
-
-        return L
-
-
 class step(lasso):
 
     r"""
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 77225441b..35b546bf8 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -466,16 +466,6 @@ def subsample_diff(m, n, indices):
 
         return bootstrap_score
 
-
-class glm_group_lasso_parametric(M_estimator):
-
-    # this setup_sampler returns only the active set
-
-    def setup_sampler(self):
-
-        return self.selection_variable['variables']
-
-
 class glm_greedy_step(greedy_score_step, glm):
 
     # XXX this makes the assumption that our
@@ -500,24 +490,6 @@ def setup_sampler(self):
         return bootstrap_score
 
 
-class fixedX_group_lasso(M_estimator):
-
-    def __init__(self, X, Y, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
-        loss = glm.gaussian(X, Y)
-        M_estimator.__init__(self,
-                             loss, 
-                             epsilon, 
-                             penalty, 
-                             randomization, solve_args=solve_args)
-
-    def setup_sampler(self):
-
-        X, Y = self.loss.data
-
-        bootstrap_score = resid_bootstrap(self.loss,
-                                          self.selection_variable['variables'],
-                                          ~self.selection_variable['variables'])[0]
-        return bootstrap_score
 
 # Methods to form appropriate covariances
 
diff --git a/selection/randomized/group_lasso.py b/selection/randomized/group_lasso.py
new file mode 100644
index 000000000..e45424d31
--- /dev/null
+++ b/selection/randomized/group_lasso.py
@@ -0,0 +1,690 @@
+from __future__ import print_function
+import functools
+from copy import copy
+
+import numpy as np
+import scipy
+from scipy import matrix
+
+import regreg.api as rr
+import regreg.affine as ra
+
+from .query import query, optimization_sampler
+from .reconstruction import reconstruct_full_from_internal
+from .randomization import split
+
+class M_estimator(query):
+
+    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
+        r"""
+        Store the ingredients of a randomized, penalized M-estimation problem.
+        Nothing is fit here: the optimization itself is deferred to `solve`.
+
+        `solve` later computes $\bar{\beta}_E$ which is the restricted
+        M-estimator (i.e. subject to the constraint $\beta_{-E}=0$).
+
+        Parameters:
+        -----------
+
+        loss, epsilon, penalty, randomization:
+            Stored unchanged for `solve`; randomization is also passed to `query.__init__`.
+
+        solve_args: dict
+            Arguments to be passed to regreg solver.
+
+        Returns:
+        --------
+
+        None
+
+        Notes:
+        ------
+
+        `solve` (not this constructor) forms the Hessian of the loss at the
+        restricted M-estimator; no bootstrap covariance is computed here.
+
+        """
+
+        query.__init__(self, randomization)
+
+        (self.loss,
+         self.epsilon,
+         self.penalty,
+         self.randomization,
+         self.solve_args) = (loss,
+                             epsilon,
+                             penalty,
+                             randomization,
+                             solve_args)
+         
+    # Methods needed for subclassing a query
+
+    def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
+
+        self.randomize()
+
+        (loss,
+         randomized_loss,
+         epsilon,
+         penalty,
+         randomization,
+         solve_args) = (self.loss,
+                        self.randomized_loss, 
+                        self.epsilon,
+                        self.penalty,
+                        self.randomization,
+                        self.solve_args)
+
+        # initial solution
+
+        problem = rr.simple_problem(randomized_loss, penalty)
+        self.initial_soln = problem.solve(**solve_args)
+
+        # find the active groups and their direction vectors
+        # as well as unpenalized groups
+
+        groups = np.unique(penalty.groups) 
+        active_groups = np.zeros(len(groups), np.bool)
+        unpenalized_groups = np.zeros(len(groups), np.bool)
+
+        active_directions = []
+        active = np.zeros(loss.shape, np.bool)
+        unpenalized = np.zeros(loss.shape, np.bool)
+
+        initial_scalings = []
+
+        active_directions_list = [] ## added for group lasso
+        active_penalty = []
+        for i, g in enumerate(groups):
+            group = penalty.groups == g
+            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) > 1.e-6 * penalty.weights[g]) and (penalty.weights[g] > 0)
+            unpenalized_groups[i] = (penalty.weights[g] == 0)
+            if active_groups[i]:
+                active[group] = True
+                z = np.zeros(active.shape, np.float)
+                z[group] = self.initial_soln[group] / np.linalg.norm(self.initial_soln[group])
+                active_directions.append(z)
+                active_directions_list.append(z[group]) ## added for group lasso
+                active_penalty.append(penalty.weights[g]) ## added
+                initial_scalings.append(np.linalg.norm(self.initial_soln[group]))
+            if unpenalized_groups[i]:
+                unpenalized[group] = True
+
+        self.active_penalty = active_penalty
+
+        # index masks for the restricted problem (the restricted fit itself happens further below)
+
+        self._overall = active + unpenalized > 0
+        self._inactive = ~self._overall
+        self._unpenalized = unpenalized
+
+        self.active_directions_list = active_directions_list ## added for group lasso
+        self._active_directions = np.array(active_directions).T
+        self._active_groups = np.array(active_groups, np.bool)
+        self._unpenalized_groups = np.array(unpenalized_groups, np.bool)
+
+        self.selection_variable = {'groups':self._active_groups, 
+                                   'variables':self._overall,
+                                   'directions':self._active_directions}
+
+        # initial state for opt variables
+
+        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') + 
+                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad')) 
+                          # the quadratic of a smooth_atom is not included in computing the smooth_objective
+        self.initial_subgrad = initial_subgrad
+        initial_subgrad = initial_subgrad[self._inactive]
+        initial_unpenalized = self.initial_soln[self._unpenalized]
+        self.observed_opt_state = np.concatenate([initial_scalings,
+                                                  initial_unpenalized,
+                                                  initial_subgrad], axis=0)
+
+        # set the _solved bit
+
+        self._solved = True
+
+        # Now setup the pieces for linear decomposition
+
+        (loss,
+         epsilon,
+         penalty,
+         initial_soln,
+         overall,
+         inactive,
+         unpenalized,
+         active_groups,
+         active_directions) = (self.loss,
+                               self.epsilon,
+                               self.penalty,
+                               self.initial_soln,
+                               self._overall,
+                               self._inactive,
+                               self._unpenalized,
+                               self._active_groups,
+                               self._active_directions)
+
+        # scaling should be chosen to be Lipschitz constant for gradient of Gaussian part
+
+        # we are implicitly assuming that
+        # loss is a pairs model
+
+        self.scaling = scaling
+        _sqrt_scaling = np.sqrt(self.scaling)
+
+        _beta_unpenalized = restricted_Mest(loss, overall, solve_args=solve_args)
+
+        beta_full = np.zeros(overall.shape)
+        beta_full[overall] = _beta_unpenalized
+        #_hessian = loss.hessian(beta_full)
+        self._beta_full = beta_full
+
+        # observed state for score in internal coordinates
+
+        self.observed_internal_state = np.hstack([_beta_unpenalized * _sqrt_scaling,
+                                                  -loss.smooth_objective(beta_full, 'grad')[inactive] / _sqrt_scaling])
+
+        # form linear part
+        self.num_opt_var = self.observed_opt_state.shape[0]
+        p = loss.shape[0] # shorthand for p
+
+        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
+        # E for active
+        # U for unpenalized
+        # -E for inactive
+
+        _opt_linear_term = np.zeros((p, self._active_groups.sum() + unpenalized.sum() + inactive.sum()))
+        _score_linear_term = np.zeros((p, p))
+
+        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
+
+        Mest_slice = slice(0, overall.sum())
+        X, y = loss.data
+        W = self.loss.saturated_loss.hessian(X.dot(beta_full))
+        _Mest_hessian = np.dot(X.T, X[:, overall] * W[:, None])
+        self._Mest_hessian = _Mest_hessian
+        _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
+
+        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
+
+        null_idx = range(overall.sum(), p)
+        inactive_idx = np.nonzero(inactive)[0]
+        for _i, _n in zip(inactive_idx, null_idx):
+            _score_linear_term[_i,_n] = -_sqrt_scaling
+
+        # c_E piece 
+
+        scaling_slice = slice(0, active_groups.sum())
+        if len(active_directions)==0:
+            _opt_hessian=0
+        else:
+            _opt_hessian = np.dot(_Mest_hessian, active_directions[overall]) + epsilon * active_directions
+        _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling
+
+        self.observed_opt_state[scaling_slice] *= _sqrt_scaling
+
+        # beta_U piece
+
+        unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
+        unpenalized_directions = np.identity(p)[:,unpenalized]
+        if unpenalized.sum():
+            _opt_linear_term[:, unpenalized_slice] = (np.dot(_Mest_hessian, unpenalized_directions[overall])
+                                                      + epsilon * unpenalized_directions) / _sqrt_scaling
+        self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling
+
+        # subgrad piece
+
+        subgrad_idx = range(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
+        subgrad_slice = slice(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
+        for _i, _s in zip(inactive_idx, subgrad_idx):
+            _opt_linear_term[_i,_s] = _sqrt_scaling
+
+        self.observed_opt_state[subgrad_idx] /= _sqrt_scaling
+
+        # form affine part
+
+        _opt_affine_term = np.zeros(p)
+        idx = 0
+        groups = np.unique(penalty.groups) 
+        for i, g in enumerate(groups):
+            if active_groups[i]:
+                group = penalty.groups == g
+                _opt_affine_term[group] = active_directions[:,idx][group] * penalty.weights[g]
+                idx += 1
+
+        # two transforms that encode score and optimization
+        # variable roles 
+
+        # later, we will modify `score_transform`
+        # in `linear_decomposition`
+
+        self.opt_transform = (_opt_linear_term, _opt_affine_term)
+        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
+
+        # now store everything needed for the projections
+        # the projection acts only on the optimization
+        # variables
+
+        self.scaling_slice = scaling_slice
+
+        # weights are divided by sqrt(scaling) here because the subgradient coordinates were divided by it above
+
+        new_groups = penalty.groups[inactive]
+        new_weights = dict([(g, penalty.weights[g] / _sqrt_scaling) for g in penalty.weights.keys() if g in np.unique(new_groups)])
+
+        # we form a dual group lasso object
+        # to do the projection
+
+        self.group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, bound=1.)
+        self.subgrad_slice = subgrad_slice
+
+        self._setup = True
+        self._marginalize_subgradient = False
+        self.scaling_slice = scaling_slice
+        self.unpenalized_slice = unpenalized_slice
+        self.ndim = loss.shape[0]
+
+        self.nboot = nboot
+
+    def get_sampler(self):
+        # setup the default optimization sampler
+
+        if not hasattr(self, "_sampler"):
+
+            def projection(group_lasso_dual, subgrad_slice, scaling_slice, opt_state):
+                """
+                Full projection for Langevin.
+
+                The state here will be only the state of the optimization variables.
+                """
+
+                new_state = opt_state.copy() # not really necessary to copy
+                new_state[scaling_slice] = np.maximum(opt_state[scaling_slice], 0)
+                new_state[subgrad_slice] = group_lasso_dual.bound_prox(opt_state[subgrad_slice])
+                return new_state
+
+            projection = functools.partial(projection, self.group_lasso_dual, self.subgrad_slice, self.scaling_slice)
+
+            def grad_log_density(query,
+                                 opt_linear,
+                                 rand_gradient,
+                                 internal_state,
+                                 opt_state):
+                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                return opt_linear.T.dot(rand_gradient(full_state).T)
+
+            grad_log_density = functools.partial(grad_log_density, self, self.opt_transform[0], self.randomization.gradient)
+
+            def log_density(query,
+                            opt_linear,
+                            rand_log_density,
+                            internal_state,
+                            opt_state):
+                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                return rand_log_density(full_state)
+
+            log_density = functools.partial(log_density, self, self.opt_transform[0], self.randomization.log_density)
+
+            self._sampler = optimization_sampler(self.observed_opt_state,
+                                                 self.observed_internal_state.copy(),
+                                                 self.score_transform,
+                                                 self.opt_transform,
+                                                 projection,
+                                                 grad_log_density,
+                                                 log_density)
+        return self._sampler
+
+    sampler = property(get_sampler, query.set_sampler)
+
+
+    def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=None):
+        """
+        ADD DOCSTRING
+
+        conditioning_groups and marginalizing_groups should be disjoint
+        """
+
+        groups = np.unique(self.penalty.groups)
+        condition_inactive_groups = np.zeros_like(groups, dtype=bool)
+
+        if conditioning_groups is None:
+            conditioning_groups = np.zeros_like(groups, dtype=np.bool)
+
+        if marginalizing_groups is None:
+            marginalizing_groups = np.zeros_like(groups, dtype=np.bool)
+
+        if np.any(conditioning_groups * marginalizing_groups):
+            raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
+
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
+
+        condition_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
+        moving_inactive_groups = np.zeros_like(groups, dtype=bool)
+        moving_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
+        _inactive_groups = ~(self._active_groups+self._unpenalized)
+
+        inactive_marginal_groups = np.zeros_like(self._inactive, dtype=bool)
+        limits_marginal_groups = np.zeros_like(self._inactive, np.float)
+
+        for i, g in enumerate(groups):
+            if (_inactive_groups[i]) and conditioning_groups[i]:
+                group = self.penalty.groups == g
+                condition_inactive_groups[i] = True
+                condition_inactive_variables[group] = True
+            elif (_inactive_groups[i]) and (~conditioning_groups[i]) and (~marginalizing_groups[i]):
+                group = self.penalty.groups == g
+                moving_inactive_groups[i] = True
+                moving_inactive_variables[group] = True
+            if (_inactive_groups[i]) and marginalizing_groups[i]:
+                group = self.penalty.groups == g
+                inactive_marginal_groups[i] = True
+                limits_marginal_groups[i] = self.penalty.weights[g]
+
+        opt_linear, opt_offset = self.opt_transform
+
+        new_linear = np.zeros((opt_linear.shape[0], (self._active_groups.sum() +
+                                                     self._unpenalized_groups.sum() +
+                                                     moving_inactive_variables.sum())))
+        new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
+        new_linear[:, self.unpenalized_slice] = opt_linear[:, self.unpenalized_slice]
+
+        inactive_moving_idx = np.nonzero(moving_inactive_variables)[0]
+        subgrad_idx = range(self._active_groups.sum() + self._unpenalized.sum(),
+                            self._active_groups.sum() + self._unpenalized.sum() +
+                            moving_inactive_variables.sum())
+        subgrad_slice = subgrad_idx
+        for _i, _s in zip(inactive_moving_idx, subgrad_idx):
+            new_linear[_i, _s] = 1.
+
+        observed_opt_state = self.observed_opt_state[:(self._active_groups.sum() +
+                                                       self._unpenalized_groups.sum() +
+                                                       moving_inactive_variables.sum())]
+        observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive_variables]
+
+        condition_linear = np.zeros((opt_linear.shape[0], (self._active_groups.sum() +
+                                                           self._unpenalized_groups.sum() +
+                                                           condition_inactive_variables.sum())))
+        inactive_condition_idx = np.nonzero(condition_inactive_variables)[0]
+        subgrad_condition_idx = range(self._active_groups.sum() + self._unpenalized.sum(),
+                                      self._active_groups.sum() + self._unpenalized.sum() + condition_inactive_variables.sum())
+
+        for _i, _s in zip(inactive_condition_idx, subgrad_condition_idx):
+            condition_linear[_i, _s] = 1.
+
+        new_offset = condition_linear[:,subgrad_condition_idx].dot(self.initial_subgrad[condition_inactive_variables]) + opt_offset
+
+        new_opt_transform = (new_linear, new_offset)
+
+        print("limits marginal groups", limits_marginal_groups)
+        print("inactive marginal groups", inactive_marginal_groups)
+
+        def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, inactive_marginal_groups):
+            return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
+                              _cdf(full_state_plus) - _cdf(full_state_minus)))[inactive_marginal_groups]
+
+        def new_grad_log_density(query, 
+                                 limits_marginal_groups,
+                                 inactive_marginal_groups,
+                                 _cdf,
+                                 _pdf,
+                                 opt_linear,
+                                 deriv_log_dens,
+                                 internal_state, 
+                                 opt_state):
+
+            full_state = reconstruct_full_from_internal(new_opt_transform, query.score_transform, internal_state, opt_state)
+
+            p = query.penalty.shape[0]
+            weights = np.zeros(p)
+
+            if inactive_marginal_groups.sum()>0:
+                full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
+                full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
+                weights[inactive_marginal_groups] = _fraction(_cdf, _pdf, full_state_plus, full_state_minus, inactive_marginal_groups)
+            weights[~inactive_marginal_groups] = deriv_log_dens(full_state)[~inactive_marginal_groups]
+            return -opt_linear.T.dot(weights)
+
+        new_grad_log_density = functools.partial(new_grad_log_density,
+                                                 self,
+                                                 limits_marginal_groups,
+                                                 inactive_marginal_groups,
+                                                 self.randomization._cdf,
+                                                 self.randomization._pdf,
+                                                 new_opt_transform[0],
+                                                 self.randomization._derivative_log_density)
+
+        def new_log_density(query, 
+                            limits_marginal_groups,
+                            inactive_marginal_groups,
+                            _cdf,
+                            _pdf,
+                            opt_linear,
+                            log_dens,
+                            internal_state, 
+                            opt_state):
+
+            full_state = reconstruct_full_from_internal(new_opt_transform,
+                                                        query.score_transform,
+                                                        internal_state,
+                                                        opt_state)
+            full_state = np.atleast_2d(full_state)
+            p = query.penalty.shape[0]
+            logdens = np.zeros(full_state.shape[0])
+
+            if inactive_marginal_groups.sum()>0:
+                full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
+                full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
+                logdens += np.sum(np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:,inactive_marginal_groups], axis=1)
+
+            logdens += log_dens(full_state[:,~inactive_marginal_groups])
+
+            return np.squeeze(logdens) # should this be negative to match the gradient log density?
+
+        new_log_density = functools.partial(new_log_density,
+                                            self,
+                                            limits_marginal_groups,
+                                            inactive_marginal_groups,
+                                            self.randomization._cdf,
+                                            self.randomization._pdf,
+                                            self.opt_transform[0],
+                                            self.randomization._log_density)
+
+        new_groups = self.penalty.groups[moving_inactive_groups]
+        _sqrt_scaling = np.sqrt(self.scaling)
+        new_weights = dict([(g, self.penalty.weights[g] / _sqrt_scaling) for g in self.penalty.weights.keys() if g in np.unique(new_groups)])
+        new_group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, bound=1.)
+
+        def new_projection(group_lasso_dual,
+                           noverall,
+                           opt_state):
+            new_state = opt_state.copy()
+            new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
+            new_state[noverall:] = group_lasso_dual.bound_prox(opt_state[noverall:])
+            return new_state
+
+        new_projection = functools.partial(new_projection,
+                                           new_group_lasso_dual,
+                                           self._overall.sum())
+                                           
+        new_selection_variable = copy(self.selection_variable)
+        new_selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
+
+        self.sampler = optimization_sampler(observed_opt_state,
+                                            self.observed_internal_state.copy(),
+                                            self.score_transform,
+                                            new_opt_transform,
+                                            new_projection,
+                                            new_grad_log_density,
+                                            new_log_density,
+                                            selection_info=(self, new_selection_variable))
+
+    def condition_on_scalings(self):
+        """
+        Condition on the observed scalings, leaving only the subgradient free.
+        """
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
+
+        opt_linear, opt_offset = self.opt_transform
+        observed_scalings = self.observed_opt_state[self.scaling_slice]
+        # the scaling columns, held at their observed values, move into the offset
+        new_offset = opt_linear[:, self.scaling_slice].dot(observed_scalings) + opt_offset
+        new_linear = opt_linear[:, self.subgrad_slice]
+        self.opt_transform = (new_linear, new_offset)
+
+        # for group LASSO this will induce a bigger jacobian
+        self.selection_variable['scalings'] = observed_scalings
+
+        # reset slices: every remaining optimization variable is a subgradient
+        # coordinate; `bool` replaces the `np.bool` alias removed in NumPy 1.24
+        self.observed_opt_state = self.observed_opt_state[self.subgrad_slice]
+        self.subgrad_slice = slice(None, None, None)
+        self.scaling_slice = np.zeros(new_linear.shape[1], bool)
+        self.num_opt_var = new_linear.shape[1]
+
+#     def grad_log_density(self, internal_state, opt_state):
+#         """
+#             marginalizing over the sub-gradient
+
+#             full_state is 
+#             density should be expressed in terms of opt_state coordinates
+#         """
+
+#         if not self._setup:
+#             raise ValueError('setup_sampler should be called before using this function')
+
+#         if self._marginalize_subgradient:
+
+#             full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
+
+#             p = self.penalty.shape[0]
+#             weights = np.zeros(p)
+
+#             if self.inactive_marginal_groups.sum()>0:
+#                 full_state_plus = full_state + np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
+#                 full_state_minus = full_state - np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
+
+
+#             def fraction(full_state_plus, full_state_minus, inactive_marginal_groups):
+#                 return (np.divide(self.randomization._pdf(full_state_plus) - self.randomization._pdf(full_state_minus),
+#                        self.randomization._cdf(full_state_plus) - self.randomization._cdf(full_state_minus)))[inactive_marginal_groups]
+
+#             if self.inactive_marginal_groups.sum() > 0:
+#                 weights[self.inactive_marginal_groups] = fraction(full_state_plus, full_state_minus, self.inactive_marginal_groups)
+#             weights[~self.inactive_marginal_groups] = self.randomization._derivative_log_density(full_state)[~self.inactive_marginal_groups]
+
+#             opt_linear = self.opt_transform[0]
+#             return -opt_linear.T.dot(weights)
+#         else:
+#             return query.grad_log_density(self, internal_state, opt_state)
+
+def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
+    """
+    Solve the M-estimation problem restricted to the coordinates in `active`.
+
+    Parameters
+    ----------
+
+    Mest_loss : objective function
+        Loss exposing `data` and `_is_transform` (e.g. a GLM loss).
+
+    active : ndarray
+        Selects the columns to use; its `sum()` is taken as their number.
+
+    solve_args : dict
+        Keyword arguments forwarded to `solve`.
+
+    Returns
+    -------
+
+    soln : ndarray
+        Minimizer of the restricted problem.
+
+    """
+    design, _ = Mest_loss.data
+
+    if Mest_loss._is_transform or not hasattr(Mest_loss, 'saturated_loss'):
+        # generic loss: compose it with the map embedding the active coordinates
+        embedding = ra.selector(active, ra.astransform(design).input_shape[0], ra.identity((active.sum(),)))
+        restricted = rr.affine_smooth(Mest_loss, embedding.T)
+    else:
+        # GLM: apply the saturated loss to the active columns of the design
+        restricted = rr.affine_smooth(Mest_loss.saturated_loss, design[:, active])
+
+    return restricted.solve(**solve_args)
+
+class M_estimator_split(M_estimator):
+    """`M_estimator` whose randomization is a `split` built from a subsample size."""
+
+    def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10}):
+        # `solve_args` is only forwarded to the parent constructor, never mutated here
+        total_size = loss.saturated_loss.shape[0]
+
+        # validate before building the randomization or running the parent setup
+        if subsample_size > total_size:
+            raise ValueError('subsample size must be smaller than total sample size')
+
+        self.randomization = split(loss.shape, subsample_size, total_size)
+        M_estimator.__init__(self, loss, epsilon, penalty, self.randomization, solve_args=solve_args)
+        self.total_size, self.subsample_size = total_size, subsample_size
+        
+
+class M_estimator_group_lasso(M_estimator):
+    """`M_estimator` that also precomputes `Q`, `Qinv` and `VQLambda` once constructed."""
+    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
+        # `_Mest_hessian` and `_overall` are read below; presumably set by the parent constructor
+        M_estimator.__init__(self, loss, epsilon, penalty, randomization, solve_args=solve_args)
+
+        self.Q = self._Mest_hessian[self._overall,:] + epsilon * np.identity(self._overall.sum())
+        self.Qinv = np.linalg.inv(self.Q)
+        self.form_VQLambda()
+
+    def form_VQLambda(self):
+        """Build, store and return ``V^T Qinv Lambda V``."""
+        def null(A, eps=1e-12):
+            # columns: orthonormal basis of the numerical null space of A
+            u, s, vh = np.linalg.svd(A)
+            padding = max(0, np.shape(A)[1] - np.shape(s)[0])
+            null_mask = np.concatenate(((s <= eps), np.ones((padding,), dtype=bool)), axis=0)
+            return np.compress(null_mask, vh, axis=0).T
+
+        directions = self.active_directions_list
+        nactive_groups = len(directions)
+        nactive_vars = sum(d.shape[0] for d in directions)
+        V = np.zeros((nactive_vars, nactive_vars - nactive_groups))
+        Lambda = np.zeros((nactive_vars, nactive_vars))
+        temp_row, temp_col = 0, 0
+        for g, direction in enumerate(directions):
+            size_curr_group = direction.shape[0]
+            # diagonal block of Lambda: this group's penalty times the identity
+            Lambda[temp_row:(temp_row + size_curr_group), temp_row:(temp_row + size_curr_group)] \
+                = self.active_penalty[g] * np.identity(size_curr_group)
+            # V_g: null-space basis of the direction (a single row if the direction is 1-D)
+            V_g = null(np.atleast_2d(direction))
+            V[temp_row:(temp_row + V_g.shape[0]), temp_col:(temp_col + V_g.shape[1])] = V_g
+            temp_row += V_g.shape[0]
+            temp_col += V_g.shape[1]
+        self.VQLambda = np.dot(np.dot(V.T, self.Qinv), Lambda.dot(V))
+        return self.VQLambda
+
+    def derivative_logdet_jacobian(self, scalings):
+        """Put traces of diagonal blocks of inv(Gamma_minus + VQLambda) into the `scaling_slice` entries."""
+        from scipy.linalg import block_diag
+        nactive_groups = len(self.active_directions_list)
+        matrix_list = [scalings[i] * np.identity(self.active_directions_list[i].shape[0] - 1) for i in
+                       range(scalings.shape[0])]
+        Gamma_minus = block_diag(*matrix_list)
+        jacobian_inv = np.linalg.inv(Gamma_minus + self.VQLambda)
+
+        # NOTE(review): block boundaries use `_active_directions` and full group sizes, while
+        # `Gamma_minus` is built from blocks of size (group size - 1) -- confirm this is intended
+        group_sizes = [self._active_directions[i].shape[0] for i in range(nactive_groups)]
+        group_sizes_cumsum = np.concatenate(([0], np.array(group_sizes).cumsum()))
+        jacobian_inv_blocks = [
+            jacobian_inv[group_sizes_cumsum[i]:group_sizes_cumsum[i + 1],
+            group_sizes_cumsum[i]:group_sizes_cumsum[i + 1]]
+            for i in range(nactive_groups)]
+        der = np.zeros(self.observed_opt_state.shape[0])
+        der[self.scaling_slice] = np.array([np.trace(jacobian_inv_blocks[i]) for i in range(scalings.shape[0])])
+        return der
+
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
new file mode 100644
index 000000000..b30234fe5
--- /dev/null
+++ b/selection/randomized/lasso.py
@@ -0,0 +1,1419 @@
+from __future__ import print_function
+import functools
+from copy import copy
+
+import numpy as np
+import scipy
+from scipy import matrix
+
+import regreg.api as rr
+import regreg.affine as ra
+
+from ..constraints.affine import constraints
+
+from .query import (query, 
+                    multiple_queries,
+                    langevin_sampler,
+                    affine_gaussian_sampler)
+
+from .reconstruction import reconstruct_full_from_internal
+from .randomization import split, randomization
+from .glm import (pairs_bootstrap_glm,
+                  glm_nonparametric_bootstrap)
+
+class lasso_view(query):
+
+    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
+        """
+        Record the pieces of a randomized LASSO query; nothing is fit here.
+
+        The optimization problem itself is solved later, in `solve`.
+
+        Parameters:
+        -----------
+
+        loss:
+            Smooth loss whose `data`, `shape` and `saturated_loss` are used in `solve`.
+
+        epsilon:
+            Scalar multiplying the signed basis directions that `solve` adds to
+            the Hessian columns (presumably a ridge coefficient -- confirm).
+
+        penalty:
+            `rr.l1norm` or `rr.weighted_l1norm`; `solve` rejects anything else.
+
+        randomization:
+            Randomization object, also handed to `query.__init__`.
+
+        solve_args: dict
+            Arguments to be passed to regreg solver.
+
+        Notes:
+        ------
+        The default `solve_args` dict is shared by all instances; do not mutate it.
+        """
+
+        query.__init__(self, randomization)
+
+        (self.loss,
+         self.epsilon,
+         self.penalty,
+         self.randomization,
+         self.solve_args) = (loss,
+                             epsilon,
+                             penalty,
+                             randomization,
+                             solve_args)
+         
+    # Methods needed for subclassing a query
+
+    def solve(self, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
+        """
+        Solve the randomized LASSO and build the score / optimization transforms.
+
+        Sets, among others, `initial_soln`, `selection_variable`,
+        `observed_opt_state`, `observed_internal_state`, `opt_transform`,
+        `score_transform` and the slices into the optimization variables.
+
+        NOTE: the `solve_args` argument is ignored; the solver runs with the
+        `self.solve_args` passed to the constructor. `nboot` is only stored.
+        """
+        self.randomize()
+
+        (loss,
+         randomized_loss,
+         epsilon,
+         penalty,
+         randomization,
+         solve_args) = (self.loss,
+                        self.randomized_loss,
+                        self.epsilon,
+                        self.penalty,
+                        self.randomization,
+                        self.solve_args)
+
+        # initial solution
+
+        p = penalty.shape[0]
+
+        problem = rr.simple_problem(randomized_loss, penalty)
+        self.initial_soln = problem.solve(**solve_args)
+
+        # find the active groups and their direction vectors
+        # as well as unpenalized groups
+
+        active_signs = np.sign(self.initial_soln)
+        active = self._active = active_signs != 0
+
+        if isinstance(penalty, rr.l1norm):
+            self._lagrange = penalty.lagrange * np.ones(p)
+            unpenalized = np.zeros(p, bool)
+        elif isinstance(penalty, rr.weighted_l1norm):
+            self._lagrange = penalty.weights
+            unpenalized = self._lagrange == 0
+        else:
+            raise ValueError('penalty must be `l1norm` or `weighted_l1norm`')
+
+        active *= ~unpenalized
+
+        # solve the restricted problem
+
+        self._overall = (active + unpenalized) > 0
+        self._inactive = ~self._overall
+        self._unpenalized = unpenalized
+
+        _active_signs = active_signs.copy()
+        _active_signs[unpenalized] = np.nan # don't release sign of unpenalized variables
+        self.selection_variable = {'sign':_active_signs,
+                                   'variables':self._overall}
+
+        # initial state for opt variables
+
+        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
+                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad'))
+                          # the quadratic of a smooth_atom is not included in computing the smooth_objective
+        self.initial_subgrad = initial_subgrad
+
+        initial_scalings = np.fabs(self.initial_soln[active])
+        initial_subgrad = initial_subgrad[self._inactive]
+        initial_unpenalized = self.initial_soln[self._unpenalized]
+
+        self.observed_opt_state = np.concatenate([initial_scalings,
+                                                  initial_unpenalized,
+                                                  initial_subgrad], axis=0)
+
+        # set the _solved bit
+
+        self._solved = True
+
+        # Now setup the pieces for linear decomposition
+
+        (loss,
+         epsilon,
+         penalty,
+         initial_soln,
+         overall,
+         inactive,
+         unpenalized) = (self.loss,
+                         self.epsilon,
+                         self.penalty,
+                         self.initial_soln,
+                         self._overall,
+                         self._inactive,
+                         self._unpenalized)
+
+        # we are implicitly assuming that
+        # loss is a pairs model
+
+        _beta_unpenalized = restricted_estimator(loss, overall, solve_args=solve_args)
+
+        beta_bar = np.zeros(p)
+        beta_bar[overall] = _beta_unpenalized
+        self._beta_full = beta_bar
+
+        # observed state for score in internal coordinates
+
+        self.observed_internal_state = np.hstack([_beta_unpenalized,
+                                                  -loss.smooth_objective(beta_bar, 'grad')[inactive]])
+
+        # form linear part
+
+        self.num_opt_var = self.observed_opt_state.shape[0]
+
+        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
+        # E for active
+        # U for unpenalized
+        # -E for inactive
+
+        _opt_linear_term = np.zeros((p, p))
+        _score_linear_term = np.zeros((p, p))
+
+        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
+
+        est_slice = slice(0, overall.sum())
+        X, y = loss.data
+        W = self.loss.saturated_loss.hessian(X.dot(beta_bar))
+        _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
+        _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None])
+        _score_linear_term[:, est_slice] = -np.hstack([_hessian_active, _hessian_unpen])
+
+        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
+
+        null_idx = np.arange(overall.sum(), p)
+        inactive_idx = np.nonzero(inactive)[0]
+        for _i, _n in zip(inactive_idx, null_idx):
+            _score_linear_term[_i,_n] = -1
+
+        # c_E piece 
+
+        def signed_basis_vector(p, j, s):
+            v = np.zeros(p)
+            v[j] = s
+            return v
+
+        active_directions = np.array([signed_basis_vector(p, j, active_signs[j]) for j in np.nonzero(active)[0]]).T
+
+        scaling_slice = slice(0, active.sum())
+        if np.sum(active) == 0:
+            _opt_hessian = 0
+        else:
+            _opt_hessian = _hessian_active * active_signs[None, active] + epsilon * active_directions
+        _opt_linear_term[:, scaling_slice] = _opt_hessian
+
+        # beta_U piece
+
+        unpenalized_slice = slice(active.sum(), active.sum() + unpenalized.sum())
+        unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
+        if unpenalized.sum():
+            _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
+                                                      + epsilon * unpenalized_directions) 
+
+        # subgrad piece
+
+        subgrad_idx = range(active.sum() + unpenalized.sum(), active.sum() + inactive.sum() + unpenalized.sum())
+        subgrad_slice = slice(active.sum() + unpenalized.sum(), active.sum() + inactive.sum() + unpenalized.sum())
+        for _i, _s in zip(inactive_idx, subgrad_idx):
+            _opt_linear_term[_i,_s] = 1
+
+        # form affine part
+
+        _opt_affine_term = np.zeros(p)
+        # `self._lagrange` holds the per-coordinate penalty level for both penalty
+        # types, so a `weighted_l1norm` gets its weights, not its scalar `lagrange`
+        _opt_affine_term[active] = active_signs[active] * self._lagrange[active]
+
+        # two transforms that encode score and optimization
+        # variable roles 
+
+        # later, we will modify `score_transform`
+        # in `linear_decomposition`
+
+        self.opt_transform = (_opt_linear_term, _opt_affine_term)
+        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
+
+        # the projection / constraints on the optimization variables are
+        # built later, in `get_sampler`, from the slices stored below
+
+        self._setup = True
+        self.subgrad_slice = subgrad_slice
+        self.scaling_slice = scaling_slice
+        self.unpenalized_slice = unpenalized_slice
+        self.ndim = loss.shape[0]
+
+        self.nboot = nboot
+
+    def get_sampler(self):
+        """Return the sampler for the optimization variables, building it on first access."""
+
+        if not hasattr(self, "_sampler"):
+            # per-coordinate penalty levels from `solve`; set for `l1norm` and `weighted_l1norm` alike
+            inactive = self._inactive
+            inactive_lagrange = self._lagrange[inactive]
+
+            if not hasattr(self.randomization, "cov_prec"): # means randomization is not Gaussian
+
+                dual = rr.weighted_supnorm(1. / inactive_lagrange, bound=1.)
+
+                def projection(dual, subgrad_slice, scaling_slice, opt_state):
+                    """
+                    Full projection for Langevin.
+
+                    The state here will be only the state of the optimization variables.
+                    """
+
+                    new_state = opt_state.copy() # not really necessary to copy
+                    new_state[scaling_slice] = np.maximum(opt_state[scaling_slice], 0)
+                    new_state[subgrad_slice] = dual.bound_prox(opt_state[subgrad_slice])
+                    return new_state
+
+                projection = functools.partial(projection, dual, self.subgrad_slice, self.scaling_slice)
+
+                def grad_log_density(query,
+                                     opt_linear,
+                                     rand_gradient,
+                                     internal_state,
+                                     opt_state):
+                    full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                    return opt_linear.T.dot(rand_gradient(full_state).T)
+
+                grad_log_density = functools.partial(grad_log_density, self, self.opt_transform[0], self.randomization.gradient)
+
+                def log_density(query,
+                                opt_linear,
+                                rand_log_density,
+                                internal_state,
+                                opt_state):
+                    full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                    return rand_log_density(full_state)
+
+                log_density = functools.partial(log_density, self, self.opt_transform[0], self.randomization.log_density)
+
+                self._sampler = langevin_sampler(self.observed_opt_state,
+                                                 self.observed_internal_state.copy(),
+                                                 self.score_transform,
+                                                 self.opt_transform,
+                                                 projection,
+                                                 grad_log_density,
+                                                 log_density)
+            else:
+
+                # compute implied mean and covariance
+
+                cov, prec = self.randomization.cov_prec
+                opt_linear, opt_offset = self.opt_transform
+                score_linear, score_offset = self.score_transform
+                cond_precision = opt_linear.T.dot(prec.dot(opt_linear))
+                cond_cov = np.linalg.inv(cond_precision)
+
+                offset = reconstruct_full_from_internal(self.opt_transform, 
+                                                        self.score_transform, 
+                                                        self.observed_internal_state, 
+                                                        np.zeros(opt_linear.shape[1]))
+                cond_mean = cond_cov.dot(opt_linear.T.dot(prec.dot(offset)))
+
+                # need a log_density function
+                # the conditional density of opt variables
+                # given the score
+
+                logdens_offset = cond_cov.dot(opt_linear.T.dot(prec.dot(score_offset + opt_offset)))
+                logdens_linear = cond_cov.dot(opt_linear.T.dot(prec.dot(score_linear)))
+
+                def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
+                    mean_term = logdens_linear.dot(score.T).T + logdens_offset
+                    diff = opt - mean_term
+                    return - 0.5 * np.sum(diff * cond_prec.dot(diff.T).T, 1)
+                log_density = functools.partial(log_density, logdens_offset, logdens_linear, cond_precision)
+
+                # now make the constraints
+
+                # scaling constraints
+
+                I = np.identity(cond_cov.shape[0])
+                A_scaling = -I[self.scaling_slice]
+                b_scaling = np.zeros(A_scaling.shape[0])
+
+                A_subgrad = np.vstack([I[self.subgrad_slice],
+                                       -I[self.subgrad_slice]])
+                b_subgrad = np.hstack([inactive_lagrange,
+                                       inactive_lagrange])
+
+                linear_term = np.vstack([A_scaling, A_subgrad])
+                offset = np.hstack([b_scaling, b_subgrad])
+
+                affine_con = constraints(linear_term,
+                                         offset,
+                                         mean=cond_mean,
+                                         covariance=cond_cov)
+
+                self._sampler = affine_gaussian_sampler(affine_con,
+                                                        self.observed_opt_state,
+                                                        self.observed_internal_state,
+                                                        log_density,
+                                                        selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
+
+        return self._sampler
+
+    sampler = property(get_sampler, query.set_sampler)  # getter builds the sampler lazily; setter comes from `query`
+
+    def decompose_subgradient(self, condition=None, marginalize=None):
+        """
+        ADD DOCSTRING
+
+        condition and marginalize should be disjoint
+        """
+
+        p = self.penalty.shape[0]
+        condition_inactive = np.zeros(p, dtype=np.bool)
+
+        if condition is None:
+            condition = np.zeros(p, dtype=np.bool)
+
+        if marginalize is None:
+            marginalize = np.zeros(p, dtype=np.bool)
+            marginalize[self._overall] = 0
+
+        if np.any(condition * marginalize):
+            raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
+
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
+
+        _inactive = self._inactive
+
+        limits_marginal = np.zeros_like(_inactive, np.float)
+
+        condition_inactive = _inactive * condition
+        moving_inactive = _inactive * ~(marginalize + condition)
+        margin_inactive = _inactive * marginalize
+
+        limits_marginal = self._lagrange
+        if np.asarray(self._lagrange).shape in [(), (1,)]:
+            limits_marginal = np.zeros_like(_inactive) * self._lagrange
+
+        opt_linear, opt_offset = self.opt_transform
+
+        new_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
+                                                     self._unpenalized.sum() +
+                                                     moving_inactive.sum())))
+        new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
+        new_linear[:, self.unpenalized_slice] = opt_linear[:, self.unpenalized_slice]
+
+        inactive_moving_idx = np.nonzero(moving_inactive)[0]
+        subgrad_idx = range(self._active.sum() + self._unpenalized.sum(),
+                            self._active.sum() + self._unpenalized.sum() +
+                            moving_inactive.sum())
+        subgrad_slice = subgrad_idx
+        for _i, _s in zip(inactive_moving_idx, subgrad_idx):
+            new_linear[_i, _s] = 1.
+
+        observed_opt_state = self.observed_opt_state[:(self._active.sum() +
+                                                       self._unpenalized.sum() +
+                                                       moving_inactive.sum())]
+        observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive]
+
+        condition_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
+                                                           self._unpenalized.sum() +
+                                                           condition_inactive.sum())))
+        inactive_condition_idx = np.nonzero(condition_inactive)[0]
+        subgrad_condition_idx = range(self._active.sum() + self._unpenalized.sum(),
+                                      self._active.sum() + self._unpenalized.sum() + condition_inactive.sum())
+
+        for _i, _s in zip(inactive_condition_idx, subgrad_condition_idx):
+            condition_linear[_i, _s] = 1.
+
+        new_offset = condition_linear[:,subgrad_condition_idx].dot(self.initial_subgrad[condition_inactive]) + opt_offset
+
+        new_opt_transform = (new_linear, new_offset)
+
+        if not hasattr(self.randomization, "cov_prec") or marginalize.sum(): # use Langevin -- not gaussian
+
+            def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive):
+                return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
+                                  _cdf(full_state_plus) - _cdf(full_state_minus)))[margin_inactive]
+
+            def new_grad_log_density(query, 
+                                     limits_marginal,
+                                     margin_inactive,
+                                     _cdf,
+                                     _pdf,
+                                     opt_linear,
+                                     deriv_log_dens,
+                                     internal_state, 
+                                     opt_state):
+
+                full_state = reconstruct_full_from_internal(new_opt_transform, query.score_transform, internal_state, opt_state)
+
+                p = query.penalty.shape[0]
+                weights = np.zeros(p)
+
+                if margin_inactive.sum()>0:
+                    full_state_plus = full_state + limits_marginal * margin_inactive
+                    full_state_minus = full_state - limits_marginal * margin_inactive
+                    weights[margin_inactive] = _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive)
+                weights[~margin_inactive] = deriv_log_dens(full_state)[~margin_inactive]
+                return -opt_linear.T.dot(weights)
+
+            new_grad_log_density = functools.partial(new_grad_log_density,
+                                                     self,
+                                                     limits_marginal,
+                                                     margin_inactive,
+                                                     self.randomization._cdf,
+                                                     self.randomization._pdf,
+                                                     new_opt_transform[0],
+                                                     self.randomization._derivative_log_density)
+
+            def new_log_density(query, 
+                                limits_marginal,
+                                margin_inactive,
+                                _cdf,
+                                _pdf,
+                                opt_linear,
+                                log_dens,
+                                internal_state, 
+                                opt_state):
+
+                full_state = reconstruct_full_from_internal(new_opt_transform,
+                                                            query.score_transform,
+                                                            internal_state,
+                                                            opt_state)
+                full_state = np.atleast_2d(full_state)
+                p = query.penalty.shape[0]
+                logdens = np.zeros(full_state.shape[0])
+
+                if margin_inactive.sum()>0:
+                    full_state_plus = full_state + limits_marginal * margin_inactive
+                    full_state_minus = full_state - limits_marginal * margin_inactive
+                    logdens += np.sum(np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:,margin_inactive], axis=1)
+
+                logdens += log_dens(full_state[:,~margin_inactive])
+
+                return np.squeeze(logdens) # should this be negative to match the gradient log density?
+
+            new_log_density = functools.partial(new_log_density,
+                                                self,
+                                                limits_marginal,
+                                                margin_inactive,
+                                                self.randomization._cdf,
+                                                self.randomization._pdf,
+                                                self.opt_transform[0],
+                                                self.randomization._log_density)
+
+            new_lagrange = self.penalty.weights[moving_inactive]
+            new_dual = rr.weighted_l1norm(new_lagrange, lagrange=1.).conjugate
+
+            def new_projection(dual,
+                               noverall,
+                               opt_state):
+                new_state = opt_state.copy()
+                new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
+                new_state[noverall:] = dual.bound_prox(opt_state[noverall:])
+                return new_state
+
+            new_projection = functools.partial(new_projection,
+                                               new_dual,
+                                               self._overall.sum())
+
+            new_selection_variable = copy(self.selection_variable)
+            new_selection_variable['subgradient'] = self.observed_opt_state[condition_inactive]
+
+            self.sampler = langevin_sampler(observed_opt_state,
+                                            self.observed_internal_state.copy(),
+                                            self.score_transform,
+                                            new_opt_transform,
+                                            new_projection,
+                                            new_grad_log_density,
+                                            new_log_density,
+                                            selection_info=(self, new_selection_variable))
+        else:
+
+            cov, prec = self.randomization.cov_prec
+            cond_precision = new_linear.T.dot(prec.dot(new_linear))
+            score_linear, score_offset = self.score_transform
+
+            cond_cov = np.linalg.inv(cond_precision)
+
+            offset = reconstruct_full_from_internal(new_opt_transform, 
+                                                    self.score_transform, 
+                                                    self.observed_internal_state, 
+                                                    np.zeros(new_linear.shape[1]))
+            cond_mean = cond_cov.dot(new_linear.T.dot(prec.dot(offset)))
+
+            # need a log_density function
+            # the conditional density of opt variables
+            # given the score
+
+            logdens_offset = cond_cov.dot(new_linear.T.dot(prec.dot(score_offset + opt_offset)))
+            logdens_linear = cond_cov.dot(new_linear.T.dot(prec.dot(score_linear)))
+
+            def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
+                mean_term = logdens_linear.dot(score.T).T + logdens_offset
+                diff = opt - mean_term
+                return - 0.5 * np.sum(diff * cond_prec.dot(diff.T).T, 1)
+            log_density = functools.partial(log_density, logdens_offset, logdens_linear, cond_precision)
+
+            # now make the constraints
+
+            # scaling constraints
+
+            # the scalings are first set of opt variables
+            # then unpenalized
+            # then the subgradients
+
+            I = np.identity(cond_cov.shape[0])
+            A_scaling = -I[self.scaling_slice]
+            b_scaling = np.zeros(A_scaling.shape[0])
+
+            A_subgrad = np.vstack([I[self._overall.sum():],
+                                   -I[self._overall.sum():]])
+
+            inactive_lagrange = self.penalty.weights[moving_inactive]
+            b_subgrad = np.hstack([inactive_lagrange,
+                                   inactive_lagrange])
+
+            print(self._overall)
+            print(A_scaling.shape, A_subgrad.shape)
+            print(b_scaling.shape, b_subgrad.shape)
+
+            linear_term = np.vstack([A_scaling, A_subgrad])
+            offset = np.hstack([b_scaling, b_subgrad])
+
+            affine_con = constraints(linear_term,
+                                     offset,
+                                     mean=cond_mean,
+                                     covariance=cond_cov)
+
+            self._sampler = affine_gaussian_sampler(affine_con,
+                                                    observed_opt_state,
+                                                    self.observed_internal_state,
+                                                    log_density,
+                                                    selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
+
+
+class glm_lasso(lasso_view):
+    """`lasso_view` whose sampler setup yields a pairs-bootstrap score for the GLM loss."""
+    def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
+        # `scaling` and `solve_args` are accepted but not read anywhere in this body
+        selected = self.selection_variable['variables']
+        score_and_rest = pairs_bootstrap_glm(self.loss,
+                                             selected,
+                                             beta_full=self._beta_full,
+                                             inactive=~selected)
+        return score_and_rest[0]  # only the first returned element is handed back
+
+class glm_lasso_parametric(lasso_view):
+    """`lasso_view` whose `setup_sampler` builds no bootstrap score."""
+
+    def setup_sampler(self):
+        # hand back only the active set, i.e. the 'variables' entry
+        active_set = self.selection_variable['variables']
+        return active_set
+
+
+class fixedX_lasso(lasso_view):
+    # Gaussian `lasso_view` built directly from a design `X` and a response `Y`.
+    def __init__(self, X, Y, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
+        # wrap (X, Y) in a gaussian loss, then defer everything else to `lasso_view.__init__`
+        loss = glm.gaussian(X, Y)
+        lasso_view.__init__(self,
+                            loss, 
+                            epsilon, 
+                            penalty, 
+                            randomization, 
+                            solve_args=solve_args)
+
+    def setup_sampler(self):
+        # Returns the first element of what `resid_bootstrap` produces for the selected variables.
+        X, Y = self.loss.data  # NOTE(review): X and Y are not used below
+
+        bootstrap_score = resid_bootstrap(self.loss,
+                                          self.selection_variable['variables'],
+                                          ~self.selection_variable['variables'])[0]
+        return bootstrap_score
+
+##### The class for users
+
+class lasso(object):
+
+    r"""
+    A class for the LASSO for post-selection inference.
+    The problem solved is
+
+    .. math::
+
+        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
+            \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
+
+    where $\lambda$ is `lam`, $\omega$ is a randomization generated below
+    and the last term is a small ridge penalty.
+
+    """
+
+    def __init__(self, 
+                 loglike, 
+                 feature_weights,
+                 ridge_term,
+                 randomizer_scale,
+                 randomizer='gaussian',
+                 parametric_cov_estimator=False):
+        r"""
+        Create a new post-selection object for the LASSO problem
+
+        Parameters
+        ----------
+
+        loglike : `regreg.smooth.glm.glm`
+            A (negative) log-likelihood as implemented in `regreg`.
+
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is broadcast to all features.
+
+        ridge_term : float
+            How big a ridge term to add?
+
+        randomizer_scale : float
+            Scale for IID components of randomization.
+
+        randomizer : str (optional)
+            One of ['laplace', 'logistic', 'gaussian']; anything else raises ValueError.
+
+        parametric_cov_estimator : bool (optional)
+            Stored as-is; `fit` and `summary` branch on it.
+        """
+
+        self.loglike = loglike
+        self.nfeature = p = self.loglike.shape[0]
+
+        if np.asarray(feature_weights).shape == ():  # scalar: expand to one weight per feature
+            feature_weights = np.ones(loglike.shape) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+        self.parametric_cov_estimator = parametric_cov_estimator
+
+        if randomizer == 'laplace':
+            self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
+        elif randomizer == 'gaussian':
+            self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
+        elif randomizer == 'logistic':
+            self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
+        else:  # used to fall through silently, leaving `self.randomizer` unset
+            raise ValueError("randomizer must be one of ['laplace', 'logistic', 'gaussian']")
+
+        self.ridge_term = ridge_term
+        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
+
+
+    def fit(self, 
+            solve_args={'tol':1.e-12, 'min_its':50}, 
+            views=[], 
+            nboot=1000):
+        """
+        Fit the randomized lasso using `regreg`.
+
+        Parameters
+        ----------
+        solve_args : keyword args
+             Currently unused: never forwarded to the view or to `solve`.
+
+        views : list
+             Other views of the data, e.g. cross-validation. Copied, not mutated.
+
+        nboot : int
+             Forwarded to the `solve` method of the lasso view.
+
+        Returns
+        -------
+        signs : np.ndarray
+             `np.sign` of the view's initial solution (support and non-zero signs).
+        """
+
+        if self.parametric_cov_estimator==True:
+            self._view = glm_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
+        else:
+            self._view = glm_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
+        self._view.solve(nboot=nboot)
+
+        # extend a copy so neither the caller's list nor the shared default is modified
+        views = copy(views); views.append(self._view)
+        self._queries = multiple_queries(views)
+        self._queries.solve()
+
+        self.signs = np.sign(self._view.initial_soln)
+        self.selection_variable = self._view.selection_variable
+        return self.signs
+
+    def decompose_subgradient(self,
+                              condition=None,
+                              marginalize=None):
+        """
+
+        Condition on, or marginalize over, parts of the
+        inactive subgradient of the fitted view.
+
+        Parameters
+        ----------
+
+        condition : np.bool
+             Which groups' subgradients should we condition on.
+
+        marginalize : np.bool
+             Which groups' subgradients should we marginalize over.
+
+        Raises
+        ------
+
+        ValueError
+             If no `_view` attribute exists yet, i.e. `fit` has not been run.
+        """
+
+        if hasattr(self, "_view"):
+            self._view.decompose_subgradient(condition=condition, marginalize=marginalize)
+        else:
+            raise ValueError("fit method should be run first")
+
+    def summary(self,
+                selected_features,
+                parameter=None,
+                level=0.9,
+                ndraw=10000, 
+                burnin=2000,
+                compute_intervals=False,
+                bootstrap_sampler=False):
+        """
+        Produce p-values and confidence intervals for targets
+        of model including selected features
+
+        Parameters
+        ----------
+        selected_features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
+
+        level : float
+            Confidence level. Not read anywhere in this method.
+
+        ndraw : int (optional)
+            Defaults to 10000.
+
+        burnin : int (optional)
+            Defaults to 2000.
+
+        compute_intervals : bool
+            If True, also compute confidence intervals.
+
+        bootstrap_sampler : bool
+            Not read anywhere in this method.
+
+        Returns
+        -------
+        pivots, pvalues, intervals
+            `pvalues` are tested at 0; `intervals` is None unless `compute_intervals`.
+        """
+        if not hasattr(self, "_queries"):
+            raise ValueError('run `fit` method before producing summary.')
+        if parameter is None:
+            parameter = np.zeros(self.loglike.shape[0])
+
+        unpenalized_mle = restricted_estimator(self.loglike, selected_features)
+
+        if self.parametric_cov_estimator == False:
+            n = self.loglike.data[0].shape[0]
+            form_covariances = glm_nonparametric_bootstrap(n, n)
+            target_info = pairs_bootstrap_glm(self.loglike, selected_features, inactive=None)[0]
+        else:
+            target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
+            form_covariances = glm_parametric_covariance(self.loglike)
+
+        opt_samplers = []
+        for q in self._queries.objectives:
+            cov_info = q.setup_sampler()
+            if self.parametric_cov_estimator == False:
+                target_cov, score_cov = form_covariances(target_info,
+                                                         cross_terms=[cov_info],
+                                                         nsample=q.nboot)
+            else:
+                target_cov, score_cov = form_covariances(target_info,
+                                                         cross_terms=[cov_info])
+            opt_samplers.append(q.sampler)
+
+        opt_samples = [opt_sampler.sample(ndraw, burnin) for opt_sampler in opt_samplers]
+
+        ### TODO -- this only uses one view -- what about other queries?
+        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter, sample=opt_samples[0])
+        if not np.all(parameter == 0):
+            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=np.zeros_like(parameter), sample=opt_samples[0])
+        else:
+            pvalues = pivots
+
+        intervals = None
+        if compute_intervals:
+            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_samples[0])
+
+        return pivots, pvalues, intervals
+
+    @staticmethod
+    def gaussian(X, 
+                 Y, 
+                 feature_weights, 
+                 sigma=1.,
+                 parametric_cov_estimator=False,
+                 quadratic=None,
+                 ridge_term=None,
+                 randomizer_scale=None,
+                 randomizer='gaussian'):
+        r"""
+        Squared-error LASSO with feature weights.
+
+        Objective function (before randomizer) is 
+        $$
+        \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\lambda$ is `feature_weights`. When left as None, the
+        ridge term and the randomizer scale are both derived from
+        `np.std(Y)` and the mean squared column norm of `X`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        sigma : float (optional)
+            Noise level, entering only through `sigma**2`: both the
+            loglikelihood and `feature_weights` are scaled by `sigma**(-2)`.
+
+        parametric_cov_estimator : bool (optional)
+            Passed through unchanged to the `lasso` constructor.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            How big a ridge term to add? None selects the default above.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer. None selects the default above.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+        L : `selection.randomized.convenience.lasso`
+        """
+
+        scaled_loss = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
+        nobs, _ = X.shape
+        avg_gram_diag = np.mean((X**2).sum(0))
+
+        # data-driven defaults, filled in only when the caller left them unset
+        if ridge_term is None:
+            ridge_term = np.std(Y)**2 * avg_gram_diag / np.sqrt(nobs)
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(avg_gram_diag) * 0.5 * np.std(Y)
+
+        scaled_weights = np.asarray(feature_weights) / sigma**2
+        return lasso(scaled_loss, scaled_weights, ridge_term, randomizer_scale,
+                     randomizer=randomizer, parametric_cov_estimator=parametric_cov_estimator)
+
+    @staticmethod
+    def logistic(X, 
+                 successes, 
+                 feature_weights, 
+                 trials=None,
+                 parametric_cov_estimator=False,
+                 quadratic=None,
+                 ridge_term=None,
+                 randomizer='gaussian',
+                 randomizer_scale=None):
+        r"""
+        Logistic LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell$ is the negative of the logistic 
+        log-likelihood (half the logistic deviance)
+        and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        successes : ndarray
+            Shape (n,) -- response vector. An integer number of successes.
+            For data that is proportions, multiply the proportions
+            by the number of trials first.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        trials : ndarray (optional)
+            Number of trials per response, defaults to
+            ones the same shape as `successes`.
+
+        parametric_cov_estimator : bool (optional)
+            Passed through unchanged to the `lasso` constructor.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            How big a ridge term to add? None gives `mean_diag / sqrt(n)`.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer. None gives `0.5 * sqrt(mean_diag)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+        L : `selection.randomized.convenience.lasso`
+        """
+        nobs, _ = X.shape
+        logit_loss = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
+        avg_gram_diag = np.mean((X**2).sum(0))
+
+        # the defaults depend on the design only: no response scale enters here
+        if ridge_term is None:
+            ridge_term = avg_gram_diag / np.sqrt(nobs)
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(avg_gram_diag) * 0.5
+
+        constructor_kwargs = dict(randomizer=randomizer,
+                                  parametric_cov_estimator=parametric_cov_estimator)
+        return lasso(logit_loss,
+                     feature_weights,
+                     ridge_term,
+                     randomizer_scale,
+                     **constructor_kwargs)
+
+    @staticmethod
+    def coxph(X, 
+              times, 
+              status, 
+              feature_weights,
+              parametric_cov_estimator=False,
+              quadratic=None,
+              ridge_term=None,
+              randomizer='gaussian',
+              randomizer_scale=None):
+        r"""
+        Cox proportional hazards LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell^{\text{Cox}}$ is the 
+        negative of the log of the Cox partial
+        likelihood and $\lambda$ is `feature_weights`.
+
+        Uses Efron's tie breaking method.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        times : ndarray
+            Shape (n,) -- the survival times.
+
+        status : ndarray
+            Shape (n,) -- the censoring status.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        parametric_cov_estimator : bool (optional)
+            Passed through unchanged to the
+            `lasso` constructor.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            Ridge term; None gives `np.std(times)**2 * mean_diag / sqrt(n)`.
+
+        randomizer_scale : float
+            Randomizer scale; None gives `sqrt(mean_diag) * 0.5 * np.std(times)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.lasso`
+
+
+        """
+        loglike = coxph_obj(X, times, status, quadratic=quadratic)
+        n = X.shape[0]  # `n` and `Y` were undefined here: the defaults below raised NameError
+        # scale for randomization seems kind of meaningless here...
+
+        mean_diag = np.mean((X**2).sum(0))
+
+        if ridge_term is None:
+            ridge_term = np.std(times)**2 * mean_diag / np.sqrt(n)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(times)
+
+        return lasso(loglike, 
+                     feature_weights, 
+                     ridge_term,
+                     randomizer_scale, 
+                     randomizer=randomizer,
+                     parametric_cov_estimator=parametric_cov_estimator)
+
+    @staticmethod
+    def poisson(X, 
+                counts, 
+                feature_weights,
+                parametric_cov_estimator=False,
+                quadratic=None,
+                ridge_term=None,
+                randomizer_scale=None,
+                randomizer='gaussian'):
+        r"""
+        Poisson log-linear LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell^{\text{Poisson}}$ is the negative
+        of the log of the Poisson likelihood (half the deviance)
+        and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        counts : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        parametric_cov_estimator : bool (optional)
+            Passed through unchanged to the `lasso` constructor.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            How big a ridge term to add? None gives `np.std(counts)**2 * mean_diag / sqrt(n)`.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer. None gives `sqrt(mean_diag) * 0.5 * np.std(counts)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.lasso`
+        """
+        nobs, _ = X.shape
+        count_loss = rr.glm.poisson(X, counts, quadratic=quadratic)
+
+        # scale for randomizer seems kind of meaningless here...
+        avg_gram_diag = np.mean((X**2).sum(0))
+
+        # fall back to data-driven values only for arguments left as None
+        if ridge_term is None:
+            ridge_term = np.std(counts)**2 * avg_gram_diag / np.sqrt(nobs)
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(avg_gram_diag) * 0.5 * np.std(counts)
+
+        constructor_kwargs = dict(randomizer=randomizer,
+                                  parametric_cov_estimator=parametric_cov_estimator)
+        return lasso(count_loss,
+                     feature_weights,
+                     ridge_term,
+                     randomizer_scale,
+                     **constructor_kwargs)
+
+    @staticmethod
+    def sqrt_lasso(X, 
+                   Y, 
+                   feature_weights, 
+                   quadratic=None,
+                   parametric_cov_estimator=False,
+                   sigma_estimate='truncated',
+                   solve_args={'min_its':200},
+                   randomizer_scale=None,
+                   randomizer='gaussian'):
+        r"""
+        Use sqrt-LASSO to choose variables.
+
+        Objective function is 
+        $$
+        \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\lambda$ is `feature_weights`. After solving the problem
+        treat as if `gaussian` with implied variance and choice of 
+        multiplier. See arxiv.org/abs/1504.08031 for details.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        parametric_cov_estimator : bool
+            Forwarded to the `lasso` constructor by the
+            (currently unreachable) code that follows the
+            unconditional `raise`.
+
+        sigma_estimate : str
+            One of 'truncated' or 'OLS'. Method
+            used to estimate $\sigma$ when using
+            parametric covariance.
+
+        solve_args : dict
+            Arguments passed to solver.
+
+        ridge_term : float
+            NOTE: not in the signature; the body computes it itself.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.lasso`
+        
+        Notes
+        -----
+        Not implemented yet: the body starts with an unconditional
+        `raise NotImplementedError`, so nothing below it runs.
+
+        Intended behaviour: unlike other variants of LASSO, this
+        solves the problem on construction as the active
+        set is needed to find equivalent gaussian LASSO.
+        Assumes parametric model is correct for inference,
+        i.e. does not accept a covariance estimator.
+        """
+
+        raise NotImplementedError  # NOTE: everything below this line is unreachable
+
+        n, p = X.shape
+
+        # scale for randomization seems kind of meaningless here...
+
+        mean_diag = np.mean((X**2).sum(0))
+        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(p) * feature_weights
+        feature_weights = np.asarray(feature_weights)
+
+        # TODO: refits sqrt lasso more than once -- make an override for avoiding refitting?
+
+        soln = solve_sqrt_lasso(X, Y, weights=feature_weights, quadratic=quadratic, solve_args=solve_args)[0]
+
+        # find active set, and estimate of sigma
+
+        active = (soln != 0)
+        nactive = active.sum()
+
+        if nactive:
+
+            subgrad = np.sign(soln[active]) * feature_weights[active]
+            X_E = X[:,active]
+            X_Ei = np.linalg.pinv(X_E)
+            sigma_E = np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)
+            multiplier = np.sqrt((n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))
+
+            # check truncation interval for sigma_E
+
+            # the KKT conditions imply an inequality like
+            # \hat{\sigma}_E \cdot LHS \leq RHS
+
+            penalized = feature_weights[active] != 0
+
+            if penalized.sum():
+                D_E = np.sign(soln[active][penalized]) # diagonal matrix of signs
+                LHS = D_E * np.linalg.solve(X_E.T.dot(X_E), subgrad)[penalized]
+                RHS = D_E * X_Ei.dot(Y)[penalized] 
+
+                ratio = RHS / LHS
+
+                group1 = LHS > 0
+                upper_bound = np.inf
+                if group1.sum():
+                    upper_bound = min(upper_bound, np.min(ratio[group1])) # necessarily these will have RHS > 0
+
+                group2 = (LHS <= 0) * (RHS <= 0) # we can ignore the other possibility since this gives a lower bound of 0
+                lower_bound = 0
+                if group2.sum():
+                    lower_bound = max(lower_bound, np.max(ratio[group2]))
+
+                upper_bound /= multiplier
+                lower_bound /= multiplier
+
+            else:
+                lower_bound = 0
+                upper_bound = np.inf
+
+            _sigma_estimator_args = (sigma_E, 
+                                     n - nactive,
+                                     lower_bound, 
+                                     upper_bound)
+
+            if sigma_estimate == 'truncated':
+                _sigma_hat = estimate_sigma(*_sigma_estimator_args)
+            elif sigma_estimate == 'OLS':
+                _sigma_hat = sigma_E
+            else:
+                raise ValueError('sigma_estimate must be one of ["truncated", "OLS"]')
+        else:
+            _sigma_hat = np.linalg.norm(Y) / np.sqrt(n)
+            multiplier = np.sqrt(n)
+            sigma_E = _sigma_hat
+
+        # XXX how should quadratic be changed?
+        # multiply everything by sigma_E?
+
+        if quadratic is not None:
+            qc = quadratic.collapsed()
+            qc.coef *= np.sqrt(n - nactive) / sigma_E
+            qc.linear_term *= np.sqrt(n - nactive) / sigma_E
+            quadratic = qc
+
+        loglike = rr.glm.gaussian(X, Y, quadratic=quadratic)
+        # NOTE(review): this call omits `ridge_term` / `randomizer_scale` and passes `ignore_inactive_constraints`, which `lasso.__init__` does not accept
+        L = lasso(loglike, feature_weights * multiplier * sigma_E,
+                  parametric_cov_estimator=parametric_cov_estimator,
+                  ignore_inactive_constraints=True)
+
+        # these arguments are reused for data carving
+
+        if nactive:
+            L._sigma_hat = _sigma_hat
+            L._sigma_estimator_args = _sigma_estimator_args
+            L._weight_multiplier = multiplier * sigma_E
+            L._multiplier = multiplier
+            L.lasso_solution = soln
+
+        return L
+
+
+def restricted_estimator(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
+    """
+    Solve the loss restricted to the coordinates flagged in `active`.
+
+    Parameters
+    ----------
+
+    Mest_loss : objective function
+        A loss exposing `data` and `_is_transform`, e.g. a GLM loss.
+
+    active : ndarray
+        Mask of columns to keep; `active.sum()` is used as their count.
+
+    solve_args : dict
+        Keyword arguments forwarded to `solve`.
+
+    Returns
+    -------
+
+    soln : ndarray
+        Whatever `solve` returns for the restricted problem.
+
+    """
+    design, _ = Mest_loss.data
+    # a non-transform loss that carries a `saturated_loss` is treated as a GLM
+    is_glm = (not Mest_loss._is_transform) and hasattr(Mest_loss, 'saturated_loss')
+    if is_glm:
+        design_E = design[:,active]
+        restricted = rr.affine_smooth(Mest_loss.saturated_loss, design_E)
+    else:
+        nfeature = ra.astransform(design).input_shape[0]
+        embed = ra.selector(active, nfeature, ra.identity((active.sum(),)))
+        restricted = rr.affine_smooth(Mest_loss, embed.T)
+    return restricted.solve(**solve_args)
+
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 31a300617..0365f4bc7 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1,3 +1,7 @@
+
+
+
+
 from itertools import product
 import numpy as np
 
@@ -8,6 +12,7 @@
 
 from ..distributions.api import discrete_family
 from ..sampling.langevin import projected_langevin
+from ..constraints.affine import sample_from_constraints
 from .reconstruction import reconstruct_full_from_internal
 
 class query(object):
@@ -129,95 +134,20 @@ def solve(self):
 
 class optimization_sampler(object):
 
-    '''
-    Object to sample only optimization variables of a selective sampler
-    fixing the observed score.
-    '''
-
-    def __init__(self,
-                 observed_opt_state,
-                 observed_internal_state,
-                 score_transform,
-                 opt_transform,
-                 projection,
-                 grad_log_density,
-                 log_density,
-                 selection_info=None):
-
-        '''
-        Parameters
-        ----------
+    def __init__(self):
+        raise NotImplementedError("abstract method")
 
-        multi_view : `multiple_queries`
-           Instance of `multiple_queries`. Attributes
-           `objectives`, `score_info` are key
-           attributed. (Should maybe change constructor
-           to reflect only what is needed.)
-        '''
-
-        self.observed_opt_state = observed_opt_state.copy()
-        self.observed_internal_state = observed_internal_state.copy()
-        self.score_linear, self.score_offset = score_transform
-        self.opt_linear, self.opt_offset = opt_transform
-        self.projection = projection
-        self.gradient = lambda opt: - grad_log_density(self.observed_internal_state, opt)
-        self.log_density = log_density
-        self.selection_info = selection_info # a way to record what view and what was conditioned on -- not used in calculations
-
-    def sample(self, ndraw, burnin, stepsize=None):
-        '''
-        Sample `target` from selective density
-        using projected Langevin sampler with
-        gradient map `self.gradient` and
-        projection map `self.projection`.
-
-        Parameters
-        ----------
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        keep_opt : bool
-           Should we return optimization variables
-           as well as the target?
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if self.observed_opt_state.shape in ((), (0,)): # no opt variables to sample:
-            return None
-
-        if stepsize is None:
-            stepsize = 1./max(len(self.observed_opt_state), 1)
-
-        target_langevin = projected_langevin(self.observed_opt_state.copy(),
-                                             self.gradient,
-                                             self.projection,
-                                             stepsize)
-
-        samples = []
-
-        for i in range(ndraw + burnin):
-            target_langevin.next()
-            if (i >= burnin):
-                samples.append(target_langevin.state.copy())
-        return np.asarray(samples)
+    def sample(self):
+        raise NotImplementedError("abstract method")
 
     def hypothesis_test(self,
                         test_stat,
                         observed_value,
                         target_cov,
                         score_cov,
-                        ndraw=10000,
-                        burnin=2000,
-                        stepsize=None,
+                        sample_args=(),
                         sample=None,
-                        parameter=None,
+                        parameter=0,
                         alternative='twosided'):
 
         '''
@@ -225,31 +155,30 @@ def hypothesis_test(self,
         using projected Langevin sampler with
         gradient map `self.gradient` and
         projection map `self.projection`.
+
         Parameters
         ----------
+
         test_stat : callable
            Test statistic to evaluate on sample from
            selective distribution.
+
         observed_value : float
            Observed value of test statistic.
            Used in p-value calculation.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
+
+        sample_args : sequence
+           Arguments to `self.sample` if sample is None.
+
         sample : np.array (optional)
            If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
+           representing a sample of the target from parameters.
            Allows reuse of the same sample for construction of confidence
            intervals, hypothesis tests, etc. If not None,
            `ndraw, burnin, stepsize` are ignored.
+
         parameter : np.float (optional)
-           If not None, defaults to `self.reference`.
-           Otherwise, sample is reweighted using Gaussian tilting.
+
         alternative : ['greater', 'less', 'twosided']
             What alternative to use.
         Returns
@@ -261,14 +190,15 @@ def hypothesis_test(self,
             raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
 
         if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+            sample = self.sample(*sample_args)
 
         if parameter is None:
             parameter = self.reference
 
         sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
 
-        delta = self.target_inv_cov.dot(parameter - self.reference)
+        target_inv_cov = np.linalg.inv(target_cov)
+        delta = target_inv_cov.dot(parameter - self.reference)
         W = np.exp(sample.dot(delta))
 
         family = discrete_family(sample_test_stat, W)
@@ -285,45 +215,46 @@ def confidence_intervals(self,
                              observed_target,
                              target_cov,
                              score_cov,
-                             ndraw=10000,
-                             burnin=2000,
-                             stepsize=None,
+                             sample_args=(),
                              sample=None,
                              level=0.9):
         '''
+
         Parameters
         ----------
+
         observed : np.float
             A vector of parameters with shape `self.shape`,
             representing coordinates of the target.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
+
+        sample_args : sequence
+           Arguments to `self.sample` if sample is None.
+
         sample : np.array (optional)
            If not None, assumed to be a sample of shape (-1,) + `self.shape`
            representing a sample of the target from parameters `self.reference`.
            Allows reuse of the same sample for construction of confidence
            intervals, hypothesis tests, etc.
+
         level : float (optional)
             Specify the
             confidence level.
+
         Notes
         -----
+
         Construct selective confidence intervals
         for each parameter of the target.
+
         Returns
         -------
+
         intervals : [(float, float)]
             List of confidence intervals.
         '''
 
         if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+            sample = self.sample(*sample_args)
         else:
             ndraw = sample.shape[0]
 
@@ -344,38 +275,37 @@ def coefficient_pvalues(self,
                             target_cov,
                             score_cov,
                             parameter=None,
-                            ndraw=10000,
-                            burnin=2000,
-                            stepsize=None,
+                            sample_args=(),
                             sample=None,
                             alternative='twosided'):
         '''
         Construct selective p-values
         for each parameter of the target.
+
         Parameters
         ----------
+
         observed : np.float
             A vector of parameters with shape `self.shape`,
             representing coordinates of the target.
+
         parameter : np.float (optional)
             A vector of parameters with shape `self.shape`
             at which to evaluate p-values. Defaults
             to `np.zeros(self.shape)`.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
+
+        sample_args : sequence
+           Arguments to `self.sample` if sample is None.
+
         sample : np.array (optional)
            If not None, assumed to be a sample of shape (-1,) + `self.shape`
            representing a sample of the target from parameters `self.reference`.
            Allows reuse of the same sample for construction of confidence
            intervals, hypothesis tests, etc.
+
         alternative : ['greater', 'less', 'twosided']
             What alternative to use.
+
         Returns
         -------
         pvalues : np.float
@@ -386,7 +316,7 @@ def coefficient_pvalues(self,
             raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
 
         if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
+            sample = self.sample(*sample_args)
         else:
             ndraw = sample.shape[0]
 
@@ -404,6 +334,89 @@ def coefficient_pvalues(self,
 
         return np.array(pvals)
 
+class langevin_sampler(optimization_sampler):
+
+    '''
+    Object to sample only optimization variables of a selective sampler
+    fixing the observed score.
+    '''
+
+    def __init__(self,
+                 observed_opt_state,
+                 observed_internal_state,
+                 score_transform,
+                 opt_transform,
+                 projection,
+                 grad_log_density,
+                 log_density,
+                 selection_info=None):
+
+        '''
+        Parameters
+        ----------
+
+        multi_view : `multiple_queries`
+           Instance of `multiple_queries`. Attributes
+           `objectives`, `score_info` are key
+           attributed. (Should maybe change constructor
+           to reflect only what is needed.)
+        '''
+
+        self.observed_opt_state = observed_opt_state.copy()
+        self.observed_internal_state = observed_internal_state.copy()
+        self.score_linear, self.score_offset = score_transform
+        self.opt_linear, self.opt_offset = opt_transform
+        self.projection = projection
+        self.gradient = lambda opt: - grad_log_density(self.observed_internal_state, opt)
+        self.log_density = log_density
+        self.selection_info = selection_info # a way to record what view and what was conditioned on -- not used in calculations
+
+    def sample(self, ndraw, burnin, stepsize=None):
+        '''
+        Sample `target` from selective density
+        using projected Langevin sampler with
+        gradient map `self.gradient` and
+        projection map `self.projection`.
+
+        Parameters
+        ----------
+
+        ndraw : int
+           How long a chain to return?
+
+        burnin : int
+           How many samples to discard?
+
+        stepsize : float
+           Stepsize for Langevin sampler. Defaults
+           to a crude estimate based on the
+           dimension of the problem.
+
+        Returns
+        -------
+
+        samples : np.ndarray or None
+        '''
+
+        if self.observed_opt_state.shape in ((), (0,)): # no opt variables to sample:
+            return None
+
+        if stepsize is None:
+            stepsize = 1./max(len(self.observed_opt_state), 1)
+
+        target_langevin = projected_langevin(self.observed_opt_state.copy(),
+                                             self.gradient,
+                                             self.projection,
+                                             stepsize)
+
+        samples = []
+
+        for i in range(ndraw + burnin):
+            target_langevin.next()
+            if (i >= burnin):
+                samples.append(target_langevin.state.copy())
+        return np.asarray(samples)
+
     def crude_lipschitz(self):
         """
         A crude Lipschitz constant for the
@@ -419,6 +432,70 @@ def crude_lipschitz(self):
             lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
         return lipschitz
 
+class affine_gaussian_sampler(optimization_sampler):
+
+    '''
+    Sample from an affine truncated Gaussian
+    '''
+
+    def __init__(self,
+                 affine_con,
+                 initial_point,
+                 observed_internal_state,
+                 log_density,
+                 selection_info=None):
+
+        '''
+        Parameters
+        ----------
+
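+        affine_con : constraints passed to `sample_from_constraints`.
+        initial_point : initial point passed to `sample_from_constraints`.
+        observed_internal_state : stored as `self.observed_internal_state`.
+        log_density : stored as `self.log_density`.
+        selection_info : optional, stored as `self.selection_info`.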
+        '''
+
+        self.affine_con = affine_con
+        self.initial_point = initial_point
+        self.observed_internal_state = observed_internal_state
+        self.selection_info = selection_info
+        self.log_density = log_density
+
+    def sample(self, ndraw, burnin):
+        '''
+        Sample `target` from selective density
+        by calling `sample_from_constraints` on
+        the affine constraints `self.affine_con`,
+        with `self.initial_point` as initial point.
+
+        Parameters
+        ----------
+
+        ndraw : int
+           How long a chain to return?
+
+        burnin : int
+           How many samples to discard?
+
+        '''
+
+        return sample_from_constraints(self.affine_con,
+                                       self.initial_point,
+                                       ndraw=ndraw,
+                                       burnin=burnin)
+        # sample_from_constraints
+
+#     def log_density(self, 
+#                     internal_state,
+#                     opt_sample):
+#         """
+#         Conditional density of opt variables for a given value of the internal state.
+#         """
+#         # Hmm.....
+#         return np.random.sample(opt_sample.shape[0])
+
+
 class optimization_intervals(object):
 
     def __init__(self,
diff --git a/selection/randomized/randomization.py b/selection/randomized/randomization.py
index d6b68b6bf..cb51dda02 100644
--- a/selection/randomized/randomization.py
+++ b/selection/randomized/randomization.py
@@ -23,6 +23,7 @@ def __init__(self,
                  log_density=None,
                  CGF=None,  # cumulant generating function and gradient
                  CGF_conjugate=None,  # convex conjugate of CGF and gradient
+                 cov_prec=None      # will have a covariance matrix if Gaussian
                  ):
 
         rr.smooth_atom.__init__(self,
@@ -41,6 +42,8 @@ def __init__(self,
         self._log_density = log_density
         self.CGF = CGF
         self.CGF_conjugate = CGF_conjugate
+        if cov_prec is not None:
+            self.cov_prec = cov_prec
 
     def smooth_objective(self, perturbation, mode='both', check_feasibility=False):
         """
@@ -113,6 +116,7 @@ def isotropic_gaussian(shape, scale):
         CGF_conjugate = isotropic_gaussian_CGF_conjugate(shape, scale)
 
         p = np.product(shape)
+        I = np.identity(p)
         constant = -0.5 * p * np.log(2 * np.pi * scale**2)
         return randomization(shape,
                              density,
@@ -125,6 +129,7 @@ def isotropic_gaussian(shape, scale):
                              log_density = lambda x: -0.5 * (np.atleast_2d(x)**2).sum(1) / scale**2 + constant,
                              CGF=CGF,
                              CGF_conjugate=CGF_conjugate,
+                             cov_prec=(scale**2 * I, I / scale**2)
                              )
 
     @staticmethod
@@ -157,7 +162,8 @@ def gaussian(covariance):
                              grad_negative_log_density,
                              sampler,
                              lipschitz=np.linalg.svd(precision)[1].max(),
-                             log_density = lambda x: -np.sum(sqrt_precision.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const))
+                             log_density = lambda x: -np.sum(sqrt_precision.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const),
+                             cov_prec=(covariance, precision))
 
     @staticmethod
     def laplace(shape, scale):
@@ -240,6 +246,10 @@ def __init__(self, shape, subsample_size, total_size):
         rr.smooth_atom.__init__(self,
                                 shape)
 
+    def get_covariance(self):
+        if hasattr(self, "_covariance"):
+            return self._covariance
+
     def set_covariance(self, covariance):
         """
         Once covariance has been set, then
@@ -247,6 +257,7 @@ def set_covariance(self, covariance):
         """
         self._covariance = covariance
         precision = np.linalg.inv(covariance)
+        self._cov_prec = (covariance, precision)
         sqrt_precision = np.linalg.cholesky(precision).T
         _det = np.linalg.det(covariance)
         p = covariance.shape[0]
@@ -259,6 +270,13 @@ def _log_density(x):
             return -np.sum(sqrt_precision.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const)
         self._log_density = _log_density
 
+    covariance = property(get_covariance, set_covariance)
+
+    @property
+    def cov_prec(self):
+        if hasattr(self, "_covariance"):
+            return self._cov_prec
+
     def smooth_objective(self, perturbation, mode='both', check_feasibility=False):
         if not hasattr(self, "_covariance"):
             raise ValueError('first set the covariance')
diff --git a/selection/randomized/tests/test_lasso.py b/selection/randomized/tests/test_lasso.py
new file mode 100644
index 000000000..e7749c845
--- /dev/null
+++ b/selection/randomized/tests/test_lasso.py
@@ -0,0 +1,75 @@
+from itertools import product
+import numpy as np
+import nose.tools as nt
+
+from ..lasso import lasso
+from ...tests.instance import (gaussian_instance,
+                               logistic_instance,
+                               poisson_instance)
+from ...tests.flags import SMALL_SAMPLES
+from ...tests.decorators import set_sampling_params_iftrue 
+
+@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=20)
+def test_lasso_constructors(ndraw=1000, burnin=200):
+    """
+    Smoke tests for lasso convenience constructors
+    """
+    cls = lasso
+    for const_info, rand, marginalize, condition in product(zip([gaussian_instance,
+                                                                 logistic_instance,
+                                                                 poisson_instance],
+                                                                [cls.gaussian,
+                                                                 cls.logistic,
+                                                                 cls.poisson]),
+                                                            ['gaussian', 'logistic', 'laplace'],
+                                                            [False, True],
+                                                            [False, True]):
+
+        print(rand)
+        inst, const = const_info
+        X, Y = inst(n=100, p=20, signal=5, s=10)[:2]
+        n, p = X.shape
+
+        W = np.ones(X.shape[1]) * 0.2
+        W[0] = 0
+        W[3:] = 50.
+        np.random.shuffle(W)
+        conv = const(X, Y, W, randomizer=rand)
+        nboot = 1000
+        if SMALL_SAMPLES:
+            nboot = 20
+        signs = conv.fit(nboot=nboot)
+
+        marginalize = None
+        if marginalize:
+            marginalize = np.zeros(p, np.bool)
+            marginalize[:int(p/2)] = True
+        
+        condition = None
+        if condition:
+            if marginalize:
+                condition = ~marginalize
+            else:
+                condition = np.ones(p, np.bool)
+            condition[-int(p/4):] = False
+
+        selected_features = np.zeros(p, np.bool)
+        selected_features[:3] = True
+
+        conv.summary(selected_features,
+                     ndraw=ndraw,
+                     burnin=burnin,
+                     compute_intervals=True)
+
+        conv.decompose_subgradient(marginalize=marginalize,
+                                   condition=condition)
+
+        conv.summary(selected_features,
+                     ndraw=ndraw,
+                     burnin=burnin)
+
+        conv.decompose_subgradient(condition=np.ones(p, np.bool))
+
+        conv.summary(selected_features,
+                     ndraw=ndraw,
+                     burnin=burnin)

From e977ea4d2668f96603a40e79328f1d5344692e3b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 8 Feb 2018 07:02:51 -0800
Subject: [PATCH 471/617] renaming restricted_Mest to restricted_estimator, put
 in base module

---
 selection/randomized/M_estimator.py     | 690 ------------------------
 selection/randomized/base.py            |  37 ++
 selection/randomized/convenience.py     |   1 -
 selection/randomized/glm.py             | 100 +---
 selection/randomized/greedy_step.py     |   4 +-
 selection/randomized/group_lasso.py     |  92 +++-
 selection/randomized/lasso.py           |  37 +-
 selection/randomized/threshold_score.py |   4 +-
 8 files changed, 140 insertions(+), 825 deletions(-)
 delete mode 100644 selection/randomized/M_estimator.py
 create mode 100644 selection/randomized/base.py

diff --git a/selection/randomized/M_estimator.py b/selection/randomized/M_estimator.py
deleted file mode 100644
index e45424d31..000000000
--- a/selection/randomized/M_estimator.py
+++ /dev/null
@@ -1,690 +0,0 @@
-from __future__ import print_function
-import functools
-from copy import copy
-
-import numpy as np
-import scipy
-from scipy import matrix
-
-import regreg.api as rr
-import regreg.affine as ra
-
-from .query import query, optimization_sampler
-from .reconstruction import reconstruct_full_from_internal
-from .randomization import split
-
-class M_estimator(query):
-
-    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
-        """
-        Fits the logistic regression to a candidate active set, without penalty.
-        Calls the method bootstrap_covariance() to bootstrap the covariance matrix.
-
-        Computes $\bar{\beta}_E$ which is the restricted 
-        M-estimator (i.e. subject to the constraint $\beta_{-E}=0$).
-
-        Parameters:
-        -----------
-
-        active: np.bool
-            The active set from fitting the logistic lasso
-
-        solve_args: dict
-            Arguments to be passed to regreg solver.
-
-        Returns:
-        --------
-
-        None
-
-        Notes:
-        ------
-
-        Sets self._beta_unpenalized which will be used in the covariance matrix calculation.
-        Also computes Hessian of loss at restricted M-estimator as well as the bootstrap covariance.
-
-        """
-
-        query.__init__(self, randomization)
-
-        (self.loss,
-         self.epsilon,
-         self.penalty,
-         self.randomization,
-         self.solve_args) = (loss,
-                             epsilon,
-                             penalty,
-                             randomization,
-                             solve_args)
-         
-    # Methods needed for subclassing a query
-
-    def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
-
-        self.randomize()
-
-        (loss,
-         randomized_loss,
-         epsilon,
-         penalty,
-         randomization,
-         solve_args) = (self.loss,
-                        self.randomized_loss, 
-                        self.epsilon,
-                        self.penalty,
-                        self.randomization,
-                        self.solve_args)
-
-        # initial solution
-
-        problem = rr.simple_problem(randomized_loss, penalty)
-        self.initial_soln = problem.solve(**solve_args)
-
-        # find the active groups and their direction vectors
-        # as well as unpenalized groups
-
-        groups = np.unique(penalty.groups) 
-        active_groups = np.zeros(len(groups), np.bool)
-        unpenalized_groups = np.zeros(len(groups), np.bool)
-
-        active_directions = []
-        active = np.zeros(loss.shape, np.bool)
-        unpenalized = np.zeros(loss.shape, np.bool)
-
-        initial_scalings = []
-
-        active_directions_list = [] ## added for group lasso
-        active_penalty = []
-        for i, g in enumerate(groups):
-            group = penalty.groups == g
-            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) > 1.e-6 * penalty.weights[g]) and (penalty.weights[g] > 0)
-            unpenalized_groups[i] = (penalty.weights[g] == 0)
-            if active_groups[i]:
-                active[group] = True
-                z = np.zeros(active.shape, np.float)
-                z[group] = self.initial_soln[group] / np.linalg.norm(self.initial_soln[group])
-                active_directions.append(z)
-                active_directions_list.append(z[group]) ## added for group lasso
-                active_penalty.append(penalty.weights[g]) ## added
-                initial_scalings.append(np.linalg.norm(self.initial_soln[group]))
-            if unpenalized_groups[i]:
-                unpenalized[group] = True
-
-        self.active_penalty = active_penalty
-
-        # solve the restricted problem
-
-        self._overall = active + unpenalized > 0
-        self._inactive = ~self._overall
-        self._unpenalized = unpenalized
-
-        self.active_directions_list = active_directions_list ## added for group lasso
-        self._active_directions = np.array(active_directions).T
-        self._active_groups = np.array(active_groups, np.bool)
-        self._unpenalized_groups = np.array(unpenalized_groups, np.bool)
-
-        self.selection_variable = {'groups':self._active_groups, 
-                                   'variables':self._overall,
-                                   'directions':self._active_directions}
-
-        # initial state for opt variables
-
-        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') + 
-                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad')) 
-                          # the quadratic of a smooth_atom is not included in computing the smooth_objective
-        self.initial_subgrad = initial_subgrad
-        initial_subgrad = initial_subgrad[self._inactive]
-        initial_unpenalized = self.initial_soln[self._unpenalized]
-        self.observed_opt_state = np.concatenate([initial_scalings,
-                                                  initial_unpenalized,
-                                                  initial_subgrad], axis=0)
-
-        # set the _solved bit
-
-        self._solved = True
-
-        # Now setup the pieces for linear decomposition
-
-        (loss,
-         epsilon,
-         penalty,
-         initial_soln,
-         overall,
-         inactive,
-         unpenalized,
-         active_groups,
-         active_directions) = (self.loss,
-                               self.epsilon,
-                               self.penalty,
-                               self.initial_soln,
-                               self._overall,
-                               self._inactive,
-                               self._unpenalized,
-                               self._active_groups,
-                               self._active_directions)
-
-        # scaling should be chosen to be Lipschitz constant for gradient of Gaussian part
-
-        # we are implicitly assuming that
-        # loss is a pairs model
-
-        self.scaling = scaling
-        _sqrt_scaling = np.sqrt(self.scaling)
-
-        _beta_unpenalized = restricted_Mest(loss, overall, solve_args=solve_args)
-
-        beta_full = np.zeros(overall.shape)
-        beta_full[overall] = _beta_unpenalized
-        #_hessian = loss.hessian(beta_full)
-        self._beta_full = beta_full
-
-        # observed state for score in internal coordinates
-
-        self.observed_internal_state = np.hstack([_beta_unpenalized * _sqrt_scaling,
-                                                  -loss.smooth_objective(beta_full, 'grad')[inactive] / _sqrt_scaling])
-
-        # form linear part
-        self.num_opt_var = self.observed_opt_state.shape[0]
-        p = loss.shape[0] # shorthand for p
-
-        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
-        # E for active
-        # U for unpenalized
-        # -E for inactive
-
-        _opt_linear_term = np.zeros((p, self._active_groups.sum() + unpenalized.sum() + inactive.sum()))
-        _score_linear_term = np.zeros((p, p))
-
-        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
-
-        Mest_slice = slice(0, overall.sum())
-        X, y = loss.data
-        W = self.loss.saturated_loss.hessian(X.dot(beta_full))
-        _Mest_hessian = np.dot(X.T, X[:, overall] * W[:, None])
-        self._Mest_hessian = _Mest_hessian
-        _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
-
-        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
-
-        null_idx = range(overall.sum(), p)
-        inactive_idx = np.nonzero(inactive)[0]
-        for _i, _n in zip(inactive_idx, null_idx):
-            _score_linear_term[_i,_n] = -_sqrt_scaling
-
-        # c_E piece 
-
-        scaling_slice = slice(0, active_groups.sum())
-        if len(active_directions)==0:
-            _opt_hessian=0
-        else:
-            _opt_hessian = np.dot(_Mest_hessian, active_directions[overall]) + epsilon * active_directions
-        _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling
-
-        self.observed_opt_state[scaling_slice] *= _sqrt_scaling
-
-        # beta_U piece
-
-        unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
-        unpenalized_directions = np.identity(p)[:,unpenalized]
-        if unpenalized.sum():
-            _opt_linear_term[:, unpenalized_slice] = (np.dot(_Mest_hessian, unpenalized_directions[overall])
-                                                      + epsilon * unpenalized_directions) / _sqrt_scaling
-        self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling
-
-        # subgrad piece
-
-        subgrad_idx = range(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
-        subgrad_slice = slice(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
-        for _i, _s in zip(inactive_idx, subgrad_idx):
-            _opt_linear_term[_i,_s] = _sqrt_scaling
-
-        self.observed_opt_state[subgrad_idx] /= _sqrt_scaling
-
-        # form affine part
-
-        _opt_affine_term = np.zeros(p)
-        idx = 0
-        groups = np.unique(penalty.groups) 
-        for i, g in enumerate(groups):
-            if active_groups[i]:
-                group = penalty.groups == g
-                _opt_affine_term[group] = active_directions[:,idx][group] * penalty.weights[g]
-                idx += 1
-
-        # two transforms that encode score and optimization
-        # variable roles 
-
-        # later, we will modify `score_transform`
-        # in `linear_decomposition`
-
-        self.opt_transform = (_opt_linear_term, _opt_affine_term)
-        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
-
-        # now store everything needed for the projections
-        # the projection acts only on the optimization
-        # variables
-
-        self.scaling_slice = scaling_slice
-
-        # weights are scaled here because the linear terms scales them by scaling
-
-        new_groups = penalty.groups[inactive]
-        new_weights = dict([(g, penalty.weights[g] / _sqrt_scaling) for g in penalty.weights.keys() if g in np.unique(new_groups)])
-
-        # we form a dual group lasso object
-        # to do the projection
-
-        self.group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, bound=1.)
-        self.subgrad_slice = subgrad_slice
-
-        self._setup = True
-        self._marginalize_subgradient = False
-        self.scaling_slice = scaling_slice
-        self.unpenalized_slice = unpenalized_slice
-        self.ndim = loss.shape[0]
-
-        self.nboot = nboot
-
-    def get_sampler(self):
-        # setup the default optimization sampler
-
-        if not hasattr(self, "_sampler"):
-
-            def projection(group_lasso_dual, subgrad_slice, scaling_slice, opt_state):
-                """
-                Full projection for Langevin.
-
-                The state here will be only the state of the optimization variables.
-                """
-
-                new_state = opt_state.copy() # not really necessary to copy
-                new_state[scaling_slice] = np.maximum(opt_state[scaling_slice], 0)
-                new_state[subgrad_slice] = group_lasso_dual.bound_prox(opt_state[subgrad_slice])
-                return new_state
-
-            projection = functools.partial(projection, self.group_lasso_dual, self.subgrad_slice, self.scaling_slice)
-
-            def grad_log_density(query,
-                                 opt_linear,
-                                 rand_gradient,
-                                 internal_state,
-                                 opt_state):
-                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
-                return opt_linear.T.dot(rand_gradient(full_state).T)
-
-            grad_log_density = functools.partial(grad_log_density, self, self.opt_transform[0], self.randomization.gradient)
-
-            def log_density(query,
-                            opt_linear,
-                            rand_log_density,
-                            internal_state,
-                            opt_state):
-                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
-                return rand_log_density(full_state)
-
-            log_density = functools.partial(log_density, self, self.opt_transform[0], self.randomization.log_density)
-
-            self._sampler = optimization_sampler(self.observed_opt_state,
-                                                 self.observed_internal_state.copy(),
-                                                 self.score_transform,
-                                                 self.opt_transform,
-                                                 projection,
-                                                 grad_log_density,
-                                                 log_density)
-        return self._sampler
-
-    sampler = property(get_sampler, query.set_sampler)
-
-
-    def decompose_subgradient(self, conditioning_groups=None, marginalizing_groups=None):
-        """
-        ADD DOCSTRING
-
-        conditioning_groups and marginalizing_groups should be disjoint
-        """
-
-        groups = np.unique(self.penalty.groups)
-        condition_inactive_groups = np.zeros_like(groups, dtype=bool)
-
-        if conditioning_groups is None:
-            conditioning_groups = np.zeros_like(groups, dtype=np.bool)
-
-        if marginalizing_groups is None:
-            marginalizing_groups = np.zeros_like(groups, dtype=np.bool)
-
-        if np.any(conditioning_groups * marginalizing_groups):
-            raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
-
-        if not self._setup:
-            raise ValueError('setup_sampler should be called before using this function')
-
-        condition_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
-        moving_inactive_groups = np.zeros_like(groups, dtype=bool)
-        moving_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
-        _inactive_groups = ~(self._active_groups+self._unpenalized)
-
-        inactive_marginal_groups = np.zeros_like(self._inactive, dtype=bool)
-        limits_marginal_groups = np.zeros_like(self._inactive, np.float)
-
-        for i, g in enumerate(groups):
-            if (_inactive_groups[i]) and conditioning_groups[i]:
-                group = self.penalty.groups == g
-                condition_inactive_groups[i] = True
-                condition_inactive_variables[group] = True
-            elif (_inactive_groups[i]) and (~conditioning_groups[i]) and (~marginalizing_groups[i]):
-                group = self.penalty.groups == g
-                moving_inactive_groups[i] = True
-                moving_inactive_variables[group] = True
-            if (_inactive_groups[i]) and marginalizing_groups[i]:
-                group = self.penalty.groups == g
-                inactive_marginal_groups[i] = True
-                limits_marginal_groups[i] = self.penalty.weights[g]
-
-        opt_linear, opt_offset = self.opt_transform
-
-        new_linear = np.zeros((opt_linear.shape[0], (self._active_groups.sum() +
-                                                     self._unpenalized_groups.sum() +
-                                                     moving_inactive_variables.sum())))
-        new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
-        new_linear[:, self.unpenalized_slice] = opt_linear[:, self.unpenalized_slice]
-
-        inactive_moving_idx = np.nonzero(moving_inactive_variables)[0]
-        subgrad_idx = range(self._active_groups.sum() + self._unpenalized.sum(),
-                            self._active_groups.sum() + self._unpenalized.sum() +
-                            moving_inactive_variables.sum())
-        subgrad_slice = subgrad_idx
-        for _i, _s in zip(inactive_moving_idx, subgrad_idx):
-            new_linear[_i, _s] = 1.
-
-        observed_opt_state = self.observed_opt_state[:(self._active_groups.sum() +
-                                                       self._unpenalized_groups.sum() +
-                                                       moving_inactive_variables.sum())]
-        observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive_variables]
-
-        condition_linear = np.zeros((opt_linear.shape[0], (self._active_groups.sum() +
-                                                           self._unpenalized_groups.sum() +
-                                                           condition_inactive_variables.sum())))
-        inactive_condition_idx = np.nonzero(condition_inactive_variables)[0]
-        subgrad_condition_idx = range(self._active_groups.sum() + self._unpenalized.sum(),
-                                      self._active_groups.sum() + self._unpenalized.sum() + condition_inactive_variables.sum())
-
-        for _i, _s in zip(inactive_condition_idx, subgrad_condition_idx):
-            condition_linear[_i, _s] = 1.
-
-        new_offset = condition_linear[:,subgrad_condition_idx].dot(self.initial_subgrad[condition_inactive_variables]) + opt_offset
-
-        new_opt_transform = (new_linear, new_offset)
-
-        print("limits marginal groups", limits_marginal_groups)
-        print("inactive marginal groups", inactive_marginal_groups)
-
-        def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, inactive_marginal_groups):
-            return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
-                              _cdf(full_state_plus) - _cdf(full_state_minus)))[inactive_marginal_groups]
-
-        def new_grad_log_density(query, 
-                                 limits_marginal_groups,
-                                 inactive_marginal_groups,
-                                 _cdf,
-                                 _pdf,
-                                 opt_linear,
-                                 deriv_log_dens,
-                                 internal_state, 
-                                 opt_state):
-
-            full_state = reconstruct_full_from_internal(new_opt_transform, query.score_transform, internal_state, opt_state)
-
-            p = query.penalty.shape[0]
-            weights = np.zeros(p)
-
-            if inactive_marginal_groups.sum()>0:
-                full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
-                full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
-                weights[inactive_marginal_groups] = _fraction(_cdf, _pdf, full_state_plus, full_state_minus, inactive_marginal_groups)
-            weights[~inactive_marginal_groups] = deriv_log_dens(full_state)[~inactive_marginal_groups]
-            return -opt_linear.T.dot(weights)
-
-        new_grad_log_density = functools.partial(new_grad_log_density,
-                                                 self,
-                                                 limits_marginal_groups,
-                                                 inactive_marginal_groups,
-                                                 self.randomization._cdf,
-                                                 self.randomization._pdf,
-                                                 new_opt_transform[0],
-                                                 self.randomization._derivative_log_density)
-
-        def new_log_density(query, 
-                            limits_marginal_groups,
-                            inactive_marginal_groups,
-                            _cdf,
-                            _pdf,
-                            opt_linear,
-                            log_dens,
-                            internal_state, 
-                            opt_state):
-
-            full_state = reconstruct_full_from_internal(new_opt_transform,
-                                                        query.score_transform,
-                                                        internal_state,
-                                                        opt_state)
-            full_state = np.atleast_2d(full_state)
-            p = query.penalty.shape[0]
-            logdens = np.zeros(full_state.shape[0])
-
-            if inactive_marginal_groups.sum()>0:
-                full_state_plus = full_state + np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
-                full_state_minus = full_state - np.multiply(limits_marginal_groups, np.array(inactive_marginal_groups, np.float))
-                logdens += np.sum(np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:,inactive_marginal_groups], axis=1)
-
-            logdens += log_dens(full_state[:,~inactive_marginal_groups])
-
-            return np.squeeze(logdens) # should this be negative to match the gradient log density?
-
-        new_log_density = functools.partial(new_log_density,
-                                            self,
-                                            limits_marginal_groups,
-                                            inactive_marginal_groups,
-                                            self.randomization._cdf,
-                                            self.randomization._pdf,
-                                            self.opt_transform[0],
-                                            self.randomization._log_density)
-
-        new_groups = self.penalty.groups[moving_inactive_groups]
-        _sqrt_scaling = np.sqrt(self.scaling)
-        new_weights = dict([(g, self.penalty.weights[g] / _sqrt_scaling) for g in self.penalty.weights.keys() if g in np.unique(new_groups)])
-        new_group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, bound=1.)
-
-        def new_projection(group_lasso_dual,
-                           noverall,
-                           opt_state):
-            new_state = opt_state.copy()
-            new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
-            new_state[noverall:] = group_lasso_dual.bound_prox(opt_state[noverall:])
-            return new_state
-
-        new_projection = functools.partial(new_projection,
-                                           new_group_lasso_dual,
-                                           self._overall.sum())
-                                           
-        new_selection_variable = copy(self.selection_variable)
-        new_selection_variable['subgradient'] = self.observed_opt_state[self.subgrad_slice]
-
-        self.sampler = optimization_sampler(observed_opt_state,
-                                            self.observed_internal_state.copy(),
-                                            self.score_transform,
-                                            new_opt_transform,
-                                            new_projection,
-                                            new_grad_log_density,
-                                            new_log_density,
-                                            selection_info=(self, new_selection_variable))
-
-    def condition_on_scalings(self):
-        """
-        Maybe we should allow subgradients of only some variables...
-        """
-        if not self._setup:
-            raise ValueError('setup_sampler should be called before using this function')
-
-        opt_linear, opt_offset = self.opt_transform
-        
-        new_offset = opt_linear[:,self.scaling_slice].dot(self.observed_opt_state[self.scaling_slice]) + opt_offset
-        new_linear = opt_linear[:,self.subgrad_slice]
-
-        self.opt_transform = (new_linear, new_offset)
-
-        # for group LASSO this will induce a bigger jacobian
-        self.selection_variable['scalings'] = self.observed_opt_state[self.scaling_slice]
-
-        # reset slices 
-
-        self.observed_opt_state = self.observed_opt_state[self.subgrad_slice]
-        self.subgrad_slice = slice(None, None, None)
-        self.scaling_slice = np.zeros(new_linear.shape[1], np.bool)
-        self.num_opt_var = new_linear.shape[1]
-
-#     def grad_log_density(self, internal_state, opt_state):
-#         """
-#             marginalizing over the sub-gradient
-
-#             full_state is 
-#             density should be expressed in terms of opt_state coordinates
-#         """
-
-#         if not self._setup:
-#             raise ValueError('setup_sampler should be called before using this function')
-
-#         if self._marginalize_subgradient:
-
-#             full_state = reconstruct_full_from_internal(self, internal_state, opt_state)
-
-#             p = self.penalty.shape[0]
-#             weights = np.zeros(p)
-
-#             if self.inactive_marginal_groups.sum()>0:
-#                 full_state_plus = full_state + np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
-#                 full_state_minus = full_state - np.multiply(self.limits_marginal_groups, np.array(self.inactive_marginal_groups, np.float))
-
-
-#             def fraction(full_state_plus, full_state_minus, inactive_marginal_groups):
-#                 return (np.divide(self.randomization._pdf(full_state_plus) - self.randomization._pdf(full_state_minus),
-#                        self.randomization._cdf(full_state_plus) - self.randomization._cdf(full_state_minus)))[inactive_marginal_groups]
-
-#             if self.inactive_marginal_groups.sum() > 0:
-#                 weights[self.inactive_marginal_groups] = fraction(full_state_plus, full_state_minus, self.inactive_marginal_groups)
-#             weights[~self.inactive_marginal_groups] = self.randomization._derivative_log_density(full_state)[~self.inactive_marginal_groups]
-
-#             opt_linear = self.opt_transform[0]
-#             return -opt_linear.T.dot(weights)
-#         else:
-#             return query.grad_log_density(self, internal_state, opt_state)
-
-def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
-    """
-    Fit a restricted model using only columns `active`.
-
-    Parameters
-    ----------
-
-    Mest_loss : objective function
-        A GLM loss.
-
-    active : ndarray
-        Which columns to use.
-
-    solve_args : dict
-        Passed to `solve`.
-
-    Returns
-    -------
-
-    soln : ndarray
-        Solution to restricted problem.
-
-    """
-    X, Y = Mest_loss.data
-
-    if not Mest_loss._is_transform and hasattr(Mest_loss, 'saturated_loss'): # M_est is a glm
-        X_restricted = X[:,active]
-        loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
-    else:
-        I_restricted = ra.selector(active, ra.astransform(X).input_shape[0], ra.identity((active.sum(),)))
-        loss_restricted = rr.affine_smooth(Mest_loss, I_restricted.T)
-    beta_E = loss_restricted.solve(**solve_args)
-    
-    return beta_E
-
-class M_estimator_split(M_estimator):
-
-    def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10}):
-
-        total_size = loss.saturated_loss.shape[0]
-        self.randomization = split(loss.shape, subsample_size, total_size)
-
-        M_estimator.__init__(self, loss, epsilon, penalty, self.randomization, solve_args=solve_args)
-
-        total_size = loss.saturated_loss.shape[0]
-        if subsample_size > total_size:
-            raise ValueError('subsample size must be smaller than total sample size')
-
-        self.total_size, self.subsample_size = total_size, subsample_size
-        
-
-class M_estimator_group_lasso(M_estimator):
-
-    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
-
-        M_estimator.__init__(self, loss, epsilon, penalty, randomization, solve_args=solve_args)
-
-        self.Q = self._Mest_hessian[self._overall,:] + epsilon * np.identity(self._overall.sum())
-        self.Qinv = np.linalg.inv(self.Q)
-        self.form_VQLambda()
-
-    def form_VQLambda(self):
-        nactive_groups = len(self.active_directions_list)
-        nactive_vars = sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
-        V = np.zeros((nactive_vars, nactive_vars - nactive_groups))
-
-        Lambda = np.zeros((nactive_vars, nactive_vars))
-        temp_row, temp_col = 0, 0
-        for g in range(len(self.active_directions_list)):
-            size_curr_group = self.active_directions_list[g].shape[0]
-
-            Lambda[temp_row:(temp_row + size_curr_group), temp_row:(temp_row + size_curr_group)] \
-                = self.active_penalty[g] * np.identity(size_curr_group)
-
-            def null(A, eps=1e-12):
-                u, s, vh = np.linalg.svd(A)
-                padding = max(0, np.shape(A)[1] - np.shape(s)[0])
-                null_mask = np.concatenate(((s <= eps), np.ones((padding,), dtype=bool)), axis=0)
-                null_space = scipy.compress(null_mask, vh, axis=0)
-                return scipy.transpose(null_space)
-
-            V_g = null(matrix(self.active_directions_list[g]))
-            V[temp_row:(temp_row + V_g.shape[0]), temp_col:(temp_col + V_g.shape[1])] = V_g
-            temp_row += V_g.shape[0]
-            temp_col += V_g.shape[1]
-        self.VQLambda = np.dot(np.dot(V.T, self.Qinv), Lambda.dot(V))
-
-        return self.VQLambda
-
-    def derivative_logdet_jacobian(self, scalings):
-        nactive_groups = len(self.active_directions_list)
-        nactive_vars = np.sum([self.active_directions_list[i].shape[0] for i in range(nactive_groups)])
-        from scipy.linalg import block_diag
-        matrix_list = [scalings[i] * np.identity(self.active_directions_list[i].shape[0] - 1) for i in
-                       range(scalings.shape[0])]
-        Gamma_minus = block_diag(*matrix_list)
-        jacobian_inv = np.linalg.inv(Gamma_minus + self.VQLambda)
-
-        group_sizes = [self._active_directions[i].shape[0] for i in range(nactive_groups)]
-        group_sizes_cumsum = np.concatenate(([0], np.array(group_sizes).cumsum()))
-
-        jacobian_inv_blocks = [
-            jacobian_inv[group_sizes_cumsum[i]:group_sizes_cumsum[i + 1],
-            group_sizes_cumsum[i]:group_sizes_cumsum[i + 1]]
-            for i in range(nactive_groups)]
-
-        der = np.zeros(self.observed_opt_state.shape[0])
-        der[self.scaling_slice] = np.array([np.matrix.trace(jacobian_inv_blocks[i]) for i in range(scalings.shape[0])])
-        return der
-
diff --git a/selection/randomized/base.py b/selection/randomized/base.py
new file mode 100644
index 000000000..dc6db4230
--- /dev/null
+++ b/selection/randomized/base.py
@@ -0,0 +1,37 @@
+import regreg.api as rr
+import regreg.affine as ra
+
+def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
+    """
+    Fit a restricted model using only columns `active`.
+
+    Parameters
+    ----------
+
+    loss : objective function
+        A GLM loss (has `saturated_loss`), or another smooth loss.
+
+    active : ndarray
+        Which columns to use; `active.sum()` is used as their count.
+
+    solve_args : dict
+        Keyword arguments passed to `solve` of the restricted loss.
+
+    Returns
+    -------
+
+    beta_E : ndarray
+        Solution to restricted problem.
+
+    """
+    X, Y = loss.data
+
+    if not loss._is_transform and hasattr(loss, 'saturated_loss'): # GLM loss: restrict the design matrix columns directly
+        X_restricted = X[:,active]
+        loss_restricted = rr.affine_smooth(loss.saturated_loss, X_restricted)
+    else: # general loss: compose it with a selector built from `active`
+        I_restricted = ra.selector(active, ra.astransform(X).input_shape[0], ra.identity((active.sum(),)))
+        loss_restricted = rr.affine_smooth(loss, I_restricted.T)
+    beta_E = loss_restricted.solve(**solve_args)
+    
+    return beta_E
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index cd0ec063b..bdb0897f7 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -16,7 +16,6 @@
                   pairs_bootstrap_glm)
 from .randomization import randomization
 from .query import multiple_queries
-from .M_estimator import restricted_Mest
 
 class step(lasso):
 
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 35b546bf8..fd493c0ee 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -5,7 +5,7 @@
 
 from regreg.api import glm, identity_quadratic
 
-from .M_estimator import restricted_Mest, M_estimator, M_estimator_split
+from .base import restricted_estimator
 from .greedy_step import greedy_score_step
 from .threshold_score import threshold_score
 
@@ -45,7 +45,7 @@ def pairs_bootstrap_glm(glm_loss,
         by sqrt(scaling).
 
     solve_args : dict
-        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        Arguments passed to solver of restricted problem (`restricted_estimator`) if 
         beta_full is None.
 
     Returns
@@ -59,7 +59,7 @@ def pairs_bootstrap_glm(glm_loss,
     X, Y = glm_loss.data
 
     if beta_full is None:
-        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+        beta_active = restricted_estimator(glm_loss, active, solve_args=solve_args)
         beta_full = np.zeros(glm_loss.shape)
         beta_full[active] = beta_active
     else:
@@ -154,7 +154,7 @@ def pairs_inactive_score_glm(glm_loss,
         to ~active.
 
     solve_args : dict
-        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        Arguments passed to solver of restricted problem (`restricted_estimator`) if 
         beta_full is None.
 
     Returns
@@ -211,7 +211,7 @@ def pairs_bootstrap_score(glm_loss,
         Solution to the restricted problem. 
 
     solve_args : dict
-        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        Arguments passed to solver of restricted problem (`restricted_estimator`) if 
         beta_full is None.
 
     Returns
@@ -226,7 +226,7 @@ def pairs_bootstrap_score(glm_loss,
     X, Y = glm_loss.data
 
     if beta_active is None:
-        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+        beta_active = restricted_estimator(glm_loss, active, solve_args=solve_args)
     X_active = X[:,active]
 
     _bootW = np.diag(glm_loss.saturated_loss.hessian(X_active.dot(beta_active)))
@@ -271,7 +271,7 @@ def set_alpha_matrix(glm_loss,
         by sqrt(scaling).
 
     solve_args : dict
-        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        Arguments passed to solver of restricted problem (`restricted_estimator`) if 
         beta_full is None.
 
     Returns
@@ -283,7 +283,7 @@ def set_alpha_matrix(glm_loss,
     X, Y = glm_loss.data
 
     if beta_full is None:
-        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+        beta_active = restricted_estimator(glm_loss, active, solve_args=solve_args)
         beta_full = np.zeros(glm_loss.shape)
         beta_full[active] = beta_active
     else:
@@ -344,7 +344,7 @@ def _parametric_cov_glm(glm_loss,
         Boolean indexing array
 
     solve_args : dict
-        Arguments passed to solver of restricted problem (`restricted_Mest`) if 
+        Arguments passed to solver of restricted problem (`restricted_estimator`) if 
         beta_full is None.
 
     Returns
@@ -358,7 +358,7 @@ def _parametric_cov_glm(glm_loss,
     n, p = X.shape
 
     if beta_full is None:
-        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+        beta_active = restricted_estimator(glm_loss, active, solve_args=solve_args)
         beta_full = np.zeros(glm_loss.shape)
         beta_full[active] = beta_active
     else:
@@ -391,80 +391,6 @@ def _parametric_cov_glm(glm_loss,
     Sigma_full = np.dot(mat, np.dot(_W, mat.T))
     return Sigma_full
 
-#### Subclasses of different randomized views
-
-class glm_group_lasso(M_estimator):
-
-    def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
-
-        bootstrap_score = pairs_bootstrap_glm(self.loss,
-                                              self.selection_variable['variables'],
-                                              beta_full=self._beta_full,
-                                              inactive=~self.selection_variable['variables'])[0]
-
-        return bootstrap_score
-
-class split_glm_group_lasso(M_estimator_split):
-
-    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B=1000):
-
-        # now we need to estimate covariance of
-        # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)
-
-        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0] # shorthand
-        
-        from .glm import pairs_bootstrap_score # need to correct these imports!!!
-
-        bootstrap_score = pairs_bootstrap_score(self.loss,
-                                                self._overall,
-                                                beta_active=self._beta_full[self._overall],
-                                                solve_args=solve_args)
-
-        # find unpenalized MLE on subsample
-
-        newq, oldq = identity_quadratic(0, 0, 0, 0), self.randomized_loss.quadratic
-        self.randomized_loss.quadratic = newq
-        beta_active_subsample = restricted_Mest(self.randomized_loss,
-                                                self._overall)
-
-        bootstrap_score_split = pairs_bootstrap_score(self.loss,
-                                                      self._overall,
-                                                      beta_active=beta_active_subsample,
-                                                      solve_args=solve_args)
-        self.randomized_loss.quadratic = oldq
-
-        inv_frac = n / m
-        
-        def subsample_diff(m, n, indices):
-            subsample = np.random.choice(indices, size=m, replace=False)
-            full_score = bootstrap_score(indices) # a sum of n terms
-            randomized_score = bootstrap_score_split(subsample) # a sum of m terms
-            return full_score - randomized_score * inv_frac
-
-        first_moment = np.zeros(p)
-        second_moment = np.zeros((p, p))
-        
-        _n = np.arange(n)
-        for _ in range(B):
-            indices = np.random.choice(_n, size=n, replace=True)
-            randomized_score = subsample_diff(m, n, indices)
-            first_moment += randomized_score
-            second_moment += np.multiply.outer(randomized_score, randomized_score)
-
-        first_moment /= B
-        second_moment /= B
-
-        cov = second_moment - np.multiply.outer(first_moment,
-                                                first_moment)
-
-        self.randomization.set_covariance(cov)
-
-        bootstrap_score = pairs_bootstrap_glm(self.loss,
-                                              self.selection_variable['variables'],
-                                              beta_full=self._beta_full,
-                                              inactive=~self.selection_variable['variables'])[0]
-
-        return bootstrap_score
 
 class glm_greedy_step(greedy_score_step, glm):
 
@@ -600,7 +526,7 @@ def parametric_cov(glm_loss,
     n, p = X.shape
 
     def _WQ(active):
-        beta_active = restricted_Mest(glm_loss, active, solve_args=solve_args)
+        beta_active = restricted_estimator(glm_loss, active, solve_args=solve_args)
         W = glm_loss.saturated_loss.hessian(X[:,active].dot(beta_active))
         return W
 
@@ -610,7 +536,7 @@ def _WQ(active):
     XW_T = W_T[:, None] * X_T
     Q_T_inv = np.linalg.inv(X_T.T.dot(XW_T))
 
-    beta_T = restricted_Mest(glm_loss, target, solve_args=solve_args)
+    beta_T = restricted_estimator(glm_loss, target, solve_args=solve_args)
     sigma_T = np.sqrt(np.sum((Y-glm_loss.saturated_loss.mean_function(X_T.dot(beta_T)))**2)/(n-np.sum(target)))
 
     covariances = [linear_func.dot(Q_T_inv).dot(linear_funcT)* (sigma_T **2)]
@@ -628,7 +554,7 @@ def _WQ(active):
         null_block = X_IT.dot(XW_T) - X_IT.dot(W_T[:, None] * X_C).dot(Q_C_inv).dot(X[:, cross].T.dot(XW_T))
         null_block = null_block.dot(Q_T_inv)
 
-        beta_C = restricted_Mest(glm_loss, cross, solve_args=solve_args)
+        beta_C = restricted_estimator(glm_loss, cross, solve_args=solve_args)
         sigma_C = np.sqrt(np.sum((Y - glm_loss.saturated_loss.mean_function(X_C.dot(beta_C))) ** 2) / (n - np.sum(cross)))
 
         covariances.append(np.vstack([beta_block, null_block]).dot(linear_funcT).T * sigma_T * sigma_C)
diff --git a/selection/randomized/greedy_step.py b/selection/randomized/greedy_step.py
index 86b3da405..85676e8ce 100644
--- a/selection/randomized/greedy_step.py
+++ b/selection/randomized/greedy_step.py
@@ -3,7 +3,7 @@
 import regreg.api as rr
 
 from .query import query, optimization_sampler
-from .M_estimator import restricted_Mest
+from .base import restricted_estimator
 from .reconstruction import reconstruct_full_from_internal
 
 class greedy_score_step(query):
@@ -69,7 +69,7 @@ def solve(self, nboot=2000):
                          self.beta_active)
 
         if beta_active is None:
-            beta_active = self.beta_active = restricted_Mest(self.loss, active, solve_args=solve_args)
+            beta_active = self.beta_active = restricted_estimator(self.loss, active, solve_args=solve_args)
             
         beta_full = np.zeros(loss.shape)
         beta_full[active] = beta_active
diff --git a/selection/randomized/group_lasso.py b/selection/randomized/group_lasso.py
index e45424d31..3c8573d4f 100644
--- a/selection/randomized/group_lasso.py
+++ b/selection/randomized/group_lasso.py
@@ -13,7 +13,7 @@
 from .reconstruction import reconstruct_full_from_internal
 from .randomization import split
 
-class M_estimator(query):
+class group_lasso_view(query):
 
     def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
         """
@@ -200,9 +200,10 @@ def solve(self, scaling=1, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         Mest_slice = slice(0, overall.sum())
         X, y = loss.data
         W = self.loss.saturated_loss.hessian(X.dot(beta_full))
-        _Mest_hessian = np.dot(X.T, X[:, overall] * W[:, None])
-        self._Mest_hessian = _Mest_hessian
-        _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling
+        _Mest_hessian_active = np.dot(X.T, X[:, active] * W[:, None])
+        _Mest_hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None])
+
+        _score_linear_term[:, Mest_slice] = -np.hstack([_Mest_hessian_active, _Mest_hessian_unpen]) / _sqrt_scaling
 
         # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
 
@@ -613,14 +614,14 @@ def restricted_Mest(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
     
     return beta_E
 
-class M_estimator_split(M_estimator):
+class group_lasso_split(group_lasso_view):
 
     def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its':50, 'tol':1.e-10}):
 
         total_size = loss.saturated_loss.shape[0]
         self.randomization = split(loss.shape, subsample_size, total_size)
 
-        M_estimator.__init__(self, loss, epsilon, penalty, self.randomization, solve_args=solve_args)
+        group_lasso.__init__(self, loss, epsilon, penalty, self.randomization, solve_args=solve_args)
 
         total_size = loss.saturated_loss.shape[0]
         if subsample_size > total_size:
@@ -629,11 +630,11 @@ def __init__(self, loss, epsilon, subsample_size, penalty, solve_args={'min_its'
         self.total_size, self.subsample_size = total_size, subsample_size
         
 
-class M_estimator_group_lasso(M_estimator):
+class group_lasso_group_lasso(group_lasso_view):
 
     def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
 
-        M_estimator.__init__(self, loss, epsilon, penalty, randomization, solve_args=solve_args)
+        group_lasso.__init__(self, loss, epsilon, penalty, randomization, solve_args=solve_args)
 
         self.Q = self._Mest_hessian[self._overall,:] + epsilon * np.identity(self._overall.sum())
         self.Qinv = np.linalg.inv(self.Q)
@@ -688,3 +689,78 @@ def derivative_logdet_jacobian(self, scalings):
         der[self.scaling_slice] = np.array([np.matrix.trace(jacobian_inv_blocks[i]) for i in range(scalings.shape[0])])
         return der
 
+
+#### Subclasses of different randomized views
+
+class glm_group_lasso(group_lasso_view):
+
+    def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
+
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self.selection_variable['variables'],
+                                              beta_full=self._beta_full,
+                                              inactive=~self.selection_variable['variables'])[0]
+
+        return bootstrap_score
+
+class split_glm_group_lasso(group_lasso_split):
+
+    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}, B=1000):
+
+        # now we need to estimate covariance of
+        # loss.grad(\beta_E^*) - 1/pi * randomized_loss.grad(\beta_E^*)
+
+        m, n, p = self.subsample_size, self.total_size, self.loss.shape[0] # shorthand
+        
+        from .glm import pairs_bootstrap_score # need to correct these imports!!!
+
+        bootstrap_score = pairs_bootstrap_score(self.loss,
+                                                self._overall,
+                                                beta_active=self._beta_full[self._overall],
+                                                solve_args=solve_args)
+
+        # find unpenalized MLE on subsample
+
+        newq, oldq = identity_quadratic(0, 0, 0, 0), self.randomized_loss.quadratic
+        self.randomized_loss.quadratic = newq
+        beta_active_subsample = restricted_Mest(self.randomized_loss,
+                                                self._overall)
+
+        bootstrap_score_split = pairs_bootstrap_score(self.loss,
+                                                      self._overall,
+                                                      beta_active=beta_active_subsample,
+                                                      solve_args=solve_args)
+        self.randomized_loss.quadratic = oldq
+
+        inv_frac = n / m
+        
+        def subsample_diff(m, n, indices):
+            subsample = np.random.choice(indices, size=m, replace=False)
+            full_score = bootstrap_score(indices) # a sum of n terms
+            randomized_score = bootstrap_score_split(subsample) # a sum of m terms
+            return full_score - randomized_score * inv_frac
+
+        first_moment = np.zeros(p)
+        second_moment = np.zeros((p, p))
+        
+        _n = np.arange(n)
+        for _ in range(B):
+            indices = np.random.choice(_n, size=n, replace=True)
+            randomized_score = subsample_diff(m, n, indices)
+            first_moment += randomized_score
+            second_moment += np.multiply.outer(randomized_score, randomized_score)
+
+        first_moment /= B
+        second_moment /= B
+
+        cov = second_moment - np.multiply.outer(first_moment,
+                                                first_moment)
+
+        self.randomization.set_covariance(cov)
+
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self.selection_variable['variables'],
+                                              beta_full=self._beta_full,
+                                              inactive=~self.selection_variable['variables'])[0]
+
+        return bootstrap_score
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index b30234fe5..7801f8a4e 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -18,6 +18,7 @@
 
 from .reconstruction import reconstruct_full_from_internal
 from .randomization import split, randomization
+from .base import restricted_estimator
 from .glm import (pairs_bootstrap_glm,
                   glm_nonparametric_bootstrap)
 
@@ -186,7 +187,7 @@ def solve(self, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         W = self.loss.saturated_loss.hessian(X.dot(beta_bar))
         _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
         _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None])
-        #self._hessian = _hessian
+
         _score_linear_term[:, est_slice] = -np.hstack([_hessian_active, _hessian_unpen])
 
         # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
@@ -1382,38 +1383,4 @@ def sqrt_lasso(X,
         return L
 
 
-def restricted_estimator(Mest_loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
-    """
-    Fit a restricted model using only columns `active`.
-
-    Parameters
-    ----------
-
-    Mest_loss : objective function
-        A GLM loss.
-
-    active : ndarray
-        Which columns to use.
-
-    solve_args : dict
-        Passed to `solve`.
-
-    Returns
-    -------
-
-    soln : ndarray
-        Solution to restricted problem.
-
-    """
-    X, Y = Mest_loss.data
-
-    if not Mest_loss._is_transform and hasattr(Mest_loss, 'saturated_loss'): # M_est is a glm
-        X_restricted = X[:,active]
-        loss_restricted = rr.affine_smooth(Mest_loss.saturated_loss, X_restricted)
-    else:
-        I_restricted = ra.selector(active, ra.astransform(X).input_shape[0], ra.identity((active.sum(),)))
-        loss_restricted = rr.affine_smooth(Mest_loss, I_restricted.T)
-    beta_E = loss_restricted.solve(**solve_args)
-    
-    return beta_E
 
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index 3c35ca3c9..f8ed0bda3 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -5,7 +5,7 @@
 
 from .query import query, optimization_sampler
 from .reconstruction import reconstruct_full_from_internal, reconstruct_score
-from .M_estimator import restricted_Mest
+from .base import restricted_estimator
 
 class threshold_score(query):
 
@@ -98,7 +98,7 @@ def solve(self, nboot=2000):
         self._marginalize_subgradient = True # need to find a better place to set this...
 
         if beta_active is None:
-            beta_active = self.beta_active = restricted_Mest(self.loss, active, solve_args=self.solve_args)
+            beta_active = self.beta_active = restricted_estimator(self.loss, active, solve_args=self.solve_args)
 
         self.randomize()
 

From 594be7386502629c0b4871b5d92dabdcf7381cba Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 9 Feb 2018 08:07:54 -0800
Subject: [PATCH 472/617] incorrect mean specification for affine gaussian
 sampler

---
 selection/randomized/glm.py   | 16 +++++++++++++---
 selection/randomized/lasso.py | 24 +++++++++++-------------
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index fd493c0ee..d84a814bc 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -510,6 +510,7 @@ def _boot_score(Y_star):
 def parametric_cov(glm_loss, 
                    target_with_linear_func, 
                    cross_terms=(),
+                   dispersion=None,
                    solve_args={'min_its':50, 'tol':1.e-10}):
 
     # cross_terms are different active sets
@@ -532,14 +533,20 @@ def _WQ(active):
 
     # weights and Q at the target
     W_T = _WQ(target)
+
     X_T = X[:,target]
     XW_T = W_T[:, None] * X_T
     Q_T_inv = np.linalg.inv(X_T.T.dot(XW_T))
 
     beta_T = restricted_estimator(glm_loss, target, solve_args=solve_args)
-    sigma_T = np.sqrt(np.sum((Y-glm_loss.saturated_loss.mean_function(X_T.dot(beta_T)))**2)/(n-np.sum(target)))
 
-    covariances = [linear_func.dot(Q_T_inv).dot(linear_funcT)* (sigma_T **2)]
+    # this is Pearson's X^2 dispersion estimator
+    if dispersion is None:
+        sigma_T = np.sqrt(np.sum((Y-glm_loss.saturated_loss.mean_function(X_T.dot(beta_T)))**2)/(n-np.sum(target)))
+    else:
+        sigma_T = dispersion
+
+    covariances = [linear_func.dot(Q_T_inv).dot(linear_funcT) * (sigma_T **2)]
 
     for cross in cross_terms:
         # the covariances are for (\bar{\beta}_{C}, N_C) -- C for cross
@@ -555,7 +562,10 @@ def _WQ(active):
         null_block = null_block.dot(Q_T_inv)
 
         beta_C = restricted_estimator(glm_loss, cross, solve_args=solve_args)
-        sigma_C = np.sqrt(np.sum((Y - glm_loss.saturated_loss.mean_function(X_C.dot(beta_C))) ** 2) / (n - np.sum(cross)))
+        if dispersion is None:
+            sigma_C = np.sqrt(np.sum((Y - glm_loss.saturated_loss.mean_function(X_C.dot(beta_C))) ** 2) / (n - np.sum(cross)))
+        else:
+            sigma_C = dispersion
 
         covariances.append(np.vstack([beta_block, null_block]).dot(linear_funcT).T * sigma_T * sigma_C)
 
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 7801f8a4e..703a51828 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -20,7 +20,8 @@
 from .randomization import split, randomization
 from .base import restricted_estimator
 from .glm import (pairs_bootstrap_glm,
-                  glm_nonparametric_bootstrap)
+                  glm_nonparametric_bootstrap,
+                  glm_parametric_covariance)
 
 class lasso_view(query):
 
@@ -330,7 +331,7 @@ def log_density(query,
                                                         self.score_transform, 
                                                         self.observed_internal_state, 
                                                         np.zeros(opt_linear.shape[1]))
-                cond_mean = cond_cov.dot(opt_linear.T.dot(prec.dot(offset)))
+                cond_mean = -cond_cov.dot(opt_linear.T.dot(prec.dot(offset)))
 
                 # need a log_density function
                 # the conditional density of opt variables
@@ -341,8 +342,8 @@ def log_density(query,
 
                 def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
                     mean_term = logdens_linear.dot(score.T).T + logdens_offset
-                    diff = opt - mean_term
-                    return - 0.5 * np.sum(diff * cond_prec.dot(diff.T).T, 1)
+                    arg = opt + mean_term
+                    return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
                 log_density = functools.partial(log_density, logdens_offset, logdens_linear, cond_precision)
 
                 # now make the constraints
@@ -557,7 +558,7 @@ def new_projection(dual,
                                                     self.score_transform, 
                                                     self.observed_internal_state, 
                                                     np.zeros(new_linear.shape[1]))
-            cond_mean = cond_cov.dot(new_linear.T.dot(prec.dot(offset)))
+            cond_mean = -cond_cov.dot(new_linear.T.dot(prec.dot(offset)))
 
             # need a log_density function
             # the conditional density of opt variables
@@ -568,8 +569,8 @@ def new_projection(dual,
 
             def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
                 mean_term = logdens_linear.dot(score.T).T + logdens_offset
-                diff = opt - mean_term
-                return - 0.5 * np.sum(diff * cond_prec.dot(diff.T).T, 1)
+                arg = opt + mean_term
+                return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
             log_density = functools.partial(log_density, logdens_offset, logdens_linear, cond_precision)
 
             # now make the constraints
@@ -591,10 +592,6 @@ def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
             b_subgrad = np.hstack([inactive_lagrange,
                                    inactive_lagrange])
 
-            print(self._overall)
-            print(A_scaling.shape, A_subgrad.shape)
-            print(b_scaling.shape, b_subgrad.shape)
-
             linear_term = np.vstack([A_scaling, A_subgrad])
             offset = np.hstack([b_scaling, b_subgrad])
 
@@ -832,6 +829,9 @@ def summary(self,
         if parameter is None:
             parameter = np.zeros(self.loglike.shape[0])
 
+        if np.asarray(selected_features).dtype != np.bool:
+            raise ValueError('selected_features should be a boolean array')
+
         unpenalized_mle = restricted_estimator(self.loglike, selected_features)
 
         if self.parametric_cov_estimator == False:
@@ -853,13 +853,11 @@ def summary(self,
             else:
                 target_cov, score_cov = form_covariances(target_info,  
                                                          cross_terms=[cov_info])
-
             opt_samplers.append(q.sampler)
 
         opt_samples = [opt_sampler.sample(ndraw,
                                           burnin) for opt_sampler in opt_samplers]
 
-        print(opt_samplers)
         ### TODO -- this only uses one view -- what about other queries?
 
         pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter, sample=opt_samples[0])

From adf8b81041539d2699f76ee842b76be839ff4df1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 9 Feb 2018 08:08:27 -0800
Subject: [PATCH 473/617] a few more steps for langevin

---
 selection/sampling/langevin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/sampling/langevin.py b/selection/sampling/langevin.py
index a5281652c..2f787d027 100644
--- a/selection/sampling/langevin.py
+++ b/selection/sampling/langevin.py
@@ -40,7 +40,7 @@ def next(self):
                 nattempt += 1
                 self._sqrt_step *= 0.8
                 self.stepsize = self._sqrt_step**2
-                if nattempt >= 10:
+                if nattempt >= 30:
                     raise ValueError('unable to find feasible step')
             else:
                 self.state[:] = candidate

From 61bbe97f55f244680237a672e5b756c2e4d04b77 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 12 Feb 2018 16:49:30 -0800
Subject: [PATCH 474/617] results look close to R sampler -- not great still

---
 selection/randomized/api.py                   |   8 +-
 selection/randomized/glm.py                   |   7 +-
 selection/randomized/lasso.py                 |  70 +++----
 selection/randomized/query.py                 |  19 +-
 selection/randomized/randomization.py         |   9 +-
 selection/randomized/tests/test_lasso_pval.py | 190 ++++++++++++++++++
 6 files changed, 240 insertions(+), 63 deletions(-)
 create mode 100644 selection/randomized/tests/test_lasso_pval.py

diff --git a/selection/randomized/api.py b/selection/randomized/api.py
index d9aaa8d8b..157402121 100644
--- a/selection/randomized/api.py
+++ b/selection/randomized/api.py
@@ -1,14 +1,10 @@
 from .query import multiple_queries, query
 
-from .glm import (glm_group_lasso, split_glm_group_lasso,
-                  glm_group_lasso_parametric,
-                  glm_greedy_step, 
-                  glm_threshold_score,
-                  pairs_bootstrap_glm, 
+from .glm import (pairs_bootstrap_glm, 
                   pairs_inactive_score_glm,
                   glm_nonparametric_bootstrap,
                   glm_parametric_covariance)
 
 from .randomization import randomization
 
-from .convenience import lasso
+from .lasso import lasso
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index d84a814bc..48ecf9c53 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -542,11 +542,11 @@ def _WQ(active):
 
     # this is Pearson's X^2 dispersion estimator
     if dispersion is None:
-        sigma_T = np.sqrt(np.sum((Y-glm_loss.saturated_loss.mean_function(X_T.dot(beta_T)))**2)/(n-np.sum(target)))
+        sigma_T = np.sqrt(np.sum((Y-glm_loss.saturated_loss.mean_function(X_T.dot(beta_T)))**2 / W_T)/(n-np.sum(target)))
     else:
         sigma_T = dispersion
 
-    covariances = [linear_func.dot(Q_T_inv).dot(linear_funcT) * (sigma_T **2)]
+    covariances = [linear_func.dot(Q_T_inv).dot(linear_funcT) * (sigma_T**2)]
 
     for cross in cross_terms:
         # the covariances are for (\bar{\beta}_{C}, N_C) -- C for cross
@@ -563,7 +563,8 @@ def _WQ(active):
 
         beta_C = restricted_estimator(glm_loss, cross, solve_args=solve_args)
         if dispersion is None:
-            sigma_C = np.sqrt(np.sum((Y - glm_loss.saturated_loss.mean_function(X_C.dot(beta_C))) ** 2) / (n - np.sum(cross)))
+            sigma_C = sigma_T # Hmm... not sure here 
+            # sigma_C = np.sqrt(np.sum((Y - glm_loss.saturated_loss.mean_function(X_C.dot(beta_C)) / W_C) ** 2) / (n - np.sum(cross)))
         else:
             sigma_C = dispersion
 
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 703a51828..b701eb7dc 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -25,7 +25,13 @@
 
 class lasso_view(query):
 
-    def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
+    def __init__(self, 
+                 loss, 
+                 epsilon, 
+                 penalty, 
+                 randomization, 
+                 perturb=None,
+                 solve_args={'min_its':50, 'tol':1.e-10}):
         """
         Fits the logistic regression to a candidate active set, without penalty.
         Calls the method bootstrap_covariance() to bootstrap the covariance matrix.
@@ -69,9 +75,10 @@ def __init__(self, loss, epsilon, penalty, randomization, solve_args={'min_its':
          
     # Methods needed for subclassing a query
 
-    def solve(self, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
+    def solve(self, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000,
+              perturb=None):
 
-        self.randomize()
+        self.randomize(perturb=perturb)
 
         (loss,
          randomized_loss,
@@ -128,12 +135,11 @@ def solve(self, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000):
         self.initial_subgrad = initial_subgrad
 
         initial_scalings = np.fabs(self.initial_soln[active])
-        initial_subgrad = initial_subgrad[self._inactive]
         initial_unpenalized = self.initial_soln[self._unpenalized]
 
         self.observed_opt_state = np.concatenate([initial_scalings,
                                                   initial_unpenalized,
-                                                  initial_subgrad], axis=0)
+                                                  self.initial_subgrad[self._inactive]], axis=0)
 
         # set the _solved bit
 
@@ -233,11 +239,7 @@ def signed_basis_vector(p, j, s):
 
         _opt_affine_term = np.zeros(p)
         idx = 0
-        if np.asarray(penalty.lagrange).shape in [(), (1,)]:
-            _opt_affine_term[active] = active_signs[active] * penalty.lagrange
-            
-        else:
-            _opt_affine_term[active] = active_signs[active] * penalty.lagrange[active]
+        _opt_affine_term[active] = active_signs[active] * self._lagrange[active]
 
         # two transforms that encode score and optimization
         # variable roles 
@@ -367,6 +369,7 @@ def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
                                          mean=cond_mean,
                                          covariance=cond_cov)
 
+                logdens_transform = (logdens_linear, logdens_offset)
                 self._sampler = affine_gaussian_sampler(affine_con,
                                                         self.observed_opt_state,
                                                         self.observed_internal_state,
@@ -424,7 +427,6 @@ def decompose_subgradient(self, condition=None, marginalize=None):
         subgrad_idx = range(self._active.sum() + self._unpenalized.sum(),
                             self._active.sum() + self._unpenalized.sum() +
                             moving_inactive.sum())
-        subgrad_slice = subgrad_idx
         for _i, _s in zip(inactive_moving_idx, subgrad_idx):
             new_linear[_i, _s] = 1.
 
@@ -436,15 +438,9 @@ def decompose_subgradient(self, condition=None, marginalize=None):
         condition_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
                                                            self._unpenalized.sum() +
                                                            condition_inactive.sum())))
-        inactive_condition_idx = np.nonzero(condition_inactive)[0]
-        subgrad_condition_idx = range(self._active.sum() + self._unpenalized.sum(),
-                                      self._active.sum() + self._unpenalized.sum() + condition_inactive.sum())
-
-        for _i, _s in zip(inactive_condition_idx, subgrad_condition_idx):
-            condition_linear[_i, _s] = 1.
-
-        new_offset = condition_linear[:,subgrad_condition_idx].dot(self.initial_subgrad[condition_inactive]) + opt_offset
 
+        new_offset = opt_offset + 0.
+        new_offset[condition_inactive] += self.initial_subgrad[condition_inactive]
         new_opt_transform = (new_linear, new_offset)
 
         if not hasattr(self.randomization, "cov_prec") or marginalize.sum(): # use Langevin -- not gaussian
@@ -564,7 +560,8 @@ def new_projection(dual,
             # the conditional density of opt variables
             # given the score
 
-            logdens_offset = cond_cov.dot(new_linear.T.dot(prec.dot(score_offset + opt_offset)))
+            logdens_offset = cond_cov.dot(new_linear.T.dot(prec.dot(
+                                                           score_offset + opt_offset)))
             logdens_linear = cond_cov.dot(new_linear.T.dot(prec.dot(score_linear)))
 
             def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
@@ -600,10 +597,12 @@ def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
                                      mean=cond_mean,
                                      covariance=cond_cov)
 
+            logdens_transform = (logdens_linear, logdens_offset)
             self._sampler = affine_gaussian_sampler(affine_con,
                                                     observed_opt_state,
                                                     self.observed_internal_state,
                                                     log_density,
+                                                    logdens_transform,
                                                     selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
 
 
@@ -722,7 +721,7 @@ def __init__(self,
 
     def fit(self, 
             solve_args={'tol':1.e-12, 'min_its':50}, 
-            views=[], 
+            perturb=None,
             nboot=1000):
         """
         Fit the randomized lasso using `regreg`.
@@ -733,9 +732,6 @@ def fit(self,
         solve_args : keyword args
              Passed to `regreg.problems.simple_problem.solve`.
 
-        views : list
-             Other views of the data, e.g. cross-validation.
-
         Returns
         -------
 
@@ -749,12 +745,8 @@ def fit(self,
             self._view = glm_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
         else:
             self._view = glm_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
-        self._view.solve(nboot=nboot)
+        self._view.solve(nboot=nboot, perturb=perturb)
 
-        views = copy(views); views.append(self._view)
-        self._queries = multiple_queries(views)
-        self._queries.solve()
-   
         self.signs = np.sign(self._view.initial_soln)
         self.selection_variable = self._view.selection_variable
         return self.signs
@@ -795,7 +787,8 @@ def summary(self,
                 ndraw=10000, 
                 burnin=2000,
                 compute_intervals=False,
-                bootstrap_sampler=False):
+                bootstrap_sampler=False,
+                subset=None):
         """
         Produce p-values and confidence intervals for targets
         of model including selected features
@@ -823,7 +816,7 @@ def summary(self,
             Use wild bootstrap instead of Gaussian plugin.
 
         """
-        if not hasattr(self, "_queries"):
+        if not hasattr(self, "_view"):
             raise ValueError('run `fit` method before producing summary.')
 
         if parameter is None:
@@ -844,7 +837,7 @@ def summary(self,
             form_covariances = glm_parametric_covariance(self.loglike)
 
         opt_samplers = []
-        for q in self._queries.objectives:
+        for q in [self._view]:
             cov_info = q.setup_sampler()
             if self.parametric_cov_estimator == False:
                 target_cov, score_cov = form_covariances(target_info,  
@@ -858,7 +851,10 @@ def summary(self,
         opt_samples = [opt_sampler.sample(ndraw,
                                           burnin) for opt_sampler in opt_samplers]
 
-        ### TODO -- this only uses one view -- what about other queries?
+        if subset is not None:
+            target_cov = target_cov[subset][:,subset]
+            score_cov = score_cov[subset]
+            unpenalized_mle = unpenalized_mle[subset]
 
         pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter, sample=opt_samples[0])
         if not np.all(parameter == 0):
@@ -940,7 +936,7 @@ def gaussian(X,
 
         mean_diag = np.mean((X**2).sum(0))
         if ridge_term is None:
-            ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
@@ -1020,7 +1016,7 @@ def logistic(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = mean_diag / np.sqrt(n)
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 
@@ -1105,7 +1101,7 @@ def coxph(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
+            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
@@ -1183,7 +1179,7 @@ def poisson(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(counts)**2 * mean_diag / np.sqrt(n)
+            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts)
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 0365f4bc7..577c704bc 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -17,19 +17,20 @@
 
 class query(object):
 
-    def __init__(self, randomization):
+    def __init__(self, randomization, perturb=None):
 
         self.randomization = randomization
+        self.perturb = perturb
         self._solved = False
         self._randomized = False
         self._setup = False
 
     # Methods reused by subclasses
 
-    def randomize(self):
+    def randomize(self, perturb=None):
 
         if not self._randomized:
-            self.randomized_loss, self._initial_omega = self.randomization.randomize(self.loss, self.epsilon)
+            self.randomized_loss, self._initial_omega = self.randomization.randomize(self.loss, self.epsilon, perturb=perturb)
         self._randomized = True
 
     def linear_decomposition(self, target_score_cov, target_cov, observed_target_state):
@@ -443,6 +444,7 @@ def __init__(self,
                  initial_point,
                  observed_internal_state,
                  log_density,
+                 logdens_transform,
                  selection_info=None):
 
         '''
@@ -461,6 +463,7 @@ def __init__(self,
         self.observed_internal_state = observed_internal_state
         self.selection_info = selection_info
         self.log_density = log_density
+        self.logdens_transform = logdens_transform
 
     def sample(self, ndraw, burnin):
         '''
@@ -484,16 +487,6 @@ def sample(self, ndraw, burnin):
                                        self.initial_point,
                                        ndraw=ndraw,
                                        burnin=burnin)
-        # sample_from_constraints
-
-#     def log_density(self, 
-#                     internal_state,
-#                     opt_sample):
-#         """
-#         Conditional density of opt variables for a given value of the internal state.
-#         """
-#         # Hmm.....
-#         return np.random.sample(opt_sample.shape[0])
 
 
 class optimization_intervals(object):
diff --git a/selection/randomized/randomization.py b/selection/randomized/randomization.py
index cb51dda02..c6cb1b250 100644
--- a/selection/randomized/randomization.py
+++ b/selection/randomized/randomization.py
@@ -85,14 +85,15 @@ def log_density(self, perturbation):
         """
         return np.squeeze(self._log_density(perturbation))
 
-    def randomize(self, loss, epsilon=0):
+    def randomize(self, loss, epsilon=0, perturb=None):
         """
         Randomize the loss.
         """
         randomized_loss = rr.smooth_sum([loss])
-        _randomZ = self.sample()
-        randomized_loss.quadratic = rr.identity_quadratic(epsilon, 0, -_randomZ, 0)
-        return randomized_loss, _randomZ
+        if perturb is None:
+            perturb = self.sample()
+        randomized_loss.quadratic = rr.identity_quadratic(epsilon, 0, -perturb, 0)
+        return randomized_loss, perturb
 
     @staticmethod
     def isotropic_gaussian(shape, scale):
diff --git a/selection/randomized/tests/test_lasso_pval.py b/selection/randomized/tests/test_lasso_pval.py
new file mode 100644
index 000000000..8165e9b53
--- /dev/null
+++ b/selection/randomized/tests/test_lasso_pval.py
@@ -0,0 +1,190 @@
+import numpy as np
+import nose.tools as nt
+import rpy2.robjects as rpy
+from rpy2.robjects import numpy2ri
+rpy.r('library(selectiveInference)')
+
+from selection.randomized.lasso import lasso
+from selection.tests.instance import gaussian_instance
+import matplotlib.pyplot as plt
+
+n, p = 500, 50
+
+def test_condition_subgrad(n=n, p=p, signal=np.sqrt(2.5 * np.log(p)), s=0, ndraw=50000, burnin=5000, param=False, sigma=1, full=True, rho=0.2, useR=False):
+    """
+    Compare to R randomized lasso
+    """
+
+    inst, const = gaussian_instance, lasso.gaussian
+    X, Y, beta = inst(n=n,
+                      p=p, 
+                      signal=signal, 
+                      s=s, 
+                      equicorrelated=False, 
+                      rho=rho, 
+                      sigma=sigma, 
+                      random_signs=True)[:3]
+
+    n, p = X.shape
+
+    W = np.ones(X.shape[1]) * np.sqrt(1. * np.log(p)) * sigma
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer='gaussian', 
+                 parametric_cov_estimator=param)
+    
+    nboot = 2000
+    signs = conv.fit(nboot=nboot)
+    nonzero = signs != 0
+    conv.decompose_subgradient(condition=np.ones(p, np.bool))
+
+    if full:
+        selected = np.ones(p, np.bool)
+        keep = nonzero
+    else:
+        selected = nonzero
+        selected_idx = np.nonzero(selected)[0]
+        keep = np.ones(selected_idx.shape[0], np.bool)
+
+    _, pval, intervals = conv.summary(selected,
+                                      ndraw=ndraw,
+                                      burnin=burnin, compute_intervals=False,
+                                      subset=keep)
+
+    if full:
+        if not useR:
+            return pval[beta[keep] == 0], pval[beta[keep] != 0]
+        else:
+            pval, selected_idx = Rpval(X, Y, W)[:2]
+            return [p for j, p in zip(selected_idx, pval) if beta[j] == 0], [p for j, p in zip(selected_idx, pval) if beta[j] != 0]
+    else:
+        return [p for j, p in zip(selected_idx, pval) if beta[j] == 0], [p for j, p in zip(selected_idx, pval) if beta[j] != 0]
+
+def test_compareR(n=n, p=p, signal=np.sqrt(4) * np.sqrt(2 * np.log(p)), s=5, ndraw=5000, burnin=1000, param=True, sigma=3):
+    """
+    Compare to R randomized lasso
+    """
+
+    inst, const = gaussian_instance, lasso.gaussian
+    X, Y, beta = inst(n=n, p=p, signal=signal, s=s, equicorrelated=False, rho=0.2, sigma=sigma, random_signs=True)[:3]
+
+    n, p = X.shape
+
+    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma
+    randomizer_scale = np.std(Y) * .5
+
+    L, O, rand, active, soln, ridge_term, cond_cov, cond_mean = Rpval(X, Y, W, randomizer_scale)[2:]
+    implied_prec = L.T.dot(L) / randomizer_scale**2
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer='gaussian', 
+                 parametric_cov_estimator=param,
+                 randomizer_scale=randomizer_scale)
+    
+    nboot = 2000
+
+    signs = conv.fit(nboot=nboot, perturb=rand, solve_args={'min_its':500})
+
+    assert np.fabs(conv._view.epsilon - np.sqrt((n - 1.) / n) * ridge_term) / ridge_term < 1.e-4
+
+    assert np.fabs(soln - conv._view.initial_soln).max() / np.fabs(soln).max() < 1.e-3
+
+
+    nonzero = signs != 0
+    print(nonzero.sum())
+
+    print(np.diag(np.linalg.inv(X.T.dot(X)) * sigma**2))
+    
+    conv.decompose_subgradient(condition=np.ones(p, np.bool))
+
+    assert np.linalg.norm(np.linalg.inv(conv._view.sampler.affine_con.covariance) - implied_prec) / np.linalg.norm(implied_prec) < 1.e-3
+
+    assert np.linalg.norm(conv._view.sampler.affine_con.mean - cond_mean[:,0]) / np.linalg.norm(cond_mean[:,0]) < 1.e-3
+    assert np.linalg.norm(conv._view.sampler.affine_con.covariance - cond_cov) / np.linalg.norm(cond_cov) < 1.e-3
+
+    full = False
+
+    if full:
+        selected = np.ones(p, np.bool)
+        keep = nonzero
+    else:
+        selected = nonzero
+        selected_idx = np.nonzero(selected)[0]
+        keep = True
+
+    _, pval, intervals = conv.summary(selected,
+                                      ndraw=ndraw,
+                                      burnin=burnin, compute_intervals=False)
+
+    pval = np.asarray(pval)
+    pval = 2 * np.minimum(pval, 1 - pval)
+
+#    if not full:
+#        pval, selected_idx = Rpval(X, Y, W, randomizer_scale)[:2]
+
+    if full:
+        return pval[nonzero][beta[nonzero] == 0], pval[nonzero][beta[nonzero] != 0]
+#        return pval[nonzero][beta[nonzero] == 0], pval[nonzero][beta[nonzero] != 0]
+    else:
+        return [p for j, p in zip(selected_idx, pval) if beta[j] == 0], [p for j, p in zip(selected_idx, pval) if beta[j] != 0]
+
+def main(nsim=500):
+
+    P0, PA = [], []
+    from statsmodels.distributions import ECDF
+
+    for i in range(nsim):
+        p0, pA = test_condition_subgrad()
+        P0.extend(p0)
+        PA.extend(pA)
+        print(np.mean(P0), np.std(P0))
+    
+        if i % 3 == 0 and i > 0:
+            U = np.linspace(0, 1, 101)
+            plt.clf()
+            if len(P0) > 0:
+                plt.plot(U, ECDF(P0)(U))
+            if len(PA) > 0:
+                plt.plot(U, ECDF(PA)(U), 'r')
+            plt.plot([0, 1], [0, 1], 'k--')
+            plt.savefig("plot.pdf")
+    plt.show()
+
+def Rpval(X, Y, W, noise_scale=None):
+    numpy2ri.activate()
+    rpy.r.assign('X', X)
+    rpy.r.assign('Y', Y)
+    rpy.r.assign('lam', W)
+    if noise_scale is not None:
+        rpy.r.assign('noise_scale', noise_scale)
+        rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam, noise_scale=noise_scale)')
+    else:
+        rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam)')
+    rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="norejection")')
+    pval = np.asarray(rpy.r('rand_inf$pvalues'))
+    vars = np.asarray(rpy.r('soln$active_set')) - 1 
+
+    L = np.asarray(rpy.r('soln$law$sampling_transform$linear_term'))
+    O = np.asarray(rpy.r('soln$law$sampling_transform$offset_term'))
+    cond_cov = np.asarray(rpy.r('soln$law$cond_cov'))
+    cond_mean = np.asarray(rpy.r('soln$law$cond_mean'))
+    rand = np.asarray(rpy.r('soln$perturb'))
+    active =  np.asarray(rpy.r('soln$active')) - 1
+    soln = np.asarray(rpy.r('soln$soln'))
+    rpy.r('print(names(soln))')
+    rpy.r('print(names(soln$law))')
+    ridge = rpy.r('soln$ridge_term')
+
+    try:
+        pval = 2 * np.minimum(pval, 1 - pval)
+        return pval, vars, L, O, rand, active, soln, ridge, cond_cov, cond_mean
+    except:
+        return [], []
+
+
+# if __name__ == "__main__":
+#     main()

From 48c04184670ddc30b579526786c8ebe8bccb0058 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 12 Feb 2018 17:16:13 -0800
Subject: [PATCH 475/617] using full pvalues

---
 selection/randomized/tests/test_lasso_pval.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/tests/test_lasso_pval.py b/selection/randomized/tests/test_lasso_pval.py
index 8165e9b53..49d38acfd 100644
--- a/selection/randomized/tests/test_lasso_pval.py
+++ b/selection/randomized/tests/test_lasso_pval.py
@@ -10,7 +10,7 @@
 
 n, p = 500, 50
 
-def test_condition_subgrad(n=n, p=p, signal=np.sqrt(2.5 * np.log(p)), s=0, ndraw=50000, burnin=5000, param=False, sigma=1, full=True, rho=0.2, useR=False):
+def test_condition_subgrad(n=n, p=p, signal=np.sqrt(2.5 * np.log(p)), s=5, ndraw=50000, burnin=5000, param=False, sigma=1, full=True, rho=0.2, useR=True):
     """
     Compare to R randomized lasso
     """
@@ -164,7 +164,8 @@ def Rpval(X, Y, W, noise_scale=None):
         rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam, noise_scale=noise_scale)')
     else:
         rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam)')
-    rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="norejection")')
+    rpy.r('full_targets=selectiveInference:::set.target(soln,type="full")')
+    rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="norejection", full_targets=full_targets)')
     pval = np.asarray(rpy.r('rand_inf$pvalues'))
     vars = np.asarray(rpy.r('soln$active_set')) - 1 
 

From 3957bf34ddb765024b14269455dded9768fcb2af Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 12 Feb 2018 17:26:14 -0800
Subject: [PATCH 476/617] using full targets

---
 selection/randomized/tests/test_lasso_pval.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/selection/randomized/tests/test_lasso_pval.py b/selection/randomized/tests/test_lasso_pval.py
index 49d38acfd..d2747b838 100644
--- a/selection/randomized/tests/test_lasso_pval.py
+++ b/selection/randomized/tests/test_lasso_pval.py
@@ -8,9 +8,9 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-n, p = 500, 50
+n, p = 500, 200
 
-def test_condition_subgrad(n=n, p=p, signal=np.sqrt(2.5 * np.log(p)), s=5, ndraw=50000, burnin=5000, param=False, sigma=1, full=True, rho=0.2, useR=True):
+def test_condition_subgrad(n=n, p=p, signal=np.sqrt(1.5 * np.log(p)), s=5, ndraw=50000, burnin=5000, param=False, sigma=1, full=True, rho=0.2, useR=True):
     """
     Compare to R randomized lasso
     """
@@ -27,7 +27,7 @@ def test_condition_subgrad(n=n, p=p, signal=np.sqrt(2.5 * np.log(p)), s=5, ndraw
 
     n, p = X.shape
 
-    W = np.ones(X.shape[1]) * np.sqrt(1. * np.log(p)) * sigma
+    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma
 
     conv = const(X, 
                  Y, 
@@ -57,7 +57,7 @@ def test_condition_subgrad(n=n, p=p, signal=np.sqrt(2.5 * np.log(p)), s=5, ndraw
         if not useR:
             return pval[beta[keep] == 0], pval[beta[keep] != 0]
         else:
-            pval, selected_idx = Rpval(X, Y, W)[:2]
+            pval, selected_idx = Rpval(X, Y, W, 1.)[:2]
             return [p for j, p in zip(selected_idx, pval) if beta[j] == 0], [p for j, p in zip(selected_idx, pval) if beta[j] != 0]
     else:
         return [p for j, p in zip(selected_idx, pval) if beta[j] == 0], [p for j, p in zip(selected_idx, pval) if beta[j] != 0]
@@ -164,7 +164,7 @@ def Rpval(X, Y, W, noise_scale=None):
         rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam, noise_scale=noise_scale)')
     else:
         rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam)')
-    rpy.r('full_targets=selectiveInference:::set.target(soln,type="full")')
+    rpy.r('full_targets=selectiveInference:::set.target(soln,type="partial")')
     rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="norejection", full_targets=full_targets)')
     pval = np.asarray(rpy.r('rand_inf$pvalues'))
     vars = np.asarray(rpy.r('soln$active_set')) - 1 

From 1a851955670f606ffd8081549e63799cb6328e34 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 13 Feb 2018 17:52:52 -0800
Subject: [PATCH 477/617] WIP: writing highdim gaussian randomization version

---
 selection/randomized/glm.py                   |  79 --
 selection/randomized/lasso.py                 | 766 +++++++++++++++++-
 selection/randomized/query.py                 |   7 +-
 selection/randomized/randomization.py         |   3 +-
 selection/randomized/tests/test_lasso_pval.py |  25 +-
 5 files changed, 774 insertions(+), 106 deletions(-)

diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 48ecf9c53..9a7cf95bc 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -9,7 +9,6 @@
 from .greedy_step import greedy_score_step
 from .threshold_score import threshold_score
 
-
 def pairs_bootstrap_glm(glm_loss,
                         active, 
                         beta_full=None, 
@@ -314,84 +313,6 @@ def set_alpha_matrix(glm_loss,
 
     return np.dot(np.dot(_Qinv, X_active.T), np.diag(obs_residuals))
 
-
-def _parametric_cov_glm(glm_loss,
-                        active,
-                        beta_full=None,
-                        inactive=None,
-                        solve_args={'min_its': 50, 'tol': 1.e-10}):
-    """
-    Compute parametric covariance of
-    the estimates ($\bar{\beta}_E^*$) of a generalized 
-    linear model (GLM) restricted to `active`
-    as well as, optionally, the inactive coordinates of the score of the 
-    GLM evaluated at the estimates ($\nabla \ell(\bar{\beta}_E)[-E]$) where
-    $\bar{\beta}_E$ is padded with zeros where necessary.
-
-    Parameters
-    ----------
-
-    glm_loss : regreg.smooth.glm.glm
-        The loss of the generalized linear model.
-
-    active : np.bool
-        Boolean indexing array
-
-    beta_full : np.float (optional)
-        Solution to the restricted problem, zero except where active is nonzero.
-
-    inactive : np.bool (optional)
-        Boolean indexing array
-
-    solve_args : dict
-        Arguments passed to solver of restricted problem (`restricted_estimator`) if 
-        beta_full is None.
-
-    Returns
-    -------
-
-    Sigma : np.float
-        Covariance matrix.
-
-    """
-    X, Y = glm_loss.data
-    n, p = X.shape
-
-    if beta_full is None:
-        beta_active = restricted_estimator(glm_loss, active, solve_args=solve_args)
-        beta_full = np.zeros(glm_loss.shape)
-        beta_full[active] = beta_active
-    else:
-        beta_active = beta_full[active]
-
-    X_active = X[:, active]
-
-    nactive = active.sum()
-    ntotal = nactive
-
-    if inactive is not None:
-        X_inactive = X[:, inactive]
-        ntotal += inactive.sum()
-
-    _W = np.diag(glm_loss.saturated_loss.hessian(X_active.dot(beta_active)))
-    _Q = X_active.T.dot(_W.dot(X_active))
-    _Qinv = np.linalg.inv(_Q)
-    if inactive is not None:
-        _C = X_inactive.T.dot(_W.dot(X_active))
-        _I = _C.dot(_Qinv)
-
-    nactive = active.sum()
-
-    mat = np.zeros((p, n))
-    mat[:nactive, :] = _Qinv.dot(X_active.T)
-    if ntotal > nactive:
-        mat1 = np.dot(np.dot(_W, X_active), np.dot(_Qinv, X_active.T))
-        mat[nactive:, :] = X[:, inactive].T.dot(np.identity(n) - mat1)
-
-    Sigma_full = np.dot(mat, np.dot(_W, mat.T))
-    return Sigma_full
-
-
 class glm_greedy_step(greedy_score_step, glm):
 
     # XXX this makes the assumption that our
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index b701eb7dc..148528c9c 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -324,23 +324,36 @@ def log_density(query,
                 # compute implied mean and covariance
 
                 cov, prec = self.randomization.cov_prec
+                prec_array = len(np.asarray(prec).shape) == 2
                 opt_linear, opt_offset = self.opt_transform
                 score_linear, score_offset = self.score_transform
-                cond_precision = opt_linear.T.dot(prec.dot(opt_linear))
+
+                if prec_array:
+                    cond_precision = opt_linear.T.dot(prec.dot(opt_linear))
+                else:
+                    cond_precision = opt_linear.T.dot(opt_linear) * prec
+
                 cond_cov = np.linalg.inv(cond_precision)
 
                 offset = reconstruct_full_from_internal(self.opt_transform, 
                                                         self.score_transform, 
                                                         self.observed_internal_state, 
                                                         np.zeros(opt_linear.shape[1]))
-                cond_mean = -cond_cov.dot(opt_linear.T.dot(prec.dot(offset)))
+                if prec_array:
+                    cond_mean = -cond_cov.dot(opt_linear.T.dot(prec.dot(offset)))
+                else:
+                    cond_mean = -cond_cov.dot(opt_linear.T.dot(offset)) * prec
 
                 # need a log_density function
                 # the conditional density of opt variables
                 # given the score
 
-                logdens_offset = cond_cov.dot(opt_linear.T.dot(prec.dot(score_offset + opt_offset)))
-                logdens_linear = cond_cov.dot(opt_linear.T.dot(prec.dot(score_linear)))
+                if prec_array:
+                    logdens_offset = cond_cov.dot(opt_linear.T.dot(prec.dot(score_offset + opt_offset)))
+                    logdens_linear = cond_cov.dot(opt_linear.T.dot(prec.dot(score_linear)))
+                else:
+                    logdens_offset = cond_cov.dot(opt_linear.T.dot(score_offset + opt_offset)) * prec
+                    logdens_linear = cond_cov.dot(opt_linear.T.dot(score_linear)) * prec
 
                 def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
                     mean_term = logdens_linear.dot(score.T).T + logdens_offset
@@ -545,7 +558,10 @@ def new_projection(dual,
         else:
 
             cov, prec = self.randomization.cov_prec
-            cond_precision = new_linear.T.dot(prec.dot(new_linear))
+            if len(np.asarray(prec)) == 2:
+                cond_precision = new_linear.T.dot(prec.dot(new_linear))
+            else:
+                cond_precision = new_linear.T.dot(new_linear) * prec
             score_linear, score_offset = self.score_transform
 
             cond_cov = np.linalg.inv(cond_precision)
@@ -554,15 +570,24 @@ def new_projection(dual,
                                                     self.score_transform, 
                                                     self.observed_internal_state, 
                                                     np.zeros(new_linear.shape[1]))
-            cond_mean = -cond_cov.dot(new_linear.T.dot(prec.dot(offset)))
+
+            if len(np.asarray(prec)) == 2:
+                cond_mean = -cond_cov.dot(new_linear.T.dot(prec.dot(offset)))
+            else:
+                cond_mean = -cond_cov.dot(new_linear.T.dot(offset)) * prec
 
             # need a log_density function
             # the conditional density of opt variables
             # given the score
 
-            logdens_offset = cond_cov.dot(new_linear.T.dot(prec.dot(
-                                                           score_offset + opt_offset)))
-            logdens_linear = cond_cov.dot(new_linear.T.dot(prec.dot(score_linear)))
+            if len(np.asarray(prec)) == 2:
+                logdens_offset = cond_cov.dot(new_linear.T.dot(prec.dot(
+                            score_offset + opt_offset)))
+                logdens_linear = cond_cov.dot(new_linear.T.dot(prec.dot(score_linear)))
+            else:
+                logdens_offset = cond_cov.dot(new_linear.T.dot(
+                            score_offset + opt_offset)) * prec
+                logdens_linear = cond_cov.dot(new_linear.T.dot(score_linear)) * prec
 
             def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
                 mean_term = logdens_linear.dot(score.T).T + logdens_offset
@@ -1376,5 +1401,728 @@ def sqrt_lasso(X,
 
         return L
 
+#### High dimensional version
+#### - parametric covariance
+#### - Gaussian randomization
+
+class highdim(lasso):
+
+    r"""
+    A class for the LASSO for post-selection inference.
+    The problem solved is
+
+    .. math::
+
+        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
+            \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
+
+    where $\lambda$ is `lam`, $\omega$ is a randomization generated below
+    and the last term is a small ridge penalty.
+
+    """
+
+    def __init__(self, 
+                 loglike, 
+                 feature_weights,
+                 ridge_term,
+                 randomizer_scale):
+        r"""
+
+        Create a new post-selection object for the LASSO problem
+
+        Parameters
+        ----------
+
+        loglike : `regreg.smooth.glm.glm`
+            A (negative) log-likelihood as implemented in `regreg`.
+
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is brodcast to all features.
+
+        ridge_term : float
+            How big a ridge term to add?
+
+        randomizer_scale : float
+            Scale for IID components of randomization.
+
+
+        """
+
+        self.loglike = loglike
+        self.nfeature = p = self.loglike.shape[0]
+
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(loglike.shape) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+
+        self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
+        self.ridge_term = ridge_term
+        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
+
+
+    def fit(self, 
+            solve_args={'tol':1.e-12, 'min_its':50}, 
+            perturb=None):
+        """
+        Fit the randomized lasso using `regreg`.
+
+        Parameters
+        ----------
+
+        solve_args : keyword args
+             Passed to `regreg.problems.simple_problem.solve`.
+
+        Returns
+        -------
+
+        signs : np.float
+             Support and non-zero signs of randomized lasso solution.
+             
+        """
+
+        p = self.nfeature
+
+        if perturb is None:
+            self._initial_omega = perturb = self.randomizer.sample()
+        quad = rr.identity_quadratic(self.epsilon, 0, -perturb)
+        problem = rr.simple_problem(self.loss, self.penalty)
+        self.initial_soln = rr.solve(quad)
+
+        active_signs = np.sign(self.initial_soln)
+        active = self._active = active_signs != 0
+
+        self._lagrange = penalty.weights
+        unpenalized = self._lagrange == 0
+
+        active *= ~unpenalized
+
+        self._overall = (active + unpenalized) > 0
+        self._inactive = ~self._overall
+        self._unpenalized = unpenalized
+
+        _active_signs = active_signs.copy()
+        _active_signs[unpenalized] = np.nan # don't release sign of unpenalized variables
+        self.selection_variable = {'sign':_active_signs,
+                                   'variables':self._overall}
+
+        # initial state for opt variables
+
+        initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + 
+                            quad.objective(self.initial_soln, 'grad')) 
+        self.initial_subgrad = initial_subgrad
+
+        initial_scalings = np.fabs(self.initial_soln[active])
+        initial_unpenalized = self.initial_soln[self._unpenalized]
+
+        self.observed_opt_state = np.concatenate([initial_scalings,
+                                                  initial_unpenalized])
+
+        _beta_unpenalized = restricted_estimator(loss, overall, solve_args=solve_args)
+
+        beta_bar = np.zeros(p)
+        beta_bar[overall] = _beta_unpenalized
+        self._beta_full = beta_bar
+
+        # observed state for score in internal coordinates
+
+        self.observed_internal_state = np.hstack([_beta_unpenalized,
+                                                  -loss.smooth_objective(beta_bar, 'grad')[inactive]])
+
+        # form linear part
+
+        self.num_opt_var = self.observed_opt_state.shape[0]
+
+        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
+        # E for active
+        # U for unpenalized
+        # -E for inactive
+
+        _opt_linear_term = np.zeros((p, overall.sum()))
+        _score_linear_term = np.zeros((p, overall.sum()))
+
+        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
+
+        X, y = loss.data
+        W = self.loss.saturated_loss.hessian(X.dot(beta_bar))
+        _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
+        _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None])
+
+        _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen])
+
+        def signed_basis_vector(p, j, s):
+            v = np.zeros(p)
+            v[j] = s
+            return v
+
+        active_directions = np.array([signed_basis_vector(p, j, active_signs[j]) for j in np.nonzero(active)[0]]).T
+
+        scaling_slice = slice(0, active.sum())
+        if np.sum(active) == 0:
+            _opt_hessian = 0
+        else:
+            _opt_hessian = _hessian_active * active_signs[None, active] + epsilon * active_directions
+        _opt_linear_term[:, scaling_slice] = _opt_hessian
+
+        # beta_U piece
+
+        unpenalized_slice = slice(active.sum(), overall.sum())
+        unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
+        if unpenalized.sum():
+            _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
+                                                      + epsilon * unpenalized_directions) 
+
+        # two transforms that encode score and optimization
+        # variable roles 
+
+        self.opt_transform = (_opt_linear_term, self.initial_subgrad)
+        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
+
+        # now store everything needed for the projections
+        # the projection acts only on the optimization
+        # variables
+
+        self._setup = True
+        self.scaling_slice = scaling_slice
+        self.unpenalized_slice = unpenalized_slice
+        self.ndim = loss.shape[0]
+
+        # compute implied mean and covariance
+
+        cov, prec = self.randomization.cov_prec
+        opt_linear, opt_offset = self.opt_transform
+        score_linear, score_offset = self.score_transform
+        cond_precision = opt_linear.T.dot(opt_linear) * prec
+        cond_cov = np.linalg.inv(cond_precision)
+
+        offset = reconstruct_full_from_internal(self.opt_transform, 
+                                                self.score_transform, 
+                                                self.observed_internal_state, 
+                                                np.zeros(opt_linear.shape[1]))
+        cond_mean = -cond_cov.dot(opt_linear.T.dot(offset)) * prec
+
+        # need a log_density function
+        # the conditional density of opt variables
+        # given the score
+
+        logdens_offset = cond_cov.dot(opt_linear.T.dot(prec.dot(score_offset + opt_offset)))
+        logdens_linear = cond_cov.dot(opt_linear.T.dot(prec.dot(score_linear)))
+
+        def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
+            mean_term = logdens_linear.dot(score.T).T + logdens_offset
+            arg = opt + mean_term
+            return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+        log_density = functools.partial(log_density, logdens_offset, logdens_linear, cond_precision)
+
+        # now make the constraints
+
+        # scaling constraints
+
+        I = np.identity(cond_cov.shape[0])
+        A_scaling = -I[self.scaling_slice]
+        b_scaling = np.zeros(A_scaling.shape[0])
+
+        A_subgrad = np.vstack([I[self.subgrad_slice],
+                               -I[self.subgrad_slice]])
+        b_subgrad = np.hstack([inactive_lagrange,
+                               inactive_lagrange])
+
+        linear_term = np.vstack([A_scaling, A_subgrad])
+        offset = np.hstack([b_scaling, b_subgrad])
+
+        affine_con = constraints(linear_term,
+                                 offset,
+                                 mean=cond_mean,
+                                 covariance=cond_cov)
+
+        logdens_transform = (logdens_linear, logdens_offset)
+        self._sampler = affine_gaussian_sampler(affine_con,
+                                                self.observed_opt_state,
+                                                self.observed_internal_state,
+                                                log_density,
+                                                selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
+
+
+
+    def summary(self,
+                selected_features,
+                parameter=None,
+                level=0.9,
+                ndraw=10000, 
+                burnin=2000,
+                compute_intervals=False,
+                bootstrap_sampler=False,
+                subset=None):
+        """
+        Produce p-values and confidence intervals for targets
+        of model including selected features
+
+        Parameters
+        ----------
+
+        selected_features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
+
+        level : float
+            Confidence level.
+
+        ndraw : int (optional)
+            Defaults to 1000.
+
+        burnin : int (optional)
+            Defaults to 1000.
+
+        bootstrap : bool
+            Use wild bootstrap instead of Gaussian plugin.
+
+        """
+        if not hasattr(self, "_view"):
+            raise ValueError('run `fit` method before producing summary.')
+
+        if parameter is None:
+            parameter = np.zeros(self.loglike.shape[0])
+
+        if np.asarray(selected_features).dtype != np.bool:
+            raise ValueError('selected_features should be a boolean array')
+
+        unpenalized_mle = restricted_estimator(self.loglike, selected_features)
+
+        target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
+            form_covariances = glm_parametric_covariance(self.loglike)
+
+        opt_samplers = []
+        for q in [self._view]:
+            cov_info = q.setup_sampler()
+            if self.parametric_cov_estimator == False:
+                target_cov, score_cov = form_covariances(target_info,  
+                                                         cross_terms=[cov_info],
+                                                         nsample=q.nboot)
+            else:
+                target_cov, score_cov = form_covariances(target_info,  
+                                                         cross_terms=[cov_info])
+            opt_samplers.append(q.sampler)
+
+        opt_samples = [opt_sampler.sample(ndraw,
+                                          burnin) for opt_sampler in opt_samplers]
+
+        if subset is not None:
+            target_cov = target_cov[subset][:,subset]
+            score_cov = score_cov[subset]
+            unpenalized_mle = unpenalized_mle[subset]
+
+        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter, sample=opt_samples[0])
+        if not np.all(parameter == 0):
+            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=np.zeros_like(parameter), sample=opt_samples[0])
+        else:
+            pvalues = pivots
+
+        intervals = None
+        if compute_intervals:
+            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_samples[0])
+
+        return pivots, pvalues, intervals
+
+    @staticmethod
+    def gaussian(X, 
+                 Y, 
+                 feature_weights, 
+                 sigma=1.,
+                 parametric_cov_estimator=False,  # not referenced in this body
+                 quadratic=None,
+                 ridge_term=None,
+                 randomizer_scale=None,
+                 randomizer='gaussian'):
+        r"""
+        Squared-error LASSO with feature weights.
+
+        Objective function (before randomizer) is 
+        $$
+        \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\lambda$ is `feature_weights`. By default the ridge term
+        and the randomizer scale are both computed from `np.std(Y)` and
+        the mean squared column norm of `X`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        sigma : float (optional)
+            Noise scale. The loglikelihood coefficient and `feature_weights`
+            are both multiplied by `sigma**(-2)`.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            Ridge term; defaults to `np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)`.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer; defaults to `0.5 * np.sqrt(mean_diag) * np.std(Y)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']. Not referenced by this method's body.
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.lasso`
+            Built here by calling `highdim(...)`.
+
+        """
+
+        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
+        n, p = X.shape
+
+        mean_diag = np.mean((X**2).sum(0))  # mean squared column norm of X
+        if ridge_term is None:
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+
+        return highdim(loglike, np.asarray(feature_weights) / sigma**2,
+                     ridge_term, randomizer_scale)
+
+
+    @staticmethod
+    def logistic(X, 
+                 successes, 
+                 feature_weights, 
+                 trials=None,
+                 parametric_cov_estimator=False,  # not referenced in this body
+                 quadratic=None,
+                 ridge_term=None,
+                 randomizer='gaussian',
+                 randomizer_scale=None):
+        r"""
+        Logistic LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell$ is the negative of the logistic 
+        log-likelihood (half the logistic deviance)
+        and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        successes : ndarray
+            Shape (n,) -- response vector. An integer number of successes.
+            For data that is proportions, multiply the proportions
+            by the number of trials first.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        trials : ndarray (optional)
+            Number of trials per response, defaults to
+            ones the same shape as `successes`.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            Ridge term; defaults to `np.sqrt(mean_diag) / np.sqrt(n)`.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer; defaults to `0.5 * np.sqrt(mean_diag)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']. Not referenced by this method's body.
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.lasso`
+            Built here by calling `highdim(...)`.
+
+        """
+        n, p = X.shape
+
+        loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
+
+        mean_diag = np.mean((X**2).sum(0))  # mean squared column norm of X
+
+        if ridge_term is None:
+            ridge_term = np.sqrt(mean_diag) / np.sqrt(n)  # no response-std factor, like the randomizer scale below
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 
+
+        return highdim(loglike, np.asarray(feature_weights),
+                       ridge_term, randomizer_scale)
+
+    @staticmethod
+    def coxph(X, 
+              times, 
+              status, 
+              feature_weights,
+              parametric_cov_estimator=False,
+              quadratic=None,
+              ridge_term=None,
+              randomizer='gaussian',
+              randomizer_scale=None):
+        r"""
+        Cox proportional hazards LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell^{\text{Cox}}$ is the 
+        negative of the log of the Cox partial
+        likelihood and $\lambda$ is `feature_weights`.
+
+        Uses Efron's tie breaking method.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        times : ndarray
+            Shape (n,) -- the survival times.
+
+        status : ndarray
+            Shape (n,) -- the censoring status.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        parametric_cov_estimator : bool (optional)
+            Accepted for signature parity with the other constructors;
+            it is not referenced in the body of this method.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            Ridge term; defaults to `np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)`.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer; defaults to `0.5 * np.sqrt(mean_diag) * np.std(times)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']. Not referenced by this method's body.
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.lasso`
+
+
+        """
+        n, p = X.shape  # `n` is needed by the default ridge term below
+        loglike = coxph_obj(X, times, status, quadratic=quadratic)
+
+        # scale for randomization seems kind of meaningless here...
+        mean_diag = np.mean((X**2).sum(0))  # mean squared column norm of X
+
+        if ridge_term is None:
+            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(times)  # there is no `Y` here; use the same response as the ridge term
+
+        return lasso(loglike, 
+                     feature_weights, 
+                     ridge_term,
+                     randomizer_scale)
+
+    @staticmethod
+    def poisson(X, 
+                counts, 
+                feature_weights,
+                parametric_cov_estimator=False,  # not referenced in this body
+                quadratic=None,
+                ridge_term=None,
+                randomizer_scale=None,
+                randomizer='gaussian'):
+        r"""
+        Poisson log-linear LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell^{\text{Poisson}}$ is the negative
+        of the log of the Poisson likelihood (half the deviance)
+        and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        counts : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        ridge_term : float
+            Ridge term; defaults to `np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)`.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer; defaults to `0.5 * np.sqrt(mean_diag) * np.std(counts)`.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']. Not referenced by this method's body.
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.lasso`
+
+
+        """
+        n, p = X.shape
+        loglike = rr.glm.poisson(X, counts, quadratic=quadratic)
+
+        # scale for randomizer seems kind of meaningless here...
+
+        mean_diag = np.mean((X**2).sum(0))  # mean squared column norm of X
+
+        if ridge_term is None:
+            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts)
+
+        return lasso(loglike, 
+                     feature_weights, 
+                     ridge_term,
+                     randomizer_scale)
+
+    @staticmethod
+    def sqrt_lasso(X, 
+                   Y, 
+                   feature_weights, 
+                   quadratic=None,
+                   parametric_cov_estimator=False,
+                   sigma_estimate='truncated',
+                   solve_args={'min_its':200},  # NOTE(review): mutable default dict; never read while this is a stub
+                   randomizer_scale=None,
+                   randomizer='gaussian'):
+        r"""
+        Use sqrt-LASSO to choose variables. Not implemented: always raises.
+
+        Objective function is 
+        $$
+        \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\lambda$ is `feature_weights`. Intended behavior: after solving
+        the problem, treat as if `gaussian` with implied variance and choice
+        of multiplier. See arxiv.org/abs/1504.08031 for details.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        covariance : str
+            One of 'parametric' or 'sandwich'. Method used to estimate
+            covariance for inference in second stage. NOTE(review): not in
+            the signature, which has `parametric_cov_estimator` -- confirm.
+
+        sigma_estimate : str
+            One of 'truncated' or 'OLS'. Method
+            used to estimate $\sigma$ when using
+            parametric covariance.
+
+        solve_args : dict
+            Arguments passed to solver.
+
+        ridge_term : float
+            How big a ridge term to add? NOTE(review): not in the signature.
+
+        randomizer_scale : float
+            Scale for IID components of randomizer.
+
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+
+        Returns
+        -------
+
+        L : `selection.randomized.convenience.lasso`
+
+        Notes
+        -----
+
+        Unlike other variants of LASSO, this
+        solves the problem on construction as the active
+        set is needed to find equivalent gaussian LASSO.
+
+        Assumes parametric model is correct for inference,
+        i.e. does not accept a covariance estimator.
+
+        """
+
+        raise NotImplementedError  # stub: nothing described in the docstring is implemented yet
 
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 577c704bc..82ae79d67 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -1,7 +1,3 @@
-
-
-
-
 from itertools import product
 import numpy as np
 
@@ -488,7 +484,6 @@ def sample(self, ndraw, burnin):
                                        ndraw=ndraw,
                                        burnin=burnin)
 
-
 class optimization_intervals(object):
 
     def __init__(self,
@@ -513,7 +508,7 @@ def __init__(self,
                     tiled_opt_sample = opt_sample[:nsample]
             else:
                 tiled_sample = None
-            tiled_sampling_info.append((opt_sampler, opt_sample, t_cov, score_cov))
+            tiled_sampling_info.append((opt_sampler, tiled_opt_sample, t_cov, score_cov))
 
         self.opt_sampling_info = tiled_sampling_info
         self._logden = 0
diff --git a/selection/randomized/randomization.py b/selection/randomized/randomization.py
index c6cb1b250..c3256d1a2 100644
--- a/selection/randomized/randomization.py
+++ b/selection/randomized/randomization.py
@@ -117,7 +117,6 @@ def isotropic_gaussian(shape, scale):
         CGF_conjugate = isotropic_gaussian_CGF_conjugate(shape, scale)
 
         p = np.product(shape)
-        I = np.identity(p)
         constant = -0.5 * p * np.log(2 * np.pi * scale**2)
         return randomization(shape,
                              density,
@@ -130,7 +129,7 @@ def isotropic_gaussian(shape, scale):
                              log_density = lambda x: -0.5 * (np.atleast_2d(x)**2).sum(1) / scale**2 + constant,
                              CGF=CGF,
                              CGF_conjugate=CGF_conjugate,
-                             cov_prec=(scale**2 * I, I / scale**2)
+                             cov_prec=(scale**2, 1. / scale**2)
                              )
 
     @staticmethod
diff --git a/selection/randomized/tests/test_lasso_pval.py b/selection/randomized/tests/test_lasso_pval.py
index d2747b838..88ee6c7b0 100644
--- a/selection/randomized/tests/test_lasso_pval.py
+++ b/selection/randomized/tests/test_lasso_pval.py
@@ -8,9 +8,9 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-n, p = 500, 200
+n, p = 500, 20
 
-def test_condition_subgrad(n=n, p=p, signal=np.sqrt(1.5 * np.log(p)), s=5, ndraw=50000, burnin=5000, param=False, sigma=1, full=True, rho=0.2, useR=True):
+def test_condition_subgrad(n=n, p=p, signal=np.sqrt(2 * np.log(p)), s=5, ndraw=5000, burnin=1000, param=True, sigma=1, full=True, rho=0.2, useR=True, randomizer_scale=1):
     """
     Compare to R randomized lasso
     """
@@ -27,13 +27,14 @@ def test_condition_subgrad(n=n, p=p, signal=np.sqrt(1.5 * np.log(p)), s=5, ndraw
 
     n, p = X.shape
 
-    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma
+    W = np.ones(X.shape[1]) * 1.5 * sigma
 
     conv = const(X, 
                  Y, 
                  W, 
                  randomizer='gaussian', 
-                 parametric_cov_estimator=param)
+                 parametric_cov_estimator=param,
+                 randomizer_scale=randomizer_scale)
     
     nboot = 2000
     signs = conv.fit(nboot=nboot)
@@ -57,7 +58,7 @@ def test_condition_subgrad(n=n, p=p, signal=np.sqrt(1.5 * np.log(p)), s=5, ndraw
         if not useR:
             return pval[beta[keep] == 0], pval[beta[keep] != 0]
         else:
-            pval, selected_idx = Rpval(X, Y, W, 1.)[:2]
+            pval, selected_idx = Rpval(X, Y, W, randomizer_scale)[:2]
             return [p for j, p in zip(selected_idx, pval) if beta[j] == 0], [p for j, p in zip(selected_idx, pval) if beta[j] != 0]
     else:
         return [p for j, p in zip(selected_idx, pval) if beta[j] == 0], [p for j, p in zip(selected_idx, pval) if beta[j] != 0]
@@ -138,10 +139,13 @@ def main(nsim=500):
     from statsmodels.distributions import ECDF
 
     for i in range(nsim):
-        p0, pA = test_condition_subgrad()
+        try:
+            p0, pA = test_condition_subgrad(n=200, p=10)
+        except:
+            p0, pA = [], []
         P0.extend(p0)
         PA.extend(pA)
-        print(np.mean(P0), np.std(P0))
+        print(np.mean(P0), np.std(P0), np.mean(np.array(PA) < 0.05))
     
         if i % 3 == 0 and i > 0:
             U = np.linspace(0, 1, 101)
@@ -164,8 +168,9 @@ def Rpval(X, Y, W, noise_scale=None):
         rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam, noise_scale=noise_scale)')
     else:
         rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam)')
-    rpy.r('full_targets=selectiveInference:::set.target(soln,type="partial")')
-    rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="norejection", full_targets=full_targets)')
+    rpy.r('full_targets=selectiveInference:::set.target(soln,type="full")')
+    print('here')
+    rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="restrictedMVN", full_targets=full_targets, nsample=10000, burnin=3000)')
     pval = np.asarray(rpy.r('rand_inf$pvalues'))
     vars = np.asarray(rpy.r('soln$active_set')) - 1 
 
@@ -181,7 +186,7 @@ def Rpval(X, Y, W, noise_scale=None):
     ridge = rpy.r('soln$ridge_term')
 
     try:
-        pval = 2 * np.minimum(pval, 1 - pval)
+        #pval = 2 * np.minimum(pval, 1 - pval)
         return pval, vars, L, O, rand, active, soln, ridge, cond_cov, cond_mean
     except:
         return [], []

From 9bde5b933df44b13cb9df9d6a3591014395f7286 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 14 Feb 2018 19:18:14 -0800
Subject: [PATCH 478/617] RF: finally working! using score instead of internal

---
 selection/randomized/greedy_step.py           |  24 +-
 selection/randomized/lasso.py                 | 424 ++++++------
 selection/randomized/query.py                 |  44 +-
 selection/randomized/reconstruction.py        |   4 +-
 selection/randomized/target.py                | 649 ------------------
 selection/randomized/tests/test_Mest.py       |   2 +-
 .../randomized/tests/test_highdim_lasso.py    | 143 ++++
 selection/randomized/threshold_score.py       |  10 +-
 8 files changed, 407 insertions(+), 893 deletions(-)
 delete mode 100644 selection/randomized/target.py
 create mode 100644 selection/randomized/tests/test_highdim_lasso.py

diff --git a/selection/randomized/greedy_step.py b/selection/randomized/greedy_step.py
index 85676e8ce..0f5713f4a 100644
--- a/selection/randomized/greedy_step.py
+++ b/selection/randomized/greedy_step.py
@@ -4,7 +4,6 @@
 
 from .query import query, optimization_sampler
 from .base import restricted_estimator
-from .reconstruction import reconstruct_full_from_internal
 
 class greedy_score_step(query):
 
@@ -76,22 +75,20 @@ def solve(self, nboot=2000):
             
         # score at unpenalized M-estimator
 
-        self.observed_internal_state = - self.loss.smooth_objective(beta_full, 'grad')[candidate]
+        self.observed_internal_state = self.observed_score_state = - self.loss.smooth_objective(beta_full, 'grad')[candidate]
         self._randomZ = self.randomization.sample()
 
         self.num_opt_var = self._randomZ.shape[0]
 
         # find the randomized maximizer
 
-        # score transform is identity here so internal is the same as score coords
-
-        randomized_score = self.observed_internal_state - self._randomZ
+        randomized_score = self.observed_score_state - self._randomZ
         terms = self.group_lasso_dual.terms(randomized_score)
 
         # assuming a.s. unique maximizing group here
 
         maximizing_group = np.unique(self.group_lasso_dual.groups)[np.argmax(terms)]
-        maximizing_subgrad = self.observed_internal_state[self.group_lasso_dual.groups == maximizing_group]
+        maximizing_subgrad = self.observed_score_state[self.group_lasso_dual.groups == maximizing_group]
         maximizing_subgrad /= np.linalg.norm(maximizing_subgrad) # this is now a unit vector
         maximizing_subgrad *= self.group_lasso_dual.weights[maximizing_group] # now a vector of length given by weight of maximizing group
         self.maximizing_subgrad = np.zeros(candidate.sum())
@@ -162,26 +159,25 @@ def projection(epigraph, opt_state):
             projection = functools.partial(projection, self.group_lasso_dual_epigraph)
 
             def grad_log_density(query,
-                                 opt_linear,
                                  rand_gradient,
-                                 internal_state,
+                                 score_state,
                                  opt_state):
-                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                full_state = score_state + reconstruct_opt(query.opt_transform, opt_state)
                 return opt_linear.T.dot(rand_gradient(full_state))
 
-            grad_log_density = functools.partial(grad_log_density, self, self.opt_transform[0], self.randomization.gradient)
+            grad_log_density = functools.partial(grad_log_density, self, self.randomization.gradient)
 
             def log_density(query,
                             opt_linear,
                             rand_log_density,
-                            internal_state,
+                            score_state,
                             opt_state):
-                full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                full_state = score_state + reconstruct_opt(query.opt_transform, opt_state)
                 return rand_log_density(full_state)
-            log_density = functools.partial(log_density, self, self.opt_transform[0], self.randomization.log_density)
+            log_density = functools.partial(log_density, self, self.randomization.log_density)
 
             self._sampler = optimization_sampler(self.observed_opt_state,
-                                                 self.observed_internal_state.copy(),
+                                                 self.observed_score_state,
                                                  self.score_transform,
                                                  self.opt_transform,
                                                  projection,
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 148528c9c..dcf95d670 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -244,12 +244,13 @@ def signed_basis_vector(p, j, s):
         # two transforms that encode score and optimization
         # variable roles 
 
-        # later, we will modify `score_transform`
-        # in `linear_decomposition`
-
         self.opt_transform = (_opt_linear_term, _opt_affine_term)
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
 
+        # everything now expressed in observed_score_state
+
+        self.observed_score_state = _score_linear_term.dot(self.observed_internal_state)
+
         # now store everything needed for the projections
         # the projection acts only on the optimization
         # variables
@@ -293,27 +294,26 @@ def projection(dual, subgrad_slice, scaling_slice, opt_state):
                 projection = functools.partial(projection, dual, self.subgrad_slice, self.scaling_slice)
 
                 def grad_log_density(query,
-                                     opt_linear,
                                      rand_gradient,
-                                     internal_state,
+                                     score_state,
                                      opt_state):
-                    full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                    full_state = score_state + reconstruct_opt(query.opt_transform, opt_state)
                     return opt_linear.T.dot(rand_gradient(full_state).T)
 
-                grad_log_density = functools.partial(grad_log_density, self, self.opt_transform[0], self.randomization.gradient)
+                grad_log_density = functools.partial(grad_log_density, self, self.randomization.gradient)
 
                 def log_density(query,
                                 opt_linear,
                                 rand_log_density,
-                                internal_state,
+                                score_state,
                                 opt_state):
-                    full_state = reconstruct_full_from_internal(query.opt_transform, query.score_transform, internal_state, opt_state)
+                    full_state = score_state + reconstruct_opt(query.opt_transform, opt_state)
                     return rand_log_density(full_state)
 
-                log_density = functools.partial(log_density, self, self.opt_transform[0], self.randomization.log_density)
+                log_density = functools.partial(log_density, self, self.randomization.log_density)
 
                 self._sampler = langevin_sampler(self.observed_opt_state,
-                                                 self.observed_internal_state.copy(),
+                                                 self.observed_score_state,
                                                  self.score_transform,
                                                  self.opt_transform,
                                                  projection,
@@ -326,40 +326,29 @@ def log_density(query,
                 cov, prec = self.randomization.cov_prec
                 prec_array = len(np.asarray(prec).shape) == 2
                 opt_linear, opt_offset = self.opt_transform
-                score_linear, score_offset = self.score_transform
 
                 if prec_array:
                     cond_precision = opt_linear.T.dot(prec.dot(opt_linear))
+                    logdens_linear = cond_cov.dot(opt_linear.T.dot(prec))
                 else:
                     cond_precision = opt_linear.T.dot(opt_linear) * prec
+                    logdens_linear = cond_cov.dot(opt_linear.T) * prec
 
                 cond_cov = np.linalg.inv(cond_precision)
-
-                offset = reconstruct_full_from_internal(self.opt_transform, 
-                                                        self.score_transform, 
-                                                        self.observed_internal_state, 
-                                                        np.zeros(opt_linear.shape[1]))
-                if prec_array:
-                    cond_mean = -cond_cov.dot(opt_linear.T.dot(prec.dot(offset)))
-                else:
-                    cond_mean = -cond_cov.dot(opt_linear.T.dot(offset)) * prec
+                cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
 
                 # need a log_density function
                 # the conditional density of opt variables
                 # given the score
 
-                if prec_array:
-                    logdens_offset = cond_cov.dot(opt_linear.T.dot(prec.dot(score_offset + opt_offset)))
-                    logdens_linear = cond_cov.dot(opt_linear.T.dot(prec.dot(score_linear)))
-                else:
-                    logdens_offset = cond_cov.dot(opt_linear.T.dot(score_offset + opt_offset)) * prec
-                    logdens_linear = cond_cov.dot(opt_linear.T.dot(score_linear)) * prec
-
-                def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
-                    mean_term = logdens_linear.dot(score.T).T + logdens_offset
+                def log_density(logdens_linear, offset, cond_prec, score, opt):
+                    if score.ndim == 1:
+                        mean_term = logdens_linear.dot(score.T + offset).T
+                    else:
+                        mean_term = logdens_linear.dot(score.T + offset[:, None]).T
                     arg = opt + mean_term
                     return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
-                log_density = functools.partial(log_density, logdens_offset, logdens_linear, cond_precision)
+                log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
 
                 # now make the constraints
 
@@ -382,10 +371,9 @@ def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
                                          mean=cond_mean,
                                          covariance=cond_cov)
 
-                logdens_transform = (logdens_linear, logdens_offset)
                 self._sampler = affine_gaussian_sampler(affine_con,
                                                         self.observed_opt_state,
-                                                        self.observed_internal_state,
+                                                        self.observed_score_state,
                                                         log_density,
                                                         selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
 
@@ -467,12 +455,12 @@ def new_grad_log_density(query,
                                      margin_inactive,
                                      _cdf,
                                      _pdf,
-                                     opt_linear,
+                                     new_opt_transform,
                                      deriv_log_dens,
-                                     internal_state, 
+                                     score_state, 
                                      opt_state):
 
-                full_state = reconstruct_full_from_internal(new_opt_transform, query.score_transform, internal_state, opt_state)
+                full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)
 
                 p = query.penalty.shape[0]
                 weights = np.zeros(p)
@@ -490,7 +478,7 @@ def new_grad_log_density(query,
                                                      margin_inactive,
                                                      self.randomization._cdf,
                                                      self.randomization._pdf,
-                                                     new_opt_transform[0],
+                                                     new_opt_transform,
                                                      self.randomization._derivative_log_density)
 
             def new_log_density(query, 
@@ -498,15 +486,13 @@ def new_log_density(query,
                                 margin_inactive,
                                 _cdf,
                                 _pdf,
-                                opt_linear,
+                                new_opt_transform,
                                 log_dens,
-                                internal_state, 
+                                score_state,
                                 opt_state):
 
-                full_state = reconstruct_full_from_internal(new_opt_transform,
-                                                            query.score_transform,
-                                                            internal_state,
-                                                            opt_state)
+                full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)
+
                 full_state = np.atleast_2d(full_state)
                 p = query.penalty.shape[0]
                 logdens = np.zeros(full_state.shape[0])
@@ -526,7 +512,7 @@ def new_log_density(query,
                                                 margin_inactive,
                                                 self.randomization._cdf,
                                                 self.randomization._pdf,
-                                                self.opt_transform[0],
+                                                new_opt_transform,
                                                 self.randomization._log_density)
 
             new_lagrange = self.penalty.weights[moving_inactive]
@@ -548,7 +534,7 @@ def new_projection(dual,
             new_selection_variable['subgradient'] = self.observed_opt_state[condition_inactive]
 
             self.sampler = langevin_sampler(observed_opt_state,
-                                            self.observed_internal_state.copy(),
+                                            self.observed_score_state,
                                             self.score_transform,
                                             new_opt_transform,
                                             new_projection,
@@ -558,42 +544,27 @@ def new_projection(dual,
         else:
 
             cov, prec = self.randomization.cov_prec
-            if len(np.asarray(prec)) == 2:
+            prec_array = len(np.asarray(prec).shape) == 2
+
+            if prec_array:
                 cond_precision = new_linear.T.dot(prec.dot(new_linear))
+                logdens_linear = cond_cov.dot(new_linear.T.dot(prec))
             else:
                 cond_precision = new_linear.T.dot(new_linear) * prec
-            score_linear, score_offset = self.score_transform
+                logdens_linear = cond_cov.dot(new_linear.T) * prec
 
             cond_cov = np.linalg.inv(cond_precision)
+            cond_mean = -logdens_linear.dot(self.observed_score_state + new_offset)
 
-            offset = reconstruct_full_from_internal(new_opt_transform, 
-                                                    self.score_transform, 
-                                                    self.observed_internal_state, 
-                                                    np.zeros(new_linear.shape[1]))
-
-            if len(np.asarray(prec)) == 2:
-                cond_mean = -cond_cov.dot(new_linear.T.dot(prec.dot(offset)))
-            else:
-                cond_mean = -cond_cov.dot(new_linear.T.dot(offset)) * prec
-
-            # need a log_density function
-            # the conditional density of opt variables
-            # given the score
-
-            if len(np.asarray(prec)) == 2:
-                logdens_offset = cond_cov.dot(new_linear.T.dot(prec.dot(
-                            score_offset + opt_offset)))
-                logdens_linear = cond_cov.dot(new_linear.T.dot(prec.dot(score_linear)))
-            else:
-                logdens_offset = cond_cov.dot(new_linear.T.dot(
-                            score_offset + opt_offset)) * prec
-                logdens_linear = cond_cov.dot(new_linear.T.dot(score_linear)) * prec
-
-            def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
-                mean_term = logdens_linear.dot(score.T).T + logdens_offset
+            def log_density(logdens_linear, offset, cond_prec, score, opt):
+                if score.ndim == 1:
+                    mean_term = logdens_linear.dot(score.T + offset).T
+                else:
+                    mean_term = logdens_linear.dot(score.T + offset[:, None]).T
                 arg = opt + mean_term
                 return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
-            log_density = functools.partial(log_density, logdens_offset, logdens_linear, cond_precision)
+
+            log_density = functools.partial(log_density, logdens_linear, new_offset, cond_precision)
 
             # now make the constraints
 
@@ -622,12 +593,10 @@ def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
                                      mean=cond_mean,
                                      covariance=cond_cov)
 
-            logdens_transform = (logdens_linear, logdens_offset)
             self._sampler = affine_gaussian_sampler(affine_con,
                                                     observed_opt_state,
-                                                    self.observed_internal_state,
+                                                    self.observed_score_state,
                                                     log_density,
-                                                    logdens_transform,
                                                     selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
 
 
@@ -912,7 +881,7 @@ def gaussian(X,
         $$
 
         where $\lambda$ is `feature_weights`. The ridge term
-        is determined by the Hessian and `np.std(Y)` by default,
+        is determined by the Hessian and `np.std(Y)` (scaled by $\sqrt{n/(n-1)}$) by default,
         as is the randomizer scale.
 
         Parameters
@@ -961,10 +930,10 @@ def gaussian(X,
 
         mean_diag = np.mean((X**2).sum(0))
         if ridge_term is None:
-            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)
+            ridge_term = (np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
         return lasso(loglike, np.asarray(feature_weights) / sigma**2,
                      ridge_term, randomizer_scale, randomizer=randomizer,
@@ -1041,7 +1010,7 @@ def logistic(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)
+            ridge_term = (np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 
@@ -1126,10 +1095,10 @@ def coxph(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)
+            ridge_term = (np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
         return lasso(loglike, 
                      feature_weights, 
@@ -1204,10 +1173,10 @@ def poisson(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)
+            ridge_term = (np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts)
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.))
 
         return lasso(loglike, 
                      feature_weights, 
@@ -1304,8 +1273,8 @@ def sqrt_lasso(X,
         # scale for randomization seems kind of meaningless here...
 
         mean_diag = np.mean((X**2).sum(0))
-        ridge_term = np.std(Y)**2 * mean_diag / np.sqrt(n)
-        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+        ridge_term = (np.std(Y)**2 * mean_diag / np.sqrt(n)) * n / (n - 1.)
+        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
         if np.asarray(feature_weights).shape == ():
             feature_weights = np.ones(p) * feature_weights
@@ -1456,7 +1425,7 @@ def __init__(self,
             feature_weights = np.ones(loglike.shape) * feature_weights
         self.feature_weights = np.asarray(feature_weights)
 
-        self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
+        self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
         self.ridge_term = ridge_term
         self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
 
@@ -1484,21 +1453,22 @@ def fit(self,
         p = self.nfeature
 
         if perturb is None:
-            self._initial_omega = perturb = self.randomizer.sample()
-        quad = rr.identity_quadratic(self.epsilon, 0, -perturb)
-        problem = rr.simple_problem(self.loss, self.penalty)
-        self.initial_soln = rr.solve(quad)
+            perturb = self.randomizer.sample()
+        self._initial_omega = perturb
+        quad = rr.identity_quadratic(self.ridge_term, 0, -perturb)
+        problem = rr.simple_problem(self.loglike, self.penalty)
+        self.initial_soln = problem.solve(quad)
 
         active_signs = np.sign(self.initial_soln)
         active = self._active = active_signs != 0
 
-        self._lagrange = penalty.weights
+        self._lagrange = self.penalty.weights
         unpenalized = self._lagrange == 0
 
         active *= ~unpenalized
 
-        self._overall = (active + unpenalized) > 0
-        self._inactive = ~self._overall
+        self._overall = overall = (active + unpenalized) > 0
+        self._inactive = inactive = ~self._overall
         self._unpenalized = unpenalized
 
         _active_signs = active_signs.copy()
@@ -1508,7 +1478,7 @@ def fit(self,
 
         # initial state for opt variables
 
-        initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + 
+        initial_subgrad = -(self.loglike.smooth_objective(self.initial_soln, 'grad') + 
                             quad.objective(self.initial_soln, 'grad')) 
         self.initial_subgrad = initial_subgrad
 
@@ -1518,7 +1488,7 @@ def fit(self,
         self.observed_opt_state = np.concatenate([initial_scalings,
                                                   initial_unpenalized])
 
-        _beta_unpenalized = restricted_estimator(loss, overall, solve_args=solve_args)
+        _beta_unpenalized = restricted_estimator(self.loglike, self._overall, solve_args=solve_args)
 
         beta_bar = np.zeros(p)
         beta_bar[overall] = _beta_unpenalized
@@ -1527,7 +1497,7 @@ def fit(self,
         # observed state for score in internal coordinates
 
         self.observed_internal_state = np.hstack([_beta_unpenalized,
-                                                  -loss.smooth_objective(beta_bar, 'grad')[inactive]])
+                                                  -self.loglike.smooth_objective(beta_bar, 'grad')[inactive]])
 
         # form linear part
 
@@ -1538,18 +1508,23 @@ def fit(self,
         # U for unpenalized
         # -E for inactive
 
-        _opt_linear_term = np.zeros((p, overall.sum()))
-        _score_linear_term = np.zeros((p, overall.sum()))
+        _opt_linear_term = np.zeros((p, self.num_opt_var))
+        _score_linear_term = np.zeros((p, self.num_opt_var))
 
         # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
 
-        X, y = loss.data
-        W = self.loss.saturated_loss.hessian(X.dot(beta_bar))
+        X, y = self.loglike.data
+        W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar))
         _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
         _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None])
 
         _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen])
 
+        # set the observed score (data dependent) state
+
+        self.observed_score_state = _score_linear_term.dot(_beta_unpenalized)
+        self.observed_score_state[inactive] += self.loglike.smooth_objective(beta_bar, 'grad')[inactive]
+
         def signed_basis_vector(p, j, s):
             v = np.zeros(p)
             v[j] = s
@@ -1561,16 +1536,16 @@ def signed_basis_vector(p, j, s):
         if np.sum(active) == 0:
             _opt_hessian = 0
         else:
-            _opt_hessian = _hessian_active * active_signs[None, active] + epsilon * active_directions
+            _opt_hessian = _hessian_active * active_signs[None, active] + self.ridge_term * active_directions
         _opt_linear_term[:, scaling_slice] = _opt_hessian
 
         # beta_U piece
 
-        unpenalized_slice = slice(active.sum(), overall.sum())
+        unpenalized_slice = slice(active.sum(), self.num_opt_var)
         unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
         if unpenalized.sum():
             _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
-                                                      + epsilon * unpenalized_directions) 
+                                                      + self.ridge_term * unpenalized_directions) 
 
         # two transforms that encode score and optimization
         # variable roles 
@@ -1585,74 +1560,54 @@ def signed_basis_vector(p, j, s):
         self._setup = True
         self.scaling_slice = scaling_slice
         self.unpenalized_slice = unpenalized_slice
-        self.ndim = loss.shape[0]
+        self.ndim = self.loglike.shape[0]
 
         # compute implied mean and covariance
 
-        cov, prec = self.randomization.cov_prec
+        cov, prec = self.randomizer.cov_prec
         opt_linear, opt_offset = self.opt_transform
-        score_linear, score_offset = self.score_transform
+
         cond_precision = opt_linear.T.dot(opt_linear) * prec
         cond_cov = np.linalg.inv(cond_precision)
+        logdens_linear = cond_cov.dot(opt_linear.T) * prec
+        cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
 
-        offset = reconstruct_full_from_internal(self.opt_transform, 
-                                                self.score_transform, 
-                                                self.observed_internal_state, 
-                                                np.zeros(opt_linear.shape[1]))
-        cond_mean = -cond_cov.dot(opt_linear.T.dot(offset)) * prec
-
-        # need a log_density function
-        # the conditional density of opt variables
-        # given the score
-
-        logdens_offset = cond_cov.dot(opt_linear.T.dot(prec.dot(score_offset + opt_offset)))
-        logdens_linear = cond_cov.dot(opt_linear.T.dot(prec.dot(score_linear)))
-
-        def log_density(logdens_offset, logdens_linear, cond_prec, score, opt):
-            mean_term = logdens_linear.dot(score.T).T + logdens_offset
+        def log_density(logdens_linear, offset, cond_prec, score, opt):
+            if score.ndim == 1:
+                mean_term = logdens_linear.dot(score.T + offset).T
+            else:
+                mean_term = logdens_linear.dot(score.T + offset[:, None]).T
             arg = opt + mean_term
             return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
-        log_density = functools.partial(log_density, logdens_offset, logdens_linear, cond_precision)
+        log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
 
         # now make the constraints
 
-        # scaling constraints
-
-        I = np.identity(cond_cov.shape[0])
-        A_scaling = -I[self.scaling_slice]
-        b_scaling = np.zeros(A_scaling.shape[0])
-
-        A_subgrad = np.vstack([I[self.subgrad_slice],
-                               -I[self.subgrad_slice]])
-        b_subgrad = np.hstack([inactive_lagrange,
-                               inactive_lagrange])
+        A_scaling = -np.identity(self.num_opt_var)
+        b_scaling = np.zeros(self.num_opt_var)
 
-        linear_term = np.vstack([A_scaling, A_subgrad])
-        offset = np.hstack([b_scaling, b_subgrad])
-
-        affine_con = constraints(linear_term,
-                                 offset,
+        affine_con = constraints(A_scaling,
+                                 b_scaling,
                                  mean=cond_mean,
                                  covariance=cond_cov)
 
-        logdens_transform = (logdens_linear, logdens_offset)
-        self._sampler = affine_gaussian_sampler(affine_con,
-                                                self.observed_opt_state,
-                                                self.observed_internal_state,
-                                                log_density,
-                                                selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
-
-
+        self.sampler = affine_gaussian_sampler(affine_con,
+                                               self.observed_opt_state,
+                                               self.observed_score_state,
+                                               log_density,
+                                               selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
+        
+        return active_signs
 
     def summary(self,
-                selected_features,
+                target="selected",
+                features=None,
                 parameter=None,
                 level=0.9,
                 ndraw=10000, 
                 burnin=2000,
                 compute_intervals=False,
-                bootstrap_sampler=False,
-                subset=None):
+                dispersion=None):
         """
         Produce p-values and confidence intervals for targets
         of model including selected features
@@ -1660,7 +1615,9 @@ def summary(self,
         Parameters
         ----------
 
-        selected_features : np.bool
+        target : one of ['selected', 'full']
+
+        features : np.bool
             Binary encoding of which features to use in final
             model and targets.
 
@@ -1676,56 +1633,142 @@ def summary(self,
         burnin : int (optional)
             Defaults to 1000.
 
-        bootstrap : bool
-            Use wild bootstrap instead of Gaussian plugin.
+        compute_intervals : bool
+            Compute confidence intervals?
+
+        dispersion : float (optional)
+            Use a known value for dispersion, or Pearson's X^2?
 
         """
-        if not hasattr(self, "_view"):
-            raise ValueError('run `fit` method before producing summary.')
 
         if parameter is None:
             parameter = np.zeros(self.loglike.shape[0])
 
-        if np.asarray(selected_features).dtype != np.bool:
-            raise ValueError('selected_features should be a boolean array')
-
-        unpenalized_mle = restricted_estimator(self.loglike, selected_features)
-
-        target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
-            form_covariances = glm_parametric_covariance(self.loglike)
-
-        opt_samplers = []
-        for q in [self._view]:
-            cov_info = q.setup_sampler()
-            if self.parametric_cov_estimator == False:
-                target_cov, score_cov = form_covariances(target_info,  
-                                                         cross_terms=[cov_info],
-                                                         nsample=q.nboot)
+        if target == 'selected':
+            observed_target, cov_target, cov_target_score, alternative = self.selected_targets(features=features, dispersion=dispersion)
+        elif target == 'full':
+            X, y = self.loglike.data
+            n, p = X.shape
+            if n > p:
+                observed_target, cov_target, cov_target_score, alternative = self.full_targets(features=features, dispersion=dispersion)
             else:
-                target_cov, score_cov = form_covariances(target_info,  
-                                                         cross_terms=[cov_info])
-            opt_samplers.append(q.sampler)
+                observed_target, cov_target, cov_target_score, alternative = self.debiased_targets(features=features, dispersion=dispersion)
 
-        opt_samples = [opt_sampler.sample(ndraw,
-                                          burnin) for opt_sampler in opt_samplers]
+        opt_sample = self.sampler.sample(ndraw,  burnin)
 
-        if subset is not None:
-            target_cov = target_cov[subset][:,subset]
-            score_cov = score_cov[subset]
-            unpenalized_mle = unpenalized_mle[subset]
-
-        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter, sample=opt_samples[0])
+        pivots = self.sampler.coefficient_pvalues(observed_target, 
+                                                  cov_target, 
+                                                  cov_target_score, 
+                                                  parameter=parameter, 
+                                                  sample=opt_sample, 
+                                                  alternative=alternative)
         if not np.all(parameter == 0):
-            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=np.zeros_like(parameter), sample=opt_samples[0])
+            pvalues = self.sampler.coefficient_pvalues(observed_target, 
+                                                       cov_target, 
+                                                       cov_target_score, 
+                                                       parameter=np.zeros_like(parameter), 
+                                                       sample=opt_sample, 
+                                                       alternative=alternative)
         else:
             pvalues = pivots
 
         intervals = None
         if compute_intervals:
-            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_samples[0])
+            intervals = self.sampler.confidence_intervals(observed_target, 
+                                                          cov_target, 
+                                                          cov_target_interval, 
+                                                          sample=opt_sample)
 
         return pivots, pvalues, intervals
 
+    # Targets of inference
+    # and covariance with score representation
+
+    def selected_targets(self, features=None, dispersion=None):
+        """
+        Targets for the selected model (or for `features`, if given).
+
+        Returns `(observed_target, cov_target, cov_target_score, alternative)`,
+        the 4-tuple unpacked by `summary`; covariances are scaled by
+        `dispersion` (Pearson's X^2 when `dispersion` is None).
+        """
+        X, y = self.loglike.data
+        n, p = X.shape
+        active = self._active
+        sided = {1:'greater', -1:'less'}
+
+        if features is None:
+            unpenalized = self._unpenalized
+            overall = active + unpenalized
+
+            score_linear = self.score_transform[0]
+            Q = -score_linear[overall]
+            cov_target = np.linalg.inv(Q)
+            observed_target = self._beta_full[overall]
+            crosscov_target_score = score_linear.dot(cov_target)
+            Xfeat = X[:,overall]
+            alternative = [sided[int(s)] for s in self.selection_variable['sign'][active]] + ['two-sided'] * unpenalized.sum()
+        else:
+            features_b = np.zeros_like(self._overall)
+            features_b[features] = True
+            features = features_b
+
+            Xfeat = X[:,features]
+            Qfeat = Xfeat.T.dot(self._W[:, None] * Xfeat)
+            Gfeat = self.loglike.smooth_objective(self.initial_soln, 'grad')[features]
+            cov_target = np.linalg.inv(Qfeat)
+            observed_target = self.initial_soln[features] - cov_target.dot(Gfeat) # one-step estimator
+            _score_linear = -Xfeat.T.dot(self._W[:, None] * X).T
+            crosscov_target_score = _score_linear.dot(cov_target)
+            # one alternative per requested feature; one-sided only where the feature was selected
+            alternative = [sided[int(self.selection_variable['sign'][f])] if active[f] else 'two-sided' for f in np.nonzero(features)[0]]
+
+        if dispersion is None: # use Pearson's X^2
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(observed_target)))**2 / self._W).sum() / (n - Xfeat.shape[1])
+
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternative
+
+    def full_targets(self, features=None, dispersion=None):
+        """
+        One-step estimator targets for `features` (default `self._overall`).
+
+        Returns `(observed_target, cov_target, cov_target_score, alternative)`,
+        the 4-tuple unpacked by `summary`; every alternative is 'two-sided'.
+        """
+        if features is None:
+            features = self._overall
+        features_b = np.zeros_like(self._overall)
+        features_b[features] = True
+        features = features_b
+
+        X, y = self.loglike.data
+        n, p = X.shape
+
+        Qfull = X.T.dot(self._W[:, None] * X)
+        G = self.loglike.smooth_objective(self.initial_soln, 'grad')
+        Qfull_inv = np.linalg.inv(Qfull)
+        one_step = self.initial_soln - Qfull_inv.dot(G)
+        cov_target = Qfull_inv[features][:,features]
+        crosscov_target_score = np.zeros((p, cov_target.shape[0]))
+        crosscov_target_score[features] = -np.identity(cov_target.shape[0])
+
+        if dispersion is None: # use Pearson's X^2
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step)))**2 / self._W).sum() / (n - p)
+
+        return one_step[features], cov_target * dispersion, crosscov_target_score.T * dispersion, ['two-sided'] * features.sum()
+
+    def debiased_targets(self, dispersion=None, features=None):
+        """
+        Debiased targets (the n <= p case of `summary`) -- not implemented.
+
+        Accepts the `features` keyword that `summary` passes, so that call
+        raises NotImplementedError rather than TypeError.
+
+        The result was meant to be cached on `self._debiased_targets`.
+        """
+        # TODO: compute observed_target, cov_target, crosscov_target_score and alternative
+        raise NotImplementedError
+
     @staticmethod
     def gaussian(X, 
                  Y, 
@@ -1794,10 +1837,10 @@ def gaussian(X,
 
         mean_diag = np.mean((X**2).sum(0))
         if ridge_term is None:
-            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)
+            ridge_term = (np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
         return highdim(loglike, np.asarray(feature_weights) / sigma**2,
                      ridge_term, randomizer_scale)
@@ -1874,7 +1917,7 @@ def logistic(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)
+            ridge_term = (np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 
@@ -1956,11 +1999,11 @@ def coxph(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)
+            ridge_term = (np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y)
-
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
+ 
         return lasso(loglike, 
                      feature_weights, 
                      ridge_term,
@@ -2032,10 +2075,10 @@ def poisson(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)
+            ridge_term = (np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts)
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.))
 
         return lasso(loglike, 
                      feature_weights, 
@@ -2125,4 +2168,3 @@ def sqrt_lasso(X,
 
         raise NotImplementedError
 
-
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 82ae79d67..48b660c4c 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -9,7 +9,6 @@
 from ..distributions.api import discrete_family
 from ..sampling.langevin import projected_langevin
 from ..constraints.affine import sample_from_constraints
-from .reconstruction import reconstruct_full_from_internal
 
 class query(object):
 
@@ -47,18 +46,9 @@ def linear_decomposition(self, target_score_cov, target_cov, observed_target_sta
         observed_target_state = np.atleast_1d(observed_target_state)
 
         linear_part = target_score_cov.T.dot(np.linalg.pinv(target_cov))
+        offset = self.observed_score_state - linear_part.dot(observed_target_state) + score_offset
 
-        offset = self.observed_internal_state - linear_part.dot(observed_target_state)
-
-        # now compute the composition of this map with
-        # self.score_transform
-
-        score_linear, score_offset = self.score_transform
-        composition_linear_part = score_linear.dot(linear_part)
-
-        composition_offset = score_linear.dot(offset) + score_offset
-
-        return (composition_linear_part, composition_offset)
+        return (linear_part, offset)
 
     def get_sampler(self):
         if hasattr(self, "_sampler"):
@@ -80,11 +70,10 @@ def setup_sampler(self):
         Setup query to prepare for sampling.
         Should set a few key attributes:
 
-            - observed_internal_state
+            - observed_score_state
             - num_opt_var
             - observed_opt_state
             - opt_transform
-            - score_transform
 
         """
         raise NotImplementedError('abstract method -- only keyword arguments')
@@ -340,7 +329,7 @@ class langevin_sampler(optimization_sampler):
 
     def __init__(self,
                  observed_opt_state,
-                 observed_internal_state,
+                 observed_score_state,
                  score_transform,
                  opt_transform,
                  projection,
@@ -360,11 +349,11 @@ def __init__(self,
         '''
 
         self.observed_opt_state = observed_opt_state.copy()
-        self.observed_internal_state = observed_internal_state.copy()
+        self.observed_score_state = observed_score_state.copy()
         self.score_linear, self.score_offset = score_transform
         self.opt_linear, self.opt_offset = opt_transform
         self.projection = projection
-        self.gradient = lambda opt: - grad_log_density(self.observed_internal_state, opt)
+        self.gradient = lambda opt: - grad_log_density(self.observed_score_state, opt)
         self.log_density = log_density
         self.selection_info = selection_info # a way to record what view and what was conditioned on -- not used in calculations
 
@@ -438,9 +427,8 @@ class affine_gaussian_sampler(optimization_sampler):
     def __init__(self,
                  affine_con,
                  initial_point,
-                 observed_internal_state,
+                 observed_score_state,
                  log_density,
-                 logdens_transform,
                  selection_info=None):
 
         '''
@@ -456,10 +444,9 @@ def __init__(self,
 
         self.affine_con = affine_con
         self.initial_point = initial_point
-        self.observed_internal_state = observed_internal_state
+        self.observed_score_state = observed_score_state
         self.selection_info = selection_info
         self.log_density = log_density
-        self.logdens_transform = logdens_transform
 
     def sample(self, ndraw, burnin):
         '''
@@ -513,7 +500,7 @@ def __init__(self,
         self.opt_sampling_info = tiled_sampling_info
         self._logden = 0
         for opt_sampler, opt_sample, _, _ in opt_sampling_info:
-            self._logden += opt_sampler.log_density(opt_sampler.observed_internal_state, opt_sample)
+            self._logden += opt_sampler.log_density(opt_sampler.observed_score_state, opt_sample)
 
         self.observed = observed.copy() # this is our observed unpenalized estimator
 
@@ -552,12 +539,11 @@ def pivot(self,
         for opt_sampler, opt_sample, _, score_cov in self.opt_sampling_info:
             cur_score_cov = linear_func.dot(score_cov)
 
-            # cur_nuisance is in the view's internal coordinates
-            cur_nuisance = opt_sampler.observed_internal_state - cur_score_cov * observed_stat / target_cov
+            # cur_nuisance is in the view's score coordinates
+            cur_nuisance = opt_sampler.observed_score_state - cur_score_cov * observed_stat / target_cov
             nuisance.append(cur_nuisance)
             translate_dirs.append(cur_score_cov / target_cov)
 
-
         weights = self._weights(sample_stat + candidate,  # normal sample under candidate
                                 nuisance,                 # nuisance sufficient stats for each view
                                 translate_dirs)               # points will be moved like sample * score_cov
@@ -605,7 +591,7 @@ def _weights(self,
         # for each projected (through linear_func) normal sample
         # using the linear decomposition
 
-        # We need access to the map that takes observed_internal for each view
+        # We need access to the map that takes observed_score for each view
         # and constructs the full randomization -- this is the reconstruction map
         # for each view
 
@@ -616,12 +602,12 @@ def _weights(self,
 
         # In this function, \hat{\theta}_i will change with the Monte Carlo sample
 
-        internal_sample = []
+        score_sample = []
         _lognum = 0
         for i, opt_info in enumerate(self.opt_sampling_info):
             opt_sampler, opt_sample = opt_info[:2]
-            internal_sample = np.multiply.outer(sample_stat, translate_dirs[i]) + nuisance[i][None, :] # these are now internal coordinates
-            _lognum += opt_sampler.log_density(internal_sample, opt_sample)
+            score_sample = np.multiply.outer(sample_stat, translate_dirs[i]) + nuisance[i][None, :] # these are now score coordinates
+            _lognum += opt_sampler.log_density(score_sample, opt_sample)
 
         _logratio = _lognum - self._logden
         _logratio -= _logratio.max()
diff --git a/selection/randomized/reconstruction.py b/selection/randomized/reconstruction.py
index 9e790395d..10f7a776b 100644
--- a/selection/randomized/reconstruction.py
+++ b/selection/randomized/reconstruction.py
@@ -63,7 +63,7 @@ def reconstruct_full_from_internal(opt_transform, score_transform, internal_stat
     Reconstruct original randomization state from internal state data
     and optimization state.
     """
-    randomization_internal = reconstruct_score(score_transform, internal_state)
+    randomization_score = reconstruct_score(score_transform, internal_state)
     randomization_opt = reconstruct_opt(opt_transform, opt_state)
-    return randomization_internal + randomization_opt
+    return randomization_score + randomization_opt
 
diff --git a/selection/randomized/target.py b/selection/randomized/target.py
deleted file mode 100644
index 776e9fcf3..000000000
--- a/selection/randomized/target.py
+++ /dev/null
@@ -1,649 +0,0 @@
-from itertools import product
-import numpy as np
-
-from regreg.affine import power_L
-
-from ..distributions.api import discrete_family, intervals_from_sample
-from ..sampling.langevin import projected_langevin
-from .reconstruction import reconstruct_full_from_data, reconstruct_internal
-
-class targeted_sampler(object):
-
-    '''
-    Object to sample from target of a selective sampler.
-    '''
-
-    def __init__(self,
-                 multi_view,
-                 target_info,
-                 observed_target_state,
-                 form_covariances,
-                 reference=None,
-                 target_set=None,
-                 parametric=False):
-
-        '''
-        Parameters
-        ----------
-
-        multi_view : `multiple_queries`
-           Instance of `multiple_queries`. Attributes
-           `objectives`, `score_info` are key
-           attributed. (Should maybe change constructor
-           to reflect only what is needed.)
-
-        target_info : object
-           Passed as first argument to `self.form_covariances`.
-
-        observed_target_state : np.float
-           Observed value of the target estimator.
-
-        form_covariances : callable
-           Used in linear decomposition of each score
-           and the target.
-
-        reference : np.float (optional)
-           Reference parameter for Gaussian approximation
-           of target.
-
-        target_set : sequence (optional)
-           Which coordinates of target are really
-           of interest. If not None, then coordinates
-           not in target_set are assumed to have 0
-           mean in the sampler.
-
-        parametric : bool
-           Use parametric covariance estimate?
-
-        Notes
-        -----
-        The callable `form_covariances`
-        should accept `target_info` as first argument
-        and a keyword argument `cross_terms` which
-        correspond to the `score_info` of each
-        objective of `multi_view`. This used in
-        a linear decomposition of each score into
-        a piece correlated with `target` and
-        an independent piece.
-        The independent piece is treated as a
-        nuisance parameter and conditioned on
-        (i.e. is fixed within the sampler).
-        '''
-
-        # sampler will draw samples for bootstrap
-        # these are arguments to target_info and score_bootstrap
-        # nonparamteric bootstrap is np.random.choice(n, size=(n,), replace=True)
-        # residual bootstrap might be X_E.dot(\bar{\beta}_E)
-        # + np.random.choice(resid, size=(n,), replace=True)
-
-        # if target_set is not None, we assume that
-        # these coordinates (specified by a list of coordinates) of target
-        # is assumed to be independent of the rest
-        # the corresponding block of `target_cov` is zeroed out
-
-        # we need these attributes of multi_view
-
-        self.nqueries = len(multi_view.objectives)
-        self.opt_slice = multi_view.opt_slice
-        self.objectives = multi_view.objectives
-
-        self.observed_target_state = observed_target_state
-        self.shape = observed_target_state.shape
-
-        self.total_randomization_length = multi_view.total_randomization_length
-        self.randomization_slice = multi_view.randomization_slice
-
-        self.score_cov = []
-        target_cov_sum = 0
-        for i in range(self.nqueries):
-            if parametric == False:
-                target_cov, cross_cov = multi_view.form_covariances(target_info,  
-                                                                    cross_terms=[multi_view.score_info[i]],
-                                                                    nsample=multi_view.nboot[i])
-            else:
-                target_cov, cross_cov = multi_view.form_covariances(target_info, 
-                                                                    cross_terms=[multi_view.score_info[i]])
-
-            target_cov_sum += target_cov
-            self.score_cov.append(cross_cov)
-
-        self.target_cov = target_cov_sum / self.nqueries
-
-        # XXX we're not really using this target_set in our tests
-
-        # zero out some coordinates of target_cov
-        # to enforce independence of target and null statistics
-
-        if target_set is not None:
-            null_set = set(range(self.target_cov.shape[0])).difference(target_set)
-            for t, n in product(target_set, null_set):
-                self.target_cov[t, n] = 0.
-                self.target_cov[n, t] = 0.
-
-        self.target_transform = []
-
-        for i in range(self.nqueries):
-            self.target_transform.append(
-                self.objectives[i].linear_decomposition(self.score_cov[i],
-                                                        self.target_cov,
-                                                        self.observed_target_state))
-
-        self.target_cov = np.atleast_2d(self.target_cov)
-        self.target_inv_cov = np.linalg.inv(self.target_cov)
-
-        # size of reference? should it only be target_set?
-
-        if reference is None:
-            reference = np.zeros(self.target_inv_cov.shape[0])
-        self.reference = reference
-
-        # need to vectorize the state for Langevin
-
-        self.overall_opt_slice = slice(0, multi_view.num_opt_var)
-        self.target_slice = slice(multi_view.num_opt_var,
-                                  multi_view.num_opt_var + self._reference_inv.shape[0])
-        self.keep_slice = self.target_slice
-
-        # set the observed state
-
-        self.observed_state = np.zeros(multi_view.num_opt_var + self._reference_inv.shape[0])
-        self.observed_state[self.target_slice] = self.observed_target_state
-        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
-
-        # added for the reconstruction map in case we marginalize over optimization variables
-
-        randomization_length_total = 0
-        self.randomization_slice = []
-        for i in range(self.nqueries):
-            self.randomization_slice.append(
-                slice(randomization_length_total, randomization_length_total + self.objectives[i].ndim))
-            randomization_length_total += self.objectives[i].ndim
-
-        self.randomization_length_total = randomization_length_total
-
-    def set_reference(self, reference):
-        self._reference = np.atleast_1d(reference)
-        self._reference_inv = self.target_inv_cov.dot(self.reference).flatten()
-
-    def get_reference(self):
-        return self._reference
-
-    reference = property(get_reference, set_reference)
-
-    def projection(self, state):
-        '''
-        Projection map of projected Langevin sampler.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Typically, the projection will only act on
-           `opt_vars`.
-        Returns
-        -------
-        projected_state : np.float
-        '''
-
-        opt_state = state[self.overall_opt_slice]
-        new_opt_state = np.zeros_like(opt_state)
-        for i in range(self.nqueries):
-            new_opt_state[self.opt_slice[i]] = self.objectives[i].projection(opt_state[self.opt_slice[i]])
-        state[self.overall_opt_slice] = new_opt_state
-        return state
-
-    def gradient(self, state):
-        '''
-        Gradient of log-density at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        target_state, opt_state = state[self.target_slice], state[self.overall_opt_slice]
-        target_grad, opt_grad = np.zeros_like(target_state), np.zeros_like(opt_state)
-        full_grad = np.zeros_like(state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        for i in range(self.nqueries):
-
-            randomization_state = reconstruct_full_from_data(self.objectives[i].opt_transform,
-                                                             self.objectives[i].score_transform,
-                                                             target_state, 
-                                                             self.target_transform[i], 
-                                                             opt_state[self.opt_slice[i]])
-
-            internal_state = reconstruct_internal(target_state, self.target_transform[i])
-            grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]]) 
-            target_linear, target_offset = self.target_transform[i]
-            opt_linear, opt_offset = self.objectives[i].opt_transform
-            if target_linear is not None:
-                target_grad += target_linear.T.dot(grad)
-            if opt_linear is not None:
-                opt_grad[self.opt_slice[i]] = opt_offset.T.dot(grad)
-
-        target_grad = -target_grad
-        target_grad += self._reference_inv - self.target_inv_cov.dot(target_state)
-        full_grad[self.target_slice] = target_grad
-        full_grad[self.overall_opt_slice] = -opt_grad
-
-        return full_grad
-
-
-    def sample(self, ndraw, burnin, stepsize=None, keep_opt=False):
-        '''
-        Sample `target` from selective density
-        using projected Langevin sampler with
-        gradient map `self.gradient` and
-        projection map `self.projection`.
-
-        Parameters
-        ----------
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        keep_opt : bool
-           Should we return optimization variables
-           as well as the target?
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if stepsize is None:
-            stepsize = 1. / self.crude_lipschitz()
-
-        if keep_opt:
-            keep_slice = slice(None, None, None)
-        else:
-            keep_slice = self.keep_slice
-
-        target_langevin = projected_langevin(self.observed_state.copy(),
-                                             self.gradient,
-                                             self.projection,
-                                             stepsize)
-
-        samples = []
-
-        for i in range(ndraw + burnin):
-            target_langevin.next()
-            if (i >= burnin):
-                samples.append(target_langevin.state[keep_slice].copy())
-        return np.asarray(samples)
-
-    def hypothesis_test(self,
-                        test_stat,
-                        observed_value,
-                        ndraw=10000,
-                        burnin=2000,
-                        stepsize=None,
-                        sample=None,
-                        parameter=None,
-                        alternative='twosided'):
-
-        '''
-        Sample `target` from selective density
-        using projected Langevin sampler with
-        gradient map `self.gradient` and
-        projection map `self.projection`.
-        Parameters
-        ----------
-        test_stat : callable
-           Test statistic to evaluate on sample from
-           selective distribution.
-        observed_value : float
-           Observed value of test statistic.
-           Used in p-value calculation.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc. If not None,
-           `ndraw, burnin, stepsize` are ignored.
-        parameter : np.float (optional)
-           If not None, defaults to `self.reference`.
-           Otherwise, sample is reweighted using Gaussian tilting.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        gradient : np.float
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        if parameter is None:
-            parameter = self.reference
-
-        sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample]))
-
-
-        delta = self.target_inv_cov.dot(parameter - self.reference)
-        W = np.exp(sample.dot(delta))
-
-        family = discrete_family(sample_test_stat, W)
-        pval = family.cdf(0, observed_value)
-
-        if alternative == 'greater':
-            return 1 - pval
-        elif alternative == 'less':
-            return pval
-        else:
-            return 2 * min(pval, 1 - pval)
-
-    def confidence_intervals(self,
-                             observed,
-                             ndraw=10000,
-                             burnin=2000,
-                             stepsize=None,
-                             sample=None,
-                             level=0.9):
-        '''
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        level : float (optional)
-            Specify the
-            confidence level.
-        Notes
-        -----
-        Construct selective confidence intervals
-        for each parameter of the target.
-        Returns
-        -------
-        intervals : [(float, float)]
-            List of confidence intervals.
-        '''
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
-
-        return intervals_instance.confidence_intervals_all(level=level)
-
-    def coefficient_pvalues(self,
-                            observed,
-                            parameter=None,
-                            ndraw=10000,
-                            burnin=2000,
-                            stepsize=None,
-                            sample=None,
-                            alternative='twosided'):
-        '''
-        Construct selective p-values
-        for each parameter of the target.
-        Parameters
-        ----------
-        observed : np.float
-            A vector of parameters with shape `self.shape`,
-            representing coordinates of the target.
-        parameter : np.float (optional)
-            A vector of parameters with shape `self.shape`
-            at which to evaluate p-values. Defaults
-            to `np.zeros(self.shape)`.
-        ndraw : int
-           How long a chain to return?
-        burnin : int
-           How many samples to discard?
-        stepsize : float
-           Stepsize for Langevin sampler. Defaults
-           to a crude estimate based on the
-           dimension of the problem.
-        sample : np.array (optional)
-           If not None, assumed to be a sample of shape (-1,) + `self.shape`
-           representing a sample of the target from parameters `self.reference`.
-           Allows reuse of the same sample for construction of confidence
-           intervals, hypothesis tests, etc.
-        alternative : ['greater', 'less', 'twosided']
-            What alternative to use.
-        Returns
-        -------
-        pvalues : np.float
-
-        '''
-
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
-
-        if sample is None:
-            sample = self.sample(ndraw, burnin, stepsize=stepsize)
-
-        if parameter is None:
-            parameter = np.zeros(self.shape)
-
-        nactive = observed.shape[0]
-        intervals_instance = intervals_from_sample(self.reference,
-                                                   sample,
-                                                   observed,
-                                                   self.target_cov)
-
-        pval = intervals_instance.pivots_all(parameter)
-
-        if alternative == 'greater':
-            return 1 - pval
-        elif alternative == 'less':
-            return pval
-        else:
-            return 2 * np.minimum(pval, 1 - pval)
-
-    def crude_lipschitz(self):
-        """
-        A crude Lipschitz constant for the
-        gradient of the log-density.
-        Returns
-        -------
-        lipschitz : float
-
-        """
-        lipschitz = power_L(self.target_inv_cov)
-        for transform, objective in zip(self.target_transform, self.objectives):
-            lipschitz += power_L(transform[0])**2 * objective.randomization.lipschitz
-            lipschitz += power_L(objective.score_transform[0])**2 * objective.randomization.lipschitz
-        return lipschitz
-
-
-    def reconstruct(self, state):
-        '''
-        Reconstruction of randomization at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be array with each row a state.
-        Returns
-        -------
-        reconstructed : np.float
-           Has shape of `opt_vars` with same number of rows
-           as `state`.
-
-        '''
-
-        state = np.atleast_2d(state)
-        if len(state.shape) > 2:
-            raise ValueError('expecting at most 2-dimensional array')
-
-        target_state, opt_state = state[:,self.target_slice], state[:,self.overall_opt_slice]
-        reconstructed = np.zeros((state.shape[0], self.total_randomization_length))
-
-        for i in range(self.nqueries):
-            reconstructed[:, self.randomization_slice[i]] = reconstruct_full_from_data(self.objectives[i].opt_transform,
-                                                                                       self.objectives[i].score_transform,
-                                                                                       target_state,
-                                                                                       self.target_transform[i],
-                                                                                       opt_state[:, self.opt_slice[i]])
-
-        return np.squeeze(reconstructed)
-
-    def log_density(self, state):
-        '''
-        Log of randomization density at current state.
-        Parameters
-        ----------
-        state : np.float
-           State of sampler made up of `(target, opt_vars)`.
-           Can be two-dimensional with each row a state.
-        Returns
-        -------
-        density : np.float
-            Has number of rows as `state` if 2-dimensional.
-        '''
-
-        reconstructed = self.reconstruct(state)
-        value = np.zeros(reconstructed.shape[0])
-
-        for i in range(self.nqueries):
-            log_dens = self.objectives[i].randomization.log_density
-            value += log_dens(reconstructed[:,self.opt_slice[i]])
-        return np.squeeze(value)
-
-class bootstrapped_target_sampler(targeted_sampler):
-
-    # make one of these for each hypothesis test
-
-    def __init__(self,
-                 multi_view,
-                 target_info,
-                 observed_target_state,
-                 target_alpha,
-                 target_set=None,
-                 reference=None,
-                 boot_size=None):
-
-        # sampler will draw bootstrapped weights for the target
-
-        if boot_size is None:
-            boot_size = target_alpha.shape[1]
-
-        targeted_sampler.__init__(self, multi_view,
-                                  target_info,
-                                  observed_target_state,
-                                  target_set,
-                                  reference)
-        # for bootstrap
-
-        self.boot_size = boot_size
-        self.target_alpha = target_alpha
-        self.boot_transform = []
-
-        for i in range(self.nqueries):
-            composition_linear_part, composition_offset = self.objectives[i].linear_decomposition(self.score_cov[i],
-                                                                                                  self.target_cov,
-                                                                                                  self.observed_target_state)
-            boot_linear_part = np.dot(composition_linear_part, target_alpha)
-            boot_offset = composition_offset + np.dot(composition_linear_part, self.reference).flatten()
-            self.boot_transform.append((boot_linear_part, boot_offset))
-
-        # set the observed state for bootstrap
-
-        self.boot_slice = slice(multi_view.num_opt_var, multi_view.num_opt_var + self.boot_size)
-        self.observed_state = np.zeros(multi_view.num_opt_var + self.boot_size)
-        self.observed_state[self.boot_slice] = np.ones(self.boot_size)
-        self.observed_state[self.overall_opt_slice] = multi_view.observed_opt_state
-
-
-    def gradient(self, state):
-
-        boot_state, opt_state = state[self.boot_slice], state[self.overall_opt_slice]
-        boot_grad, opt_grad = np.zeros_like(boot_state), np.zeros_like(opt_state)
-        full_grad = np.zeros_like(state)
-
-        # randomization_gradient are gradients of a CONVEX function
-
-        for i in range(self.nqueries):
-
-            randomization_state = reconstruct_full_from_data(self.objectives[i].opt_transform,
-                                                             self.objectives[i].score_transform,
-                                                             boot_state, 
-                                                             self.boot_transform[i], 
-                                                             opt_state[self.opt_slice[i]])
-
-            internal_state = reconstruct_internal(boot_state, self.boot_transform[i])
-            grad = self.objectives[i].grad_log_density(internal_state, opt_state[self.opt_slice[i]])
-            boot_linear, boot_offset = self.boot_transform[i]
-            opt_linear, opt_offset = self.objectives[i].opt_transform
-            if boot_linear is not None:
-                boot_grad += boot_linear.T.dot(grad)
-            if opt_linear is not None:
-                opt_grad[self.opt_slice[i]] = opt_offset.T.dot(grad)
-
-        boot_grad = -boot_grad
-        boot_grad -= boot_state
-
-        full_grad[self.boot_slice] = boot_grad
-        full_grad[self.overall_opt_slice] = -opt_grad
-
-        return full_grad
-
-    def sample(self, ndraw, burnin, stepsize = None, keep_opt=False):
-        if stepsize is None:
-            stepsize = 1. / self.observed_state.shape[0]
-
-        bootstrap_langevin = projected_langevin(self.observed_state.copy(),
-                                                self.gradient,
-                                                self.projection,
-                                                stepsize)
-        if keep_opt:
-            boot_slice = slice(None, None, None)
-        else:
-            boot_slice = self.boot_slice
-
-        samples = []
-        for i in range(ndraw + burnin):
-            bootstrap_langevin.next()
-            if (i >= burnin):
-                samples.append(bootstrap_langevin.state[boot_slice].copy())
-        samples = np.asarray(samples)
-
-        if keep_opt:
-            target_samples = samples[:,self.boot_slice].dot(self.target_alpha.T) + self.reference[None, :]
-            opt_sample0 = samples[0,self.overall_opt_slice]
-            result = np.zeros((samples.shape[0], opt_sample0.shape[0] + target_samples.shape[1]))
-            result[:,self.overall_opt_slice] = samples[:,self.overall_opt_slice]
-            result[:,self.target_slice] = target_samples
-            return result
-        else:
-            target_samples = samples.dot(self.target_alpha.T) + self.reference[None, :]
-            return target_samples
-
-# test rebase
diff --git a/selection/randomized/tests/test_Mest.py b/selection/randomized/tests/test_Mest.py
index ae21c68f3..21b0dc8ae 100644
--- a/selection/randomized/tests/test_Mest.py
+++ b/selection/randomized/tests/test_Mest.py
@@ -15,7 +15,7 @@
 from ..glm import bootstrap_cov
 from ...distributions.discrete_family import discrete_family
 from ...sampling.langevin import projected_langevin
-from ..target import reconstruct_internal
+from ..reconstruct import reconstruct_internal
 
 @register_report(['pvalue', 'active'])
 @wait_for_return_value()
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
new file mode 100644
index 000000000..53730b2cb
--- /dev/null
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -0,0 +1,143 @@
+import numpy as np
+import nose.tools as nt
+import rpy2.robjects as rpy
+from rpy2.robjects import numpy2ri
+rpy.r('library(selectiveInference)')
+
+import selection.randomized.lasso as L; reload(L)
+from selection.randomized.lasso import highdim 
+from selection.tests.instance import gaussian_instance
+import matplotlib.pyplot as plt
+
+
+def test_condition_subgrad(n=200, p=10, signal=np.sqrt(2 * np.log(10)), s=5, ndraw=5000, burnin=1000, param=True, sigma=3, full=True, rho=0.2, randomizer_scale=1):
+    """
+    Compare to R randomized lasso
+    """
+
+
+    inst, const = gaussian_instance, highdim.gaussian
+    X, Y, beta = inst(n=n,
+                      p=p, 
+                      signal=signal, 
+                      s=s, 
+                      equicorrelated=False, 
+                      rho=rho, 
+                      sigma=sigma, 
+                      random_signs=True)[:3]
+
+    n, p = X.shape
+
+    W = np.ones(X.shape[1]) * 1.5 * sigma
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer_scale=randomizer_scale)
+    
+    signs = conv.fit()
+    nonzero = signs != 0
+
+    if full:
+        _, pval, intervals = conv.summary(target="full",
+                                          ndraw=ndraw,
+                                          burnin=burnin, 
+                                          compute_intervals=False)
+    else:
+        _, pval, intervals = conv.summary(target="selected",
+                                          ndraw=ndraw,
+                                          burnin=burnin, 
+                                          compute_intervals=False)
+
+    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
+
+def test_compareR(n=200, p=10, signal=np.sqrt(4) * np.sqrt(2 * np.log(10)), s=5, ndraw=5000, burnin=1000, param=True, sigma=3):
+    """
+    Compare to R randomized lasso
+    """
+
+    inst, const = gaussian_instance, highdim.gaussian
+    X, Y, beta = inst(n=n, p=p, signal=signal, s=s, equicorrelated=False, rho=0.2, sigma=sigma, random_signs=True)[:3]
+
+    n, p = X.shape
+
+    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma
+    randomizer_scale = np.std(Y) * .5 * np.sqrt(n / (n - 1.)) # to agree more exactly with R
+
+    pval, vars, rand, active, soln, ridge_term, cond_cov, cond_mean = Rpval(X, Y, W, randomizer_scale)
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer_scale=randomizer_scale)
+    
+    signs = conv.fit(perturb=rand, solve_args={'min_its':500, 'tol':1.e-12})
+
+    assert np.fabs(conv.ridge_term - ridge_term) / ridge_term < 1.e-4
+
+    assert np.fabs(soln - conv.initial_soln).max() / np.fabs(soln).max() < 1.e-3
+
+
+    nonzero = signs != 0
+
+    assert np.linalg.norm(conv.sampler.affine_con.covariance - cond_cov) / np.linalg.norm(cond_cov) < 1.e-3
+    assert np.linalg.norm(conv.sampler.affine_con.mean - cond_mean[:,0]) / np.linalg.norm(cond_mean[:,0]) < 1.e-3
+
+
+def main(nsim=500):
+
+    P0, PA = [], []
+    from statsmodels.distributions import ECDF
+
+    n, p = 500, 20
+
+    for i in range(nsim):
+        p0, pA = test_condition_subgrad(n=n, p=p, full=False)
+        try:
+            p0, pA = test_condition_subgrad(n=n, p=p, full=False)
+        except:
+            p0, pA = [], []
+        P0.extend(p0)
+        PA.extend(pA)
+        print(np.mean(P0), np.std(P0), np.mean(np.array(PA) < 0.05))
+    
+        if i % 3 == 0 and i > 0:
+            U = np.linspace(0, 1, 101)
+            plt.clf()
+            if len(P0) > 0:
+                plt.plot(U, ECDF(P0)(U))
+            if len(PA) > 0:
+                plt.plot(U, ECDF(PA)(U), 'r')
+            plt.plot([0, 1], [0, 1], 'k--')
+            plt.savefig("plot.pdf")
+    plt.show()
+
+def Rpval(X, Y, W, noise_scale=None):
+    numpy2ri.activate()
+    rpy.r.assign('X', X)
+    rpy.r.assign('Y', Y)
+    rpy.r.assign('lam', W)
+
+    if noise_scale is not None:
+        rpy.r.assign('noise_scale', noise_scale)
+        rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam, noise_scale=noise_scale, kkt_tol=1.e-8, parameter_tol=1.e-8)')
+    else:
+        rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam)')
+    rpy.r('targets=selectiveInference:::set.targets(soln,type="full")')
+    #rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="norejection", targets=targets, nsample=5000, burnin=1000)')
+    rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="restrictedMVN", targets=targets, nsample=5000, burnin=2000)')
+
+    pval = np.asarray(rpy.r('rand_inf$pvalues'))
+    vars = np.asarray(rpy.r('soln$active_set')) - 1 
+    cond_cov = np.asarray(rpy.r('soln$law$cond_cov'))
+    cond_mean = np.asarray(rpy.r('soln$law$cond_mean'))
+    rand = np.asarray(rpy.r('soln$perturb'))
+    active =  np.asarray(rpy.r('soln$active')) - 1
+    soln = np.asarray(rpy.r('soln$soln'))
+    ridge = rpy.r('soln$ridge_term')
+
+    return pval, vars, rand, active, soln, ridge, cond_cov, cond_mean
+
+
+# if __name__ == "__main__":
+#     main()
diff --git a/selection/randomized/threshold_score.py b/selection/randomized/threshold_score.py
index f8ed0bda3..8a92c5404 100644
--- a/selection/randomized/threshold_score.py
+++ b/selection/randomized/threshold_score.py
@@ -4,7 +4,6 @@
 import regreg.api as rr
 
 from .query import query, optimization_sampler
-from .reconstruction import reconstruct_full_from_internal, reconstruct_score
 from .base import restricted_estimator
 
 class threshold_score(query):
@@ -118,7 +117,7 @@ def solve(self, nboot=2000):
 
         self.interior = ~self.boundary
 
-        self.observed_internal_state = candidate_score
+        self.observed_internal_state = self.observed_score_state = candidate_score
 
         active_signs = np.sign(randomized_score[self.boundary])
         self.selection_variable = {'boundary_set': self.boundary,
@@ -150,17 +149,15 @@ def get_sampler(self):
         if not hasattr(self, "_sampler"):
 
             def log_density(boundary, 
-                            score_transform,
                             threshold,
                             _density,
                             _cdf,
-                            internal_state, 
+                            score_state, 
                             opt_state):
                 """
                 marginalizing over the sub-gradient
                 """
 
-                score_state = np.atleast_2d(reconstruct_score(score_transform, internal_state))
                 logdens = 0
                 weights = np.zeros_like(boundary, np.float)
 
@@ -173,7 +170,6 @@ def log_density(boundary,
 
             log_density = functools.partial(log_density,
                                             self.boundary,
-                                            self.score_transform,
                                             self.threshold,
                                             self.randomization._density,
                                             self.randomization._cdf)
@@ -186,7 +182,7 @@ def log_density(boundary,
             projection = None
 
             self._sampler = optimization_sampler(np.zeros(()), # nothing to sample
-                                                 self.observed_internal_state.copy(),
+                                                 self.observed_score_state,
                                                  self.score_transform,
                                                  self.opt_transform,
                                                  projection,

From ed3f62c255609485335ef4f773eac672b1198427 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 14 Feb 2018 20:47:28 -0800
Subject: [PATCH 479/617] adding alternatives as a sequence

---
 selection/randomized/lasso.py                 | 28 +++++++++++--------
 selection/randomized/query.py                 | 10 +++----
 .../randomized/tests/test_highdim_lasso.py    | 14 ++++------
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index dcf95d670..ecc63ed4f 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1645,14 +1645,14 @@ def summary(self,
             parameter = np.zeros(self.loglike.shape[0])
 
         if target == 'selected':
-            observed_target, cov_target, cov_target_score, alternative = self.selected_targets(features=features, dispersion=dispersion)
+            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
         elif target == 'full':
             X, y = self.loglike.data
             n, p = X.shape
             if n > p:
-                observed_target, cov_target, cov_target_score, alternative = self.full_targets(features=features, dispersion=dispersion)
+                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features, dispersion=dispersion)
             else:
-                observed_target, cov_target, cov_target_score, alternative = self.debiased_targets(features=features, dispersion=dispersion)
+                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features, dispersion=dispersion)
 
         opt_sample = self.sampler.sample(ndraw,  burnin)
 
@@ -1661,14 +1661,14 @@ def summary(self,
                                                   cov_target_score, 
                                                   parameter=parameter, 
                                                   sample=opt_sample, 
-                                                  alternative=alternative)
+                                                  alternatives=alternatives)
         if not np.all(parameter == 0):
             pvalues = self.sampler.coefficient_pvalues(observed_target, 
                                                        cov_target, 
                                                        cov_target_score, 
                                                        parameter=np.zeros_like(parameter), 
                                                        sample=opt_sample, 
-                                                       alternative=alternative)
+                                                       alternatives=alternatives)
         else:
             pvalues = pivots
 
@@ -1701,7 +1701,7 @@ def selected_targets(self, features=None, dispersion=None):
             observed_target = self._beta_full[overall]
             crosscov_target_score = score_linear.dot(cov_target)
             Xfeat = X[:,overall]
-            alternative = [{1:'greater', -1:'less'}[int(s)] for s in self.selection_variable['sign'][active]] + ['two-sided'] * unpenalized.sum()
+            alternatives = [{1:'greater', -1:'less'}[int(s)] for s in self.selection_variable['sign'][active]] + ['twosided'] * unpenalized.sum()
 
         else:
 
@@ -1718,21 +1718,21 @@ def selected_targets(self, features=None, dispersion=None):
             _score_linear = -Xfeat.T.dot(self._W[:, None] * X).T
             crosscov_target_score = _score_linear.dot(cov_target)
             observed_target = one_step
-            alternative = ['two-sided'] * overall.sum()
+            alternatives = ['twosided'] * overall.sum()
             for i, f in enumerate(np.nonzero(features)[0]):
                 if active[f]:
-                    alternative[i] = {1:'greater', -1:'less'}[int(self.selection_variable['sign'][f])]
+                    alternatives[i] = {1:'greater', -1:'less'}[int(self.selection_variable['sign'][f])]
 
         if dispersion is None: # use Pearson's X^2
             dispersion = ((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(observed_target)))**2 / self._W).sum() / (n - Xfeat.shape[1])
 
-        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     def full_targets(self, features=None, dispersion=None):
 
         if features is None:
             features = self._overall
-        features_b = np.zeros_like(self._overall)
+        features_b = np.zeros(self._overall.shape, np.bool)
         features_b[features] = True
         features = features_b
 
@@ -1753,9 +1753,13 @@ def full_targets(self, features=None, dispersion=None):
         if dispersion is None: # use Pearson's X^2
             dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step)))**2 / self._W).sum() / (n - p)
 
-        alternative = ['two-sided'] * features.sum()
+        alternatives = ['twosided'] * features.sum()
 
-        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion
+        for i, f in enumerate(np.nonzero(features)[0]):
+            if self._active[f]:
+                alternatives[i] = {1:'greater', -1:'less'}[int(self.selection_variable['sign'][f])]
+
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     def debiased_targets(self, dispersion=None):
         
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 48b660c4c..91a78d7ea 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -263,7 +263,7 @@ def coefficient_pvalues(self,
                             parameter=None,
                             sample_args=(),
                             sample=None,
-                            alternative='twosided'):
+                            alternatives=None):
         '''
         Construct selective p-values
         for each parameter of the target.
@@ -289,7 +289,7 @@ def coefficient_pvalues(self,
            Allows reuse of the same sample for construction of confidence
            intervals, hypothesis tests, etc.
 
-        alternative : ['greater', 'less', 'twosided']
+        alternatives : list of ['greater', 'less', 'twosided']
             What alternative to use.
 
         Returns
@@ -298,8 +298,8 @@ def coefficient_pvalues(self,
 
         '''
 
-        if alternative not in ['greater', 'less', 'twosided']:
-            raise ValueError("alternative should be one of ['greater', 'less', 'twosided']")
+        if alternatives is None:
+            alternatives = ['twosided'] * observed_target.shape[0]
 
         if sample is None:
             sample = self.sample(*sample_args)
@@ -316,7 +316,7 @@ def coefficient_pvalues(self,
         for i in range(observed_target.shape[0]):
             keep = np.zeros_like(observed_target)
             keep[i] = 1.
-            pvals.append(_intervals.pivot(keep, candidate=parameter[i], alternative=alternative))
+            pvals.append(_intervals.pivot(keep, candidate=parameter[i], alternative=alternatives[i]))
 
         return np.array(pvals)
 
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index 53730b2cb..e5a3ca7fb 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -9,14 +9,13 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-
-def test_condition_subgrad(n=200, p=10, signal=np.sqrt(2 * np.log(10)), s=5, ndraw=5000, burnin=1000, param=True, sigma=3, full=True, rho=0.2, randomizer_scale=1):
+def test_highdim_lasso(n=200, p=10, signal_fac=1.5, s=5, ndraw=5000, burnin=1000, sigma=3, full=False, rho=0.4, randomizer_scale=1):
     """
     Compare to R randomized lasso
     """
 
-
     inst, const = gaussian_instance, highdim.gaussian
+    signal = np.sqrt(signal_fac * np.log(p))
     X, Y, beta = inst(n=n,
                       p=p, 
                       signal=signal, 
@@ -28,12 +27,12 @@ def test_condition_subgrad(n=200, p=10, signal=np.sqrt(2 * np.log(10)), s=5, ndr
 
     n, p = X.shape
 
-    W = np.ones(X.shape[1]) * 1.5 * sigma
+    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma
 
     conv = const(X, 
                  Y, 
                  W, 
-                 randomizer_scale=randomizer_scale)
+                 randomizer_scale=randomizer_scale * sigma)
     
     signs = conv.fit()
     nonzero = signs != 0
@@ -89,12 +88,11 @@ def main(nsim=500):
     P0, PA = [], []
     from statsmodels.distributions import ECDF
 
-    n, p = 500, 20
+    n, p = 500, 200
 
     for i in range(nsim):
-        p0, pA = test_condition_subgrad(n=n, p=p, full=False)
         try:
-            p0, pA = test_condition_subgrad(n=n, p=p, full=False)
+            p0, pA = test_highdim_lasso(n=n, p=p, full=False)
         except:
             p0, pA = [], []
         P0.extend(p0)

From 7ef5a458d40cc9dd4f614baedf895a8f01c2f656 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 14 Feb 2018 21:31:39 -0800
Subject: [PATCH 480/617] BF: variable name

---
 selection/randomized/lasso.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index ecc63ed4f..44f02e419 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1676,7 +1676,7 @@ def summary(self,
         if compute_intervals:
             intervals = self.sampler.confidence_intervals(observed_target, 
                                                           cov_target, 
-                                                          cov_target_interval, 
+                                                          cov_target_score,
                                                           sample=opt_sample)
 
         return pivots, pvalues, intervals

From 23fadb23e029ec4d6ede6da04f814a2d7561e172 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 14 Feb 2018 22:44:02 -0800
Subject: [PATCH 481/617] BF: for full we can't use one-sided tests -- not what
 we observed

---
 selection/randomized/lasso.py                    | 4 ----
 selection/randomized/tests/test_highdim_lasso.py | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 44f02e419..881ec752a 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1755,10 +1755,6 @@ def full_targets(self, features=None, dispersion=None):
 
         alternatives = ['twosided'] * features.sum()
 
-        for i, f in enumerate(np.nonzero(features)[0]):
-            if self._active[f]:
-                alternatives[i] = {1:'greater', -1:'less'}[int(self.selection_variable['sign'][f])]
-
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     def debiased_targets(self, dispersion=None):
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index e5a3ca7fb..15a4922b8 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -92,7 +92,7 @@ def main(nsim=500):
 
     for i in range(nsim):
         try:
-            p0, pA = test_highdim_lasso(n=n, p=p, full=False)
+            p0, pA = test_highdim_lasso(n=n, p=p, full=True)
         except:
             p0, pA = [], []
         P0.extend(p0)

From d24f44405bac8bfc85da12f9eee4bb17b6c908c0 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 16 Feb 2018 14:54:27 -0800
Subject: [PATCH 482/617] comparing highdim to more general with decomposition

---
 selection/randomized/tests/test_full_lasso.py | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 selection/randomized/tests/test_full_lasso.py

diff --git a/selection/randomized/tests/test_full_lasso.py b/selection/randomized/tests/test_full_lasso.py
new file mode 100644
index 000000000..4bd633dc6
--- /dev/null
+++ b/selection/randomized/tests/test_full_lasso.py
@@ -0,0 +1,60 @@
+import numpy as np
+import nose.tools as nt
+
+import selection.randomized.lasso as L; reload(L)
+from selection.randomized.lasso import highdim, lasso
+from selection.tests.instance import gaussian_instance
+import matplotlib.pyplot as plt
+
+def test_full_lasso(n=200, p=30, signal_fac=1.5, s=5, ndraw=5000, burnin=1000, sigma=3, full=False, rho=0.4, randomizer_scale=1):
+    """
+    General LASSO -- 
+    """
+
+    inst, const = gaussian_instance, highdim.gaussian
+    signal = np.sqrt(signal_fac * np.log(p))
+    X, Y, beta = inst(n=n,
+                      p=p, 
+                      signal=signal, 
+                      s=s, 
+                      equicorrelated=False, 
+                      rho=rho, 
+                      sigma=sigma, 
+                      random_signs=True)[:3]
+
+    n, p = X.shape
+
+    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer_scale=randomizer_scale * sigma)
+    
+    signs = conv.fit(solve_args={'min_its':500, 'tol':1.e-13})
+    nonzero = signs != 0
+
+    conv2 = lasso.gaussian(X, 
+                           Y, 
+                           W,
+                           randomizer_scale=randomizer_scale * sigma)
+    conv2.fit(perturb=conv._initial_omega, solve_args={'min_its':500, 'tol':1.e-13})
+    conv2.decompose_subgradient(condition=np.ones(p, np.bool))
+
+    np.testing.assert_allclose(conv2._view.sampler.affine_con.covariance,
+                               conv.sampler.affine_con.covariance)
+
+    np.testing.assert_allclose(conv2._view.sampler.affine_con.mean,
+                               conv.sampler.affine_con.mean)
+
+    np.testing.assert_allclose(conv2._view.sampler.affine_con.linear_part,
+                               conv.sampler.affine_con.linear_part)
+
+    np.testing.assert_allclose(conv2._view.sampler.affine_con.offset,
+                               conv.sampler.affine_con.offset)
+
+    np.testing.assert_allclose(conv2._view.initial_soln,
+                               conv.initial_soln)
+
+    np.testing.assert_allclose(conv2._view.initial_subgrad,
+                               conv.initial_subgrad)

From cf6bb6fb13570bae6bf891574a0e79cda0742826 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 17 Feb 2018 11:40:26 -0800
Subject: [PATCH 483/617] adding logdens_transform

---
 selection/randomized/lasso.py | 12 +++++++-----
 selection/randomized/query.py |  1 +
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 881ec752a..977432718 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -371,10 +371,12 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                          mean=cond_mean,
                                          covariance=cond_cov)
 
+                logdens_transform = (logdens_linear, opt_offset)
                 self._sampler = affine_gaussian_sampler(affine_con,
                                                         self.observed_opt_state,
                                                         self.observed_score_state,
                                                         log_density,
+                                                        logdens_transform,
                                                         selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
 
         return self._sampler
@@ -593,10 +595,12 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                      mean=cond_mean,
                                      covariance=cond_cov)
 
+            logdens_transform = (logdens_linear, new_offset)
             self._sampler = affine_gaussian_sampler(affine_con,
                                                     observed_opt_state,
                                                     self.observed_score_state,
                                                     log_density,
+                                                    logdens_transform,
                                                     selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
 
 
@@ -1591,10 +1595,12 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                  mean=cond_mean,
                                  covariance=cond_cov)
 
+        logdens_transform = (logdens_linear, opt_offset)
         self.sampler = affine_gaussian_sampler(affine_con,
                                                self.observed_opt_state,
                                                self.observed_score_state,
                                                log_density,
+                                               logdens_transform,
                                                selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
         
         return active_signs
@@ -1718,10 +1724,7 @@ def selected_targets(self, features=None, dispersion=None):
             _score_linear = -Xfeat.T.dot(self._W[:, None] * X).T
             crosscov_target_score = _score_linear.dot(cov_target)
             observed_target = one_step
-            alternatives = ['twosided'] * overall.sum()
-            for i, f in enumerate(np.nonzero(features)[0]):
-                if active[f]:
-                    alternatives[i] = {1:'greater', -1:'less'}[int(self.selection_variable['sign'][f])]
+            alternatives = ['twosided'] * features.sum()
 
         if dispersion is None: # use Pearson's X^2
             dispersion = ((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(observed_target)))**2 / self._W).sum() / (n - Xfeat.shape[1])
@@ -1754,7 +1757,6 @@ def full_targets(self, features=None, dispersion=None):
             dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step)))**2 / self._W).sum() / (n - p)
 
         alternatives = ['twosided'] * features.sum()
-
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     def debiased_targets(self, dispersion=None):
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 91a78d7ea..2a30571e1 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -429,6 +429,7 @@ def __init__(self,
                  initial_point,
                  observed_score_state,
                  log_density,
+                 logdens_transform, # described how score enters log_density.
                  selection_info=None):
 
         '''

From ced3d9ca687981a2a5d84a5a6f631075941fce42 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 22 Feb 2018 10:21:24 -0800
Subject: [PATCH 484/617] edits to take into account given conditional_cov and
 mean

---
 selection/randomized/selective_MLE.py | 61 ++++++++++++++++-----------
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/selection/randomized/selective_MLE.py b/selection/randomized/selective_MLE.py
index f820ae21a..0aaa89f5d 100644
--- a/selection/randomized/selective_MLE.py
+++ b/selection/randomized/selective_MLE.py
@@ -88,9 +88,8 @@ def solve_barrier_nonneg(conjugate_arg,
 def selective_MLE(target_observed,
                   target_cov,
                   target_transform,
-                  opt_transform,
+                  cond_cov,
                   feasible_point,
-                  randomizer_precision,
                   step=1,
                   max_iter=30,
                   tol=1.e-8):
@@ -145,50 +144,62 @@ def selective_MLE(target_observed,
 
     """
 
-    A, data_offset = target_transform # data_offset = N
-    B, opt_offset = opt_transform     # opt_offset = u
+    """
 
-    nopt = B.shape[1]
-    ntarget = A.shape[1]
+    Notes
+    -----
 
-    # setup joint implied covariance matrix
+    With $(A, b)$ as `target_transform`, $\Sigma$ as `target_cov`  and $\Sigma_R$ as `cond_cov`, the joint density of
+    the target $\hat{\theta}$ under $H_0:\theta^*=0$ is proportional to
+
+    .. math::
+
+        (\theta, \omega) \mapsto \phi_{(\theta^*,\Sigma)}(\theta) \phi_{A\theta + b, \Sigma_R}(\omega) 1_K(\omega)
+
+    with $K$ representing the constraints on the randomization.
+    """
 
+    A, b = target_linear, target_offset = target_transform
+
+    cond_precision = np.linalg.inv(cond_cov)
     target_precision = np.linalg.inv(target_cov)
 
+    nopt = cond_precision.shape[0]
+    ntarget = A.shape[1]
+
+    # setup joint implied covariance matrix
+
     implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
-    implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
-    implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
-    implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
-    implied_precision[ntarget:,ntarget:] = B.T.dot(randomizer_precision).dot(B)
+    implied_precision[:ntarget,:ntarget] = A.T.dot(cond_precision).dot(A) + target_precision
+    implied_precision[:ntarget,ntarget:] = A.T.dot(cond_precision)
+    implied_precision[ntarget:,:ntarget] = cond_precision.dot(A)
+    implied_precision[ntarget:,ntarget:] = cond_precision
     implied_cov = np.linalg.inv(implied_precision)
 
-    implied_opt = implied_cov[ntarget:,ntarget:]
-    implied_target = implied_cov[:ntarget,:ntarget]
-    implied_cross = implied_cov[:ntarget,ntarget:]
+    implied_opt = implied_cov[ntarget:, ntarget:]
+    implied_target = implied_cov[:ntarget, :ntarget]
+    implied_cross = implied_cov[:ntarget, ntarget:]
 
     L = implied_cross.dot(np.linalg.inv(implied_opt))
     M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
-    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
-
-    conditioned_value = data_offset + opt_offset
+    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T)
 
     linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
-    offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
 
-    natparam_transform = (linear_term, offset_term)
-    conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
+    natparam_transform = (linear_term, target_offset)
+    conditional_natural_parameter = linear_term.dot(target_observed) - target_offset
 
     conditional_precision = implied_precision[ntarget:,ntarget:]
 
     M_1_inv = np.linalg.inv(M_1)
-    mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
+    mle_offset_term = - M_1_inv.dot(M_2.dot(target_offset))
     mle_transform = (M_1_inv, -M_1_inv.dot(L), mle_offset_term)
     var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
-                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
+                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(target_offset)))
 
     cross_covariance = np.linalg.inv(implied_precision[:ntarget, :ntarget]).dot(implied_precision[:ntarget, ntarget:])
     var_matrices = (np.linalg.inv(implied_opt), np.linalg.inv(implied_precision[:ntarget,:ntarget]),
-                    cross_covariance,target_precision)
+                    cross_covariance, target_precision)
 
     def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
                 feasible_point, conditional_precision, target_observed):
@@ -205,6 +216,8 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
 
         selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
 
+        # why are we resolving? hmm...
+
         var_target_lin, var_offset = var_transform
         var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
         _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
@@ -222,6 +235,6 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
                                     feasible_point, conditional_precision)
     sel_MLE, inv_hessian = mle_partial(target_observed)
 
-    implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value), offset_term])
+    implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(target_offset), -target_offset])
 
     return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform

From cccf6bbd2cce507a9146902fa73c7e8b70393a15 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 27 Feb 2018 00:56:46 -0800
Subject: [PATCH 485/617] if nothing selected, don't sample and return empty
 summary

---
 selection/randomized/lasso.py | 51 ++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 977432718..dc2e36912 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1660,32 +1660,35 @@ def summary(self,
             else:
                 observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features, dispersion=dispersion)
 
-        opt_sample = self.sampler.sample(ndraw,  burnin)
-
-        pivots = self.sampler.coefficient_pvalues(observed_target, 
-                                                  cov_target, 
-                                                  cov_target_score, 
-                                                  parameter=parameter, 
-                                                  sample=opt_sample, 
-                                                  alternatives=alternatives)
-        if not np.all(parameter == 0):
-            pvalues = self.sampler.coefficient_pvalues(observed_target, 
-                                                       cov_target, 
-                                                       cov_target_score, 
-                                                       parameter=np.zeros_like(parameter), 
-                                                       sample=opt_sample, 
-                                                       alternatives=alternatives)
-        else:
-            pvalues = pivots
+        if self._overall.sum() > 0:
+            opt_sample = self.sampler.sample(ndraw,  burnin)
+
+            pivots = self.sampler.coefficient_pvalues(observed_target, 
+                                                      cov_target, 
+                                                      cov_target_score, 
+                                                      parameter=parameter, 
+                                                      sample=opt_sample, 
+                                                      alternatives=alternatives)
+            if not np.all(parameter == 0):
+                pvalues = self.sampler.coefficient_pvalues(observed_target, 
+                                                           cov_target, 
+                                                           cov_target_score, 
+                                                           parameter=np.zeros_like(parameter), 
+                                                           sample=opt_sample, 
+                                                           alternatives=alternatives)
+            else:
+                pvalues = pivots
 
-        intervals = None
-        if compute_intervals:
-            intervals = self.sampler.confidence_intervals(observed_target, 
-                                                          cov_target, 
-                                                          cov_target_score,
-                                                          sample=opt_sample)
+            intervals = None
+            if compute_intervals:
+                intervals = self.sampler.confidence_intervals(observed_target, 
+                                                              cov_target, 
+                                                              cov_target_score,
+                                                              sample=opt_sample)
 
-        return pivots, pvalues, intervals
+            return pivots, pvalues, intervals
+        else:
+            return [], [], []
 
     # Targets of inference
     # and covariance with score representation

From 9b0822a030023b5d9dac5cbfefb4f849d5e98f99 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 27 Feb 2018 12:27:14 -0800
Subject: [PATCH 486/617] making sure solve_args are used

---
 selection/randomized/lasso.py | 36 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 881ec752a..09993d5b7 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -66,16 +66,15 @@ def __init__(self,
         (self.loss,
          self.epsilon,
          self.penalty,
-         self.randomization,
-         self.solve_args) = (loss,
-                             epsilon,
-                             penalty,
-                             randomization,
-                             solve_args)
+         self.randomization) = (loss,
+                                epsilon,
+                                penalty,
+                                randomization)
          
     # Methods needed for subclassing a query
 
-    def solve(self, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000,
+    def solve(self, nboot=2000,
+              solve_args={'min_its':20, 'tol':1.e-10}, 
               perturb=None):
 
         self.randomize(perturb=perturb)
@@ -84,13 +83,11 @@ def solve(self, solve_args={'min_its':20, 'tol':1.e-10}, nboot=2000,
          randomized_loss,
          epsilon,
          penalty,
-         randomization,
-         solve_args) = (self.loss,
-                        self.randomized_loss, 
-                        self.epsilon,
-                        self.penalty,
-                        self.randomization,
-                        self.solve_args)
+         randomization) = (self.loss,
+                           self.randomized_loss, 
+                           self.epsilon,
+                           self.penalty,
+                           self.randomization)
 
         # initial solution
 
@@ -329,12 +326,13 @@ def log_density(query,
 
                 if prec_array:
                     cond_precision = opt_linear.T.dot(prec.dot(opt_linear))
+                    cond_cov = np.linalg.inv(cond_precision)
                     logdens_linear = cond_cov.dot(opt_linear.T.dot(prec))
                 else:
                     cond_precision = opt_linear.T.dot(opt_linear) * prec
+                    cond_cov = np.linalg.inv(cond_precision)
                     logdens_linear = cond_cov.dot(opt_linear.T) * prec
 
-                cond_cov = np.linalg.inv(cond_precision)
                 cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
 
                 # need a log_density function
@@ -548,12 +546,13 @@ def new_projection(dual,
 
             if prec_array:
                 cond_precision = new_linear.T.dot(prec.dot(new_linear))
+                cond_cov = np.linalg.inv(cond_precision)
                 logdens_linear = cond_cov.dot(new_linear.T.dot(prec))
             else:
                 cond_precision = new_linear.T.dot(new_linear) * prec
+                cond_cov = np.linalg.inv(cond_precision)
                 logdens_linear = cond_cov.dot(new_linear.T) * prec
 
-            cond_cov = np.linalg.inv(cond_precision)
             cond_mean = -logdens_linear.dot(self.observed_score_state + new_offset)
 
             def log_density(logdens_linear, offset, cond_prec, score, opt):
@@ -739,7 +738,7 @@ def fit(self,
             self._view = glm_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
         else:
             self._view = glm_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
-        self._view.solve(nboot=nboot, perturb=perturb)
+        self._view.solve(nboot=nboot, perturb=perturb, solve_args=solve_args)
 
         self.signs = np.sign(self._view.initial_soln)
         self.selection_variable = self._view.selection_variable
@@ -1457,7 +1456,7 @@ def fit(self,
         self._initial_omega = perturb
         quad = rr.identity_quadratic(self.ridge_term, 0, -perturb)
         problem = rr.simple_problem(self.loglike, self.penalty)
-        self.initial_soln = problem.solve(quad)
+        self.initial_soln = problem.solve(quad, **solve_args)
 
         active_signs = np.sign(self.initial_soln)
         active = self._active = active_signs != 0
@@ -1570,6 +1569,7 @@ def signed_basis_vector(p, j, s):
         cond_precision = opt_linear.T.dot(opt_linear) * prec
         cond_cov = np.linalg.inv(cond_precision)
         logdens_linear = cond_cov.dot(opt_linear.T) * prec
+
         cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
 
         def log_density(logdens_linear, offset, cond_prec, score, opt):

From dd1e3beb885290dd2899abf925a66955e4454fce Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 28 Feb 2018 16:54:42 -0800
Subject: [PATCH 487/617] added selective MLE method

---
 selection/randomized/lasso.py                 | 85 ++++++++++++++++++-
 selection/randomized/query.py                 |  1 +
 .../randomized/tests/test_highdim_lasso.py    |  2 +
 3 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 6b9489d3c..7f2d12e3b 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -3,8 +3,7 @@
 from copy import copy
 
 import numpy as np
-import scipy
-from scipy import matrix
+from scipy.stats import norm as ndist
 
 import regreg.api as rr
 import regreg.affine as ra
@@ -22,6 +21,7 @@
 from .glm import (pairs_bootstrap_glm,
                   glm_nonparametric_bootstrap,
                   glm_parametric_covariance)
+from .selective_MLE import solve_barrier_nonneg
 
 class lasso_view(query):
 
@@ -1690,6 +1690,87 @@ def summary(self,
         else:
             return [], [], []
 
+    def selective_MLE(self,
+                      target="selected",
+                      features=None,
+                      parameter=None,
+                      level=0.9,
+                      compute_intervals=False,
+                      dispersion=None,
+                      solve_args={}):
+        """
+        Selective MLE of the chosen targets, with Z-scores and p-values.
+
+        Parameters
+        ----------
+
+        target : one of ['selected', 'full']
+
+        features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
+            Not used by the computation below.
+
+        level : float
+            Confidence level. Not used by the computation below.
+
+        compute_intervals : bool
+            Compute confidence intervals? Not used by the computation below.
+
+        dispersion : float (optional)
+            Use a known value for dispersion, or Pearson's X^2?
+
+        solve_args : dict (optional)
+            Keyword arguments forwarded to the barrier solver.
+
+        Returns (final_estimator, observed_info_mean, Z_scores, pvalues).
+        """
+
+        if parameter is None:
+            parameter = np.zeros(self.loglike.shape[0])
+
+        if target == 'selected':
+            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
+        elif target == 'full':
+            X, y = self.loglike.data
+            n, p = X.shape
+            if n > p:
+                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features, dispersion=dispersion)
+            else:
+                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features, dispersion=dispersion)
+
+        # working out conditional law of opt variables given
+        # target after decomposing score wrt target
+
+        prec_target = np.linalg.inv(cov_target)
+        logdens_lin, logdens_off = self.sampler.logdens_transform
+        target_lin = logdens_lin.dot(cov_target_score.T.dot(prec_target))
+        target_offset = self.sampler.affine_con.mean - target_lin.dot(observed_target)
+
+        # solve the barrier constrained problem
+
+        cov_opt = self.sampler.affine_con.covariance
+        prec_opt = np.linalg.inv(cov_opt)
+        conjugate_arg = prec_opt.dot(target_lin.dot(observed_target) + target_offset) # same as prec_opt.dot(self.sampler.affine_con.mean)
+
+        val, soln, hess = solve_barrier_nonneg(conjugate_arg,
+                                               prec_opt,
+                                               **solve_args)
+
+        final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(target_lin.dot(observed_target) + target_offset - soln)))
+
+        L = target_lin.T.dot(prec_opt)
+        observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T))
+        observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target))
+
+        Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean))
+        pvalues = ndist.cdf(Z_scores)
+        pvalues = 2 * np.minimum(pvalues, 1 - pvalues)
+        return final_estimator, observed_info_mean, Z_scores, pvalues
+
     # Targets of inference
     # and covariance with score representation
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 2a30571e1..95f5f3c18 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -448,6 +448,7 @@ def __init__(self,
         self.observed_score_state = observed_score_state
         self.selection_info = selection_info
         self.log_density = log_density
+        self.logdens_transform = logdens_transform
 
     def sample(self, ndraw, burnin):
         '''
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index 15a4922b8..adb31a59b 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -37,6 +37,8 @@ def test_highdim_lasso(n=200, p=10, signal_fac=1.5, s=5, ndraw=5000, burnin=1000
     signs = conv.fit()
     nonzero = signs != 0
 
+    print conv.selective_MLE(target="full")
+
     if full:
         _, pval, intervals = conv.summary(target="full",
                                           ndraw=ndraw,

From e29388624c2b702996c633feb4332404ac7b69cc Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 28 Feb 2018 21:53:58 -0800
Subject: [PATCH 488/617] moved selective mle to the affine_gaussian sampler

---
 selection/randomized/lasso.py                 | 27 +--------------
 selection/randomized/query.py                 | 33 +++++++++++++++++++
 .../randomized/tests/test_highdim_lasso.py    |  8 +++--
 3 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 7f2d12e3b..fc8eba9f2 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -21,7 +21,6 @@
 from .glm import (pairs_bootstrap_glm,
                   glm_nonparametric_bootstrap,
                   glm_parametric_covariance)
-from .selective_MLE import solve_barrier_nonneg
 
 class lasso_view(query):
 
@@ -1745,31 +1744,7 @@ def selective_MLE(self,
         # working out conditional law of opt variables given
         # target after decomposing score wrt target
 
-        prec_target = np.linalg.inv(cov_target)
-        logdens_lin, logdens_off = self.sampler.logdens_transform
-        target_lin = logdens_lin.dot(cov_target_score.T.dot(prec_target))
-        target_offset = self.sampler.affine_con.mean - target_lin.dot(observed_target)
-
-        # solve the barrier constrained problem
-
-        cov_opt = self.sampler.affine_con.covariance
-        prec_opt = np.linalg.inv(cov_opt)
-        conjugate_arg = prec_opt.dot(target_lin.dot(observed_target) + target_offset) # same as prec_opt.dot(self.sampler.affine_con.mean)
-
-        val, soln, hess = solve_barrier_nonneg(conjugate_arg,
-                                               prec_opt,
-                                               **solve_args)
-
-        final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(target_lin.dot(observed_target) + target_offset - soln)))
-
-        L = target_lin.T.dot(prec_opt)
-        observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T))
-        observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target))
-
-        Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean))
-        pvalues = ndist.cdf(Z_scores)
-        pvalues = 2 * np.minimum(pvalues, 1 - pvalues)
-        return final_estimator, observed_info_mean, Z_scores, pvalues
+        return self.sampler.selective_MLE(observed_target, cov_target, cov_target_score, solve_args=solve_args)
 
     # Targets of inference
     # and covariance with score representation
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 95f5f3c18..fc4f109b0 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -6,6 +6,8 @@
 
 from regreg.affine import power_L
 
+from .selective_MLE import solve_barrier_nonneg
+
 from ..distributions.api import discrete_family
 from ..sampling.langevin import projected_langevin
 from ..constraints.affine import sample_from_constraints
@@ -473,6 +475,37 @@ def sample(self, ndraw, burnin):
                                        ndraw=ndraw,
                                        burnin=burnin)
 
+    def selective_MLE(self, observed_target, cov_target, cov_target_score, solve_args={}):
+        """
+        Selective MLE based on approximation of
+        CGF.
+        Returns (final_estimator, observed_info_mean, Z_scores, pvalues).
+        """
+        prec_target = np.linalg.inv(cov_target)
+        logdens_lin, logdens_off = self.logdens_transform
+        target_lin = logdens_lin.dot(cov_target_score.T.dot(prec_target))
+        target_offset = self.affine_con.mean - target_lin.dot(observed_target)
+        # barrier problem below is posed with the precision of affine_con.covariance
+        cov_opt = self.affine_con.covariance
+        prec_opt = np.linalg.inv(cov_opt)
+        conjugate_arg = prec_opt.dot(target_lin.dot(observed_target) + target_offset) # same as prec_opt.dot(self.sampler.affine_con.mean)
+
+        val, soln, hess = solve_barrier_nonneg(conjugate_arg,
+                                               prec_opt,
+                                               **solve_args)
+
+        final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(target_lin.dot(observed_target) + target_offset - soln)))
+
+        L = target_lin.T.dot(prec_opt)
+        observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T))
+        observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target))
+        # Z-scores against zero, then two-sided normal p-values
+        Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean))
+        pvalues = ndist.cdf(Z_scores)
+        pvalues = 2 * np.minimum(pvalues, 1 - pvalues)
+        return final_estimator, observed_info_mean, Z_scores, pvalues
+
+
 class optimization_intervals(object):
 
     def __init__(self,
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index adb31a59b..4a31b8df2 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -9,7 +9,7 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-def test_highdim_lasso(n=200, p=10, signal_fac=1.5, s=5, ndraw=5000, burnin=1000, sigma=3, full=False, rho=0.4, randomizer_scale=1):
+def test_highdim_lasso(n=200, p=50, signal_fac=1.5, s=5, ndraw=5000, burnin=1000, sigma=3, full=True, rho=0.4, randomizer_scale=1):
     """
     Compare to R randomized lasso
     """
@@ -37,7 +37,11 @@ def test_highdim_lasso(n=200, p=10, signal_fac=1.5, s=5, ndraw=5000, burnin=1000
     signs = conv.fit()
     nonzero = signs != 0
 
-    print conv.selective_MLE(target="full")
+    estimate, _, _, pv = conv.selective_MLE(target="full")
+    print(estimate, 'selective MLE')
+    print(beta[nonzero], 'truth')
+    print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
+    print(pv[beta[nonzero] == 0], pv[beta[nonzero] != 0])
 
     if full:
         _, pval, intervals = conv.summary(target="full",

From 088e67130c7fa555a033560c217b157ddcaaa7f4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 28 Feb 2018 22:42:24 -0800
Subject: [PATCH 489/617] 1D test of LASSO MLE

---
 selection/randomized/lasso.py                 |  1 +
 .../tests/test_selective_MLE_onedim.py        | 83 +++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 selection/randomized/tests/test_selective_MLE_onedim.py

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index fc8eba9f2..b2ab071bf 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1744,6 +1744,7 @@ def selective_MLE(self,
         # working out conditional law of opt variables given
         # target after decomposing score wrt target
 
+        print(observed_target, cov_target, cov_target_score)
         return self.sampler.selective_MLE(observed_target, cov_target, cov_target_score, solve_args=solve_args)
 
     # Targets of inference
diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
new file mode 100644
index 000000000..dfea832f2
--- /dev/null
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -0,0 +1,83 @@
+import numpy as np
+import nose.tools as nt
+
+import selection.randomized.lasso as L; reload(L)
+from selection.randomized.lasso import highdim 
+from selection.tests.instance import gaussian_instance
+import matplotlib.pyplot as plt
+
+def test_onedim_lasso(n=200, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000, sigma=3, full=True, rho=0.4, randomizer_scale=1):
+    """
+    Randomized LASSO on simulated data (p=1 by default): print the selective
+    MLE, return summary p-values split into (null, non-null) coefficients.
+    """
+    inst, const = gaussian_instance, highdim.gaussian
+    signal = signal_fac * np.sqrt(2 * np.log(p+1.))
+    X, Y, beta = inst(n=n,
+                      p=p, 
+                      signal=signal, 
+                      s=s, 
+                      equicorrelated=False, 
+                      rho=rho, 
+                      sigma=sigma, 
+                      random_signs=True)[:3]
+
+    n, p = X.shape
+
+    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p+1.)) * sigma
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer_scale=randomizer_scale * sigma)
+    
+    signs = conv.fit()
+    nonzero = signs != 0
+
+    estimate, _, _, pv = conv.selective_MLE(target="full")
+    print(estimate, 'selective MLE')
+    print(beta[nonzero], 'truth')
+    print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
+    print(pv[beta[nonzero] == 0], pv[beta[nonzero] != 0])
+
+    if full:
+        _, pval, intervals = conv.summary(target="full",
+                                          ndraw=ndraw,
+                                          burnin=burnin, 
+                                          compute_intervals=False)
+    else:
+        _, pval, intervals = conv.summary(target="selected",
+                                          ndraw=ndraw,
+                                          burnin=burnin, 
+                                          compute_intervals=False)
+
+    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
+
+
+def main(nsim=500):
+    """Run `test_onedim_lasso` `nsim` times, saving p-value ECDFs to plot.pdf."""
+    P0, PA = [], []
+    from statsmodels.distributions import ECDF
+
+    n, p = 500, 200
+
+    for i in range(nsim):
+        try:
+            p0, pA = test_onedim_lasso(n=n, p=p, full=True)
+        except Exception:  # skip failed replicates, but let KeyboardInterrupt through
+            p0, pA = [], []
+        P0.extend(p0)
+        PA.extend(pA)
+        print(np.mean(P0), np.std(P0), np.mean(np.array(PA) < 0.05))
+
+        if i % 3 == 0 and i > 0:
+            U = np.linspace(0, 1, 101)
+            plt.clf()
+            if len(P0) > 0:
+                plt.plot(U, ECDF(P0)(U))
+            if len(PA) > 0:
+                plt.plot(U, ECDF(PA)(U), 'r')
+            plt.plot([0, 1], [0, 1], 'k--')
+            plt.savefig("plot.pdf")
+    plt.show()
+

From c56c94d8f9c854b987ec6c6bbc3580bc1a37eb35 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 28 Feb 2018 22:57:53 -0800
Subject: [PATCH 490/617] using python solver for now

---
 selection/randomized/lasso.py                 |  6 +-
 selection/randomized/query.py                 | 70 +++++++++++++++++--
 selection/randomized/selective_MLE.py         |  4 +-
 .../tests/test_selective_MLE_onedim.py        | 33 ++++-----
 4 files changed, 90 insertions(+), 23 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index b2ab071bf..d718f1aac 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1745,7 +1745,11 @@ def selective_MLE(self,
         # target after decomposing score wrt target
 
         print(observed_target, cov_target, cov_target_score)
-        return self.sampler.selective_MLE(observed_target, cov_target, cov_target_score, solve_args=solve_args)
+        return self.sampler.selective_MLE(observed_target, 
+                                          cov_target, 
+                                          cov_target_score, 
+                                          feasible_point=self.observed_opt_state,
+                                          solve_args=solve_args)
 
     # Targets of inference
     # and covariance with score representation
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index fc4f109b0..b3b0e50ed 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -475,7 +475,7 @@ def sample(self, ndraw, burnin):
                                        ndraw=ndraw,
                                        burnin=burnin)
 
-    def selective_MLE(self, observed_target, cov_target, cov_target_score, solve_args={}):
+    def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_point, solve_args={}):
         """
         Selective MLE based on approximation of
         CGF.
@@ -490,9 +490,10 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, solve_arg
         prec_opt = np.linalg.inv(cov_opt)
         conjugate_arg = prec_opt.dot(target_lin.dot(observed_target) + target_offset) # same as prec_opt.dot(self.sampler.affine_con.mean)
 
-        val, soln, hess = solve_barrier_nonneg(conjugate_arg,
-                                               prec_opt,
-                                               **solve_args)
+        soln, val, hess = solve_barrier_nonneg_(conjugate_arg,
+                                                prec_opt,
+                                                feasible_point=feasible_point,
+                                                **solve_args)
 
         final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(target_lin.dot(observed_target) + target_offset - soln)))
 
@@ -688,3 +689,64 @@ def naive_pvalues(diag_cov, observed, parameter):
         pval = ndist.cdf((observed[j] - parameter[j])/sigma)
         pvalues[j] = 2 * min(pval, 1-pval)
     return pvalues
+
+def solve_barrier_nonneg_(conjugate_arg,
+                         precision,
+                         feasible_point=None,
+                         step=1,
+                         nstep=1000,
+                         tol=1.e-8):
+    """
+    Minimize -u'conjugate_arg + u'precision u / 2 + sum(log(1 + scaling / u))
+    over u > 0 by backtracking gradient descent, scaling = sqrt(diag(precision)).
+
+    Starts at `feasible_point` (default 1 / scaling) with step size `step`, runs
+    at most `nstep` iterations and stops once the relative decrease is below `tol`.
+    Returns (minimizer, objective value, inverse Hessian at the minimizer).
+    Raises ValueError if no feasible step is found or the objective stays NaN.
+    """
+    scaling = np.sqrt(np.diag(precision))
+    if feasible_point is None:
+        feasible_point = 1. / scaling
+
+    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
+    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
+    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
+
+    current, current_value = feasible_point, np.inf
+    for itercount in range(nstep):
+        gradient = grad(current)
+        # make sure proposal is feasible
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * gradient
+            if np.all(proposal > 0):
+                break
+            step *= 0.5
+            if count >= 40:
+                raise ValueError('not finding a feasible point')
+
+        # make sure proposal is a descent; a NaN objective never compares
+        # <= current_value, so give up instead of halving forever
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * gradient
+            proposed_value = objective(proposal)
+            if proposed_value <= current_value:
+                break
+            step *= 0.5
+            if count >= 100 and np.isnan(proposed_value):
+                raise ValueError('objective is NaN')
+
+        # stop if relative decrease is small
+        converged = np.fabs(current_value - proposed_value) < tol * np.fabs(current_value)
+        current, current_value = proposal, proposed_value
+        if converged:
+            break
+        if itercount % 4 == 0:
+            step *= 2
+
+    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
+    return current, current_value, hess
diff --git a/selection/randomized/selective_MLE.py b/selection/randomized/selective_MLE.py
index 0aaa89f5d..87831a9f6 100644
--- a/selection/randomized/selective_MLE.py
+++ b/selection/randomized/selective_MLE.py
@@ -66,7 +66,7 @@ def solve_barrier_nonneg(conjugate_arg,
     scaling = np.sqrt(np.diag(precision))
 
     if initial is None:
-        initial, proposed, grad = np.zeros((3, p))
+        initial, proposed, grad = np.ones((3, p))
 
     if step is None:
         step = 1. / power_L(precision)
@@ -83,7 +83,7 @@ def solve_barrier_nonneg(conjugate_arg,
     barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
     hess = np.linalg.inv(precision + np.diag(barrier_hessian(soln)))
 
-    return val, soln, hess
+    return soln, val, hess
 
 def selective_MLE(target_observed,
                   target_cov,
diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index dfea832f2..9aafcc6ee 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -34,24 +34,25 @@ def test_onedim_lasso(n=200, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000,
     signs = conv.fit()
     nonzero = signs != 0
 
-    estimate, _, _, pv = conv.selective_MLE(target="full")
-    print(estimate, 'selective MLE')
-    print(beta[nonzero], 'truth')
-    print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
-    print(pv[beta[nonzero] == 0], pv[beta[nonzero] != 0])
+    if nonzero.sum():
+        estimate, _, _, pv = conv.selective_MLE(target="full")
+        print(estimate, 'selective MLE')
+        print(beta[nonzero], 'truth')
+        print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
+        print(pv[beta[nonzero] == 0], pv[beta[nonzero] != 0])
 
-    if full:
-        _, pval, intervals = conv.summary(target="full",
-                                          ndraw=ndraw,
-                                          burnin=burnin, 
-                                          compute_intervals=False)
-    else:
-        _, pval, intervals = conv.summary(target="selected",
-                                          ndraw=ndraw,
-                                          burnin=burnin, 
-                                          compute_intervals=False)
+        if full:
+            _, pval, intervals = conv.summary(target="full",
+                                              ndraw=ndraw,
+                                              burnin=burnin, 
+                                              compute_intervals=False)
+        else:
+            _, pval, intervals = conv.summary(target="selected",
+                                              ndraw=ndraw,
+                                              burnin=burnin, 
+                                              compute_intervals=False)
 
-    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
+        return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
 
 
 def main(nsim=500):

From 4815d399ff35c4d63a6a1430d8e17eaf36d6a80f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 28 Feb 2018 23:41:26 -0800
Subject: [PATCH 491/617] getting rid of ridge term

---
 selection/randomized/lasso.py                 |  2 ++
 .../tests/test_selective_MLE_onedim.py        | 21 +++++--------------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index d718f1aac..b2924c5b8 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -369,6 +369,7 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                          covariance=cond_cov)
 
                 logdens_transform = (logdens_linear, opt_offset)
+
                 self._sampler = affine_gaussian_sampler(affine_con,
                                                         self.observed_opt_state,
                                                         self.observed_score_state,
@@ -1595,6 +1596,7 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                  covariance=cond_cov)
 
         logdens_transform = (logdens_linear, opt_offset)
+
         self.sampler = affine_gaussian_sampler(affine_con,
                                                self.observed_opt_state,
                                                self.observed_score_state,
diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index 9aafcc6ee..0dbff802b 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -6,7 +6,7 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-def test_onedim_lasso(n=200, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000, sigma=3, full=True, rho=0.4, randomizer_scale=1):
+def test_onedim_lasso(n=5000, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000, sigma=3, full=True, rho=0.4, randomizer_scale=1):
     """
     Compare to R randomized lasso
     """
@@ -29,30 +29,19 @@ def test_onedim_lasso(n=200, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000,
     conv = const(X, 
                  Y, 
                  W, 
-                 randomizer_scale=randomizer_scale * sigma)
+                 randomizer_scale=randomizer_scale * sigma,
+                 ridge_term=0.)
     
     signs = conv.fit()
     nonzero = signs != 0
 
     if nonzero.sum():
+
         estimate, _, _, pv = conv.selective_MLE(target="full")
         print(estimate, 'selective MLE')
         print(beta[nonzero], 'truth')
         print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
-        print(pv[beta[nonzero] == 0], pv[beta[nonzero] != 0])
-
-        if full:
-            _, pval, intervals = conv.summary(target="full",
-                                              ndraw=ndraw,
-                                              burnin=burnin, 
-                                              compute_intervals=False)
-        else:
-            _, pval, intervals = conv.summary(target="selected",
-                                              ndraw=ndraw,
-                                              burnin=burnin, 
-                                              compute_intervals=False)
-
-        return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
+        print(pv)
 
 
 def main(nsim=500):

From ed0d65409b8240fd9e277637426fdf205f0bb5a8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 1 Mar 2018 00:03:40 -0800
Subject: [PATCH 492/617] one dim problem

---
 .../tests/test_selective_MLE_onedim.py        | 30 +++++++------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index 0dbff802b..fc4516fd5 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -6,31 +6,21 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-def test_onedim_lasso(n=5000, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000, sigma=3, full=True, rho=0.4, randomizer_scale=1):
+def test_onedim_lasso(n=500000, W=1.5, beta=2., sigma=1, randomizer_scale=1):
     """
     Compare to R randomized lasso
     """
 
-    inst, const = gaussian_instance, highdim.gaussian
-    signal = signal_fac * np.sqrt(2 * np.log(p+1.))
-    X, Y, beta = inst(n=n,
-                      p=p, 
-                      signal=signal, 
-                      s=s, 
-                      equicorrelated=False, 
-                      rho=rho, 
-                      sigma=sigma, 
-                      random_signs=True)[:3]
+    beta = np.array([beta])
+    X = np.random.standard_normal((n, 1))
+    X /= np.sqrt((X**2).sum(0))[None, :]
+    Y = X.dot(beta) + sigma * np.random.standard_normal(n)
 
-    n, p = X.shape
-
-    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p+1.)) * sigma
-
-    conv = const(X, 
-                 Y, 
-                 W, 
-                 randomizer_scale=randomizer_scale * sigma,
-                 ridge_term=0.)
+    conv = highdim.gaussian(X, 
+                            Y, 
+                            W * np.ones(X.shape[1]), 
+                            randomizer_scale=randomizer_scale * sigma,
+                            ridge_term=0.)
     
     signs = conv.fit()
     nonzero = signs != 0

From 7c524e972cdbbeb8227977beb34443bc0f40728f Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 1 Mar 2018 12:07:13 -0800
Subject: [PATCH 493/617] fixed an import

---
 selection/randomized/query.py                 |  2 +-
 .../tests/test_selective_MLE_onedim.py        | 25 ++++++++++---------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index b3b0e50ed..27551ebbc 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -6,7 +6,7 @@
 
 from regreg.affine import power_L
 
-from .selective_MLE import solve_barrier_nonneg
+#from .selective_MLE import solve_barrier_nonneg
 
 from ..distributions.api import discrete_family
 from ..sampling.langevin import projected_langevin
diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index 0dbff802b..19d487f14 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -2,7 +2,7 @@
 import nose.tools as nt
 
 import selection.randomized.lasso as L; reload(L)
-from selection.randomized.lasso import highdim 
+from selection.randomized.lasso import highdim
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
@@ -14,24 +14,24 @@ def test_onedim_lasso(n=5000, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000,
     inst, const = gaussian_instance, highdim.gaussian
     signal = signal_fac * np.sqrt(2 * np.log(p+1.))
     X, Y, beta = inst(n=n,
-                      p=p, 
-                      signal=signal, 
-                      s=s, 
-                      equicorrelated=False, 
-                      rho=rho, 
-                      sigma=sigma, 
+                      p=p,
+                      signal=signal,
+                      s=s,
+                      equicorrelated=False,
+                      rho=rho,
+                      sigma=sigma,
                       random_signs=True)[:3]
 
     n, p = X.shape
 
     W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p+1.)) * sigma
 
-    conv = const(X, 
-                 Y, 
-                 W, 
+    conv = const(X,
+                 Y,
+                 W,
                  randomizer_scale=randomizer_scale * sigma,
                  ridge_term=0.)
-    
+
     signs = conv.fit()
     nonzero = signs != 0
 
@@ -43,6 +43,7 @@ def test_onedim_lasso(n=5000, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000,
         print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
         print(pv)
 
+test_onedim_lasso()
 
 def main(nsim=500):
 
@@ -59,7 +60,7 @@ def main(nsim=500):
         P0.extend(p0)
         PA.extend(pA)
         print(np.mean(P0), np.std(P0), np.mean(np.array(PA) < 0.05))
-    
+
         if i % 3 == 0 and i > 0:
             U = np.linspace(0, 1, 101)
             plt.clf()

From 8b7deee0de54cd23a5c6d0d80d22d9cf5ce2d8f7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 1 Mar 2018 21:37:45 -0800
Subject: [PATCH 494/617] sign change in target_lin

---
 selection/randomized/query.py                 |  22 +-
 selection/randomized/selective_MLE.py         | 274 ++++++------------
 .../tests/test_selective_MLE_onedim.py        |  50 +++-
 3 files changed, 147 insertions(+), 199 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 27551ebbc..8e61c46ff 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -483,18 +483,24 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
         """
         prec_target = np.linalg.inv(cov_target)
         logdens_lin, logdens_off = self.logdens_transform
-        target_lin = logdens_lin.dot(cov_target_score.T.dot(prec_target))
+        target_lin = -logdens_lin.dot(cov_target_score.T.dot(prec_target))
         target_offset = self.affine_con.mean - target_lin.dot(observed_target)
 
         cov_opt = self.affine_con.covariance
+        #print("cov target", cov_target, prec_target)
         prec_opt = np.linalg.inv(cov_opt)
-        conjugate_arg = prec_opt.dot(target_lin.dot(observed_target) + target_offset) # same as prec_opt.dot(self.sampler.affine_con.mean)
 
+        conjugate_arg = prec_opt.dot(target_lin.dot(observed_target) + target_offset)# same as prec_opt.dot(self.sampler.affine_con.mean)
+
+        #print("precision randomization", prec_opt, conjugate_arg, feasible_point)
+        feasible_point = np.ones(prec_opt.shape[0])
         soln, val, hess = solve_barrier_nonneg_(conjugate_arg,
                                                 prec_opt,
                                                 feasible_point=feasible_point,
                                                 **solve_args)
 
+        print("check target lin and target offset", target_lin, target_offset)
+
         final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(target_lin.dot(observed_target) + target_offset - soln)))
 
         L = target_lin.T.dot(prec_opt)
@@ -691,11 +697,11 @@ def naive_pvalues(diag_cov, observed, parameter):
     return pvalues
 
 def solve_barrier_nonneg_(conjugate_arg,
-                         precision,
-                         feasible_point=None,
-                         step=1,
-                         nstep=1000,
-                         tol=1.e-8):
+                          precision,
+                          feasible_point=None,
+                          step=1,
+                          nstep=1000,
+                          tol=1.e-8):
 
     scaling = np.sqrt(np.diag(precision))
 
@@ -747,6 +753,6 @@ def solve_barrier_nonneg_(conjugate_arg,
         if itercount % 4 == 0:
             step *= 2
 
-    print(grad(current))
     hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
     return current, current_value, hess
+
diff --git a/selection/randomized/selective_MLE.py b/selection/randomized/selective_MLE.py
index 87831a9f6..b8831001b 100644
--- a/selection/randomized/selective_MLE.py
+++ b/selection/randomized/selective_MLE.py
@@ -1,205 +1,57 @@
-from functools import partial
-
 import numpy as np
+import functools
 
-from regreg.api import power_L
-
-from .selective_MLE_utils import barrier_solve_
-
-def solve_barrier_nonneg(conjugate_arg,
-                         precision,
-                         initial=None,
-                         step=None,
-                         max_iter=150,
-                         value_tol=1.e-6):
-    """
-    Solve a smoothed version of the problem
-
-    .. math::
-    
-        \text{minimize}_{\beta \geq 0} -u^T\beta + \frac{1}{2} \beta^T\Theta \beta
-
-    with `conjugate_arg` as $u$ and `precision` as $\Theta$. The smoothing
-    is done by adding a barrier function with scale determined
-    by the diagonal of precision.
-
-    Parameters
-    ----------
-
-    conjugate_arg: np.float(p)
-        The value of the problem is a convex conjugate -- this is the
-        argument to that function.
-
-    precision: np.float((p,p))
-        A non-negative definite matrix -- precision meaning the inverse
-        of a covariance matrix.
-
-    initial: np.float(p)
-        Optional warm start.
-
-    step: float
-        An initial step size. Defaults to inverse of
-        (approximate) largest eigenvalue of precision.
-
-    max_iter: int
-        When to stop optimization.
-
-    value_tol: float
-        Relative decrease in value for stopping.
-    
-    Returns
-    -------
-
-    value: float
-        The value of the optimization problem.
-
-    soln: np.float(p)
-        The solution to the optimization problem,
-        also the gradient of the value function.
-
-    hess: np.float(p)
-        The Hessian of the value function.
-
-    """
-
-    p = precision.shape[0]
-    scaling = np.sqrt(np.diag(precision))
-
-    if initial is None:
-        initial, proposed, grad = np.ones((3, p))
-
-    if step is None:
-        step = 1. / power_L(precision)
-
-    soln, val = barrier_solve_(grad,
-                               initial,
-                               proposed,
-                               conjugate_arg,
-                               precision,
-                               scaling,
-                               step,
-                               value_tol=value_tol)
-
-    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
-    hess = np.linalg.inv(precision + np.diag(barrier_hessian(soln)))
-
-    return soln, val, hess
-
-def selective_MLE(target_observed,
-                  target_cov,
-                  target_transform,
-                  cond_cov,
-                  feasible_point,
-                  step=1,
-                  max_iter=30,
-                  tol=1.e-8):
-
-    """
-
-    Parameters
-    ----------
-
-    target_observed: np.float
-        The observed value of our target estimator.
-    
-    target_cov: np.float
-        Covariance matrix of target estimator.
-
-    target_transform: tuple
-        A pair (A, b) consisting of a linear transformation A and an offset b
-        representing an affine transformation $x \mapsto Ax+b$.
-        This transform should be computed as part of a linear decomposition of the
-        score of an optimization problem with respect to a target
-        of interest.
-
-    opt_transform: tuple
-        A pair (A, b) consisting of a linear transformation A and an offset b
-        representing an affine transformation $x \mapsto Ax+b$.
-        This transformation usually comes from the KKT conditions
-        of an appropriate (randomized) optimization problem.
-
-    feasible_point: np.float
-        An appropriate feasible point for the optimization
-        problem in the approximate likelihood.
-
-    randomization_precision: np.float((p,p))
-        Precision matrix of randomization in the randomized
-        optimization problem.
-
-    step: float
-        An initial step size. Defaults to inverse of
-        (approximate) largest eigenvalue of precision.
+def solve_UMVU(target_transform,
+               opt_transform,
+               target_observed,
+               feasible_point,
+               target_cov,
+               randomizer_precision):
 
-    max_iter: int
-        When to stop optimization.
+    A, data_offset = target_transform # data_offset = N
+    B, opt_offset = opt_transform     # opt_offset = u
 
-    value_tol: float
-        Relative decrease in value for stopping.
-    
-    
-    Returns
-    -------
-
-    XXXX
-
-    """
-
-    """
-
-    Notes
-    -----
-
-    With $(A, b)$ as `target_transform`, $\Sigma$ as `target_cov`  and $\Sigma_R$ as `cond_cov`, the joint density of
-    the target $\hat{\theta}$ under $H_0:\theta^*=0$ is proportional to
-
-    .. math::
-
-        (\theta, \omega) \mapsto \phi_{(\theta^*,\Sigma)}(\theta) \phi_{A\theta + b, \Sigma_R}(\omega) 1_K(\omega)
-
-    with $K$ representing the constraints on the randomization.
-    """
-
-    A, b = target_linear, target_offset = target_transform
-
-    cond_precision = np.linalg.inv(cond_cov)
-    target_precision = np.linalg.inv(target_cov)
-
-    nopt = cond_precision.shape[0]
+    nopt = B.shape[1]
     ntarget = A.shape[1]
 
     # setup joint implied covariance matrix
 
+    target_precision = np.linalg.inv(target_cov)
+
     implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
-    implied_precision[:ntarget,:ntarget] = A.T.dot(cond_precision).dot(A) + target_precision
-    implied_precision[:ntarget,ntarget:] = A.T.dot(cond_precision)
-    implied_precision[ntarget:,:ntarget] = cond_precision.dot(A)
-    implied_precision[ntarget:,ntarget:] = cond_precision
+    implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
+    implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
+    implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
+    implied_precision[ntarget:,ntarget:] = B.T.dot(randomizer_precision).dot(B)
     implied_cov = np.linalg.inv(implied_precision)
 
-    implied_opt = implied_cov[ntarget:, ntarget:]
-    implied_target = implied_cov[:ntarget, :ntarget]
-    implied_cross = implied_cov[:ntarget, ntarget:]
+    implied_opt = implied_cov[ntarget:,ntarget:]
+    implied_target = implied_cov[:ntarget,:ntarget]
+    implied_cross = implied_cov[:ntarget,ntarget:]
 
     L = implied_cross.dot(np.linalg.inv(implied_opt))
     M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
-    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T)
+    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
+
+    conditioned_value = data_offset + opt_offset
 
     linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
+    offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
 
-    natparam_transform = (linear_term, target_offset)
-    conditional_natural_parameter = linear_term.dot(target_observed) - target_offset
+    natparam_transform = (linear_term, offset_term)
+    conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
 
     conditional_precision = implied_precision[ntarget:,ntarget:]
 
     M_1_inv = np.linalg.inv(M_1)
-    mle_offset_term = - M_1_inv.dot(M_2.dot(target_offset))
+    mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
     mle_transform = (M_1_inv, -M_1_inv.dot(L), mle_offset_term)
     var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
-                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(target_offset)))
+                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
 
     cross_covariance = np.linalg.inv(implied_precision[:ntarget, :ntarget]).dot(implied_precision[:ntarget, ntarget:])
     var_matrices = (np.linalg.inv(implied_opt), np.linalg.inv(implied_precision[:ntarget,:ntarget]),
-                    cross_covariance, target_precision)
+                    cross_covariance,target_precision)
 
     def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
                 feasible_point, conditional_precision, target_observed):
@@ -216,8 +68,6 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
 
         selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
 
-        # why are we resolving? hmm...
-
         var_target_lin, var_offset = var_transform
         var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
         _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
@@ -235,6 +85,70 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
                                     feasible_point, conditional_precision)
     sel_MLE, inv_hessian = mle_partial(target_observed)
 
-    implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(target_offset), -target_offset])
+    #print("shapes", target_precision.dot(sel_MLE).shape,  A.T.dot(randomizer_precision).shape, offset_term.shape)
+    implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value),
+                                   offset_term*np.ones((1,1))])
+
+    print("selective MLE", sel_MLE)
+    return np.squeeze(sel_MLE)
+        #, inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
+
+def solve_barrier_nonneg(conjugate_arg,
+                         precision,
+                         feasible_point=None,
+                         step=1,
+                         nstep=1000,
+                         tol=1.e-8):
+
+    scaling = np.sqrt(np.diag(precision))
+
+    if feasible_point is None:
+        feasible_point = 1. / scaling
+
+    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
+    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
+    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
+
+    current = feasible_point
+    current_value = np.inf
+
+    for itercount in range(nstep):
+        newton_step = grad(current)
+
+        # make sure proposal is feasible
+
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * newton_step
+            if np.all(proposal > 0):
+                break
+            step *= 0.5
+            if count >= 40:
+                raise ValueError('not finding a feasible point')
+
+        # make sure proposal is a descent
+
+        count = 0
+        while True:
+            proposal = current - step * newton_step
+            proposed_value = objective(proposal)
+            if proposed_value <= current_value:
+                break
+            step *= 0.5
+
+        # stop if relative decrease is small
+
+        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+            current = proposal
+            current_value = proposed_value
+            break
+
+        current = proposal
+        current_value = proposed_value
+
+        if itercount % 4 == 0:
+            step *= 2
 
-    return np.squeeze(sel_MLE), inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
+    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
+    return current, current_value, hess
\ No newline at end of file
diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index 19d487f14..36d60b914 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -5,26 +5,42 @@
 from selection.randomized.lasso import highdim
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
+from selection.randomized.selective_MLE import solve_UMVU, solve_barrier_nonneg
 
-def test_onedim_lasso(n=5000, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000, sigma=3, full=True, rho=0.4, randomizer_scale=1):
+
+def test_onedim_lasso(n=200, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000, sigma=1., full=True, rho=0.4, randomizer_scale=1.):
     """
     Compare to R randomized lasso
     """
 
     inst, const = gaussian_instance, highdim.gaussian
     signal = signal_fac * np.sqrt(2 * np.log(p+1.))
-    X, Y, beta = inst(n=n,
-                      p=p,
-                      signal=signal,
-                      s=s,
-                      equicorrelated=False,
-                      rho=rho,
-                      sigma=sigma,
-                      random_signs=True)[:3]
+
+    # X, Y, beta = inst(n=n,
+    #                   p=p,
+    #                   signal=signal,
+    #                   s=s,
+    #                   equicorrelated=False,
+    #                   rho=rho,
+    #                   sigma=sigma,
+    #                   random_signs=True)[:3]
+
+    X = 1./np.sqrt(n) * np.ones((n,1))
+    beta = np.zeros(p)
+    signal = np.atleast_1d(signal)
+    if signal.shape == (1,):
+        beta[:s] = signal[0]
+    else:
+        beta[:s] = np.linspace(signal[0], signal[1], s)
+    beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
+    np.random.shuffle(beta)
+    Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma
 
     n, p = X.shape
+    #print("covariates X", X)
 
     W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p+1.)) * sigma
+    print("lambda", W)
 
     conv = const(X,
                  Y,
@@ -33,13 +49,25 @@ def test_onedim_lasso(n=5000, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000,
                  ridge_term=0.)
 
     signs = conv.fit()
+    #print("conjugate_arg from test", (1./9.)*(signs*np.sqrt(n)*np.mean(Y) - W))
+    print("target lin and target offset from test", signs, -W)
     nonzero = signs != 0
 
     if nonzero.sum():
+        target_Z = np.sqrt(n) * np.mean(Y)
+        target_transform = (-np.identity(1), np.zeros(1))
+        s = signs
+        opt_transform = (s * np.identity(1), (s * W) * np.ones(1))
+        approx_MLE = solve_UMVU(target_transform,
+                                opt_transform,
+                                target_Z,
+                                np.ones(1),
+                                (sigma**2.) * np.identity(1),
+                                (1./(sigma **2.))* np.identity(1))
 
         estimate, _, _, pv = conv.selective_MLE(target="full")
-        print(estimate, 'selective MLE')
-        print(beta[nonzero], 'truth')
+        print(estimate, approx_MLE, 'selective MLE')
+        print(sigma* beta[nonzero], 'truth')
         print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
         print(pv)
 

From a5eb9c6b8bed7f16e41102149533ef4efdf53d96 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 2 Mar 2018 10:25:14 -0800
Subject: [PATCH 495/617] match with selective UMVU computation

---
 selection/randomized/selective_MLE.py         |   4 +-
 .../tests/test_selective_MLE_onedim.py        | 119 ++++++++++--------
 2 files changed, 71 insertions(+), 52 deletions(-)

diff --git a/selection/randomized/selective_MLE.py b/selection/randomized/selective_MLE.py
index b8831001b..c01e82a80 100644
--- a/selection/randomized/selective_MLE.py
+++ b/selection/randomized/selective_MLE.py
@@ -86,8 +86,8 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
     sel_MLE, inv_hessian = mle_partial(target_observed)
 
     #print("shapes", target_precision.dot(sel_MLE).shape,  A.T.dot(randomizer_precision).shape, offset_term.shape)
-    implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value),
-                                   offset_term*np.ones((1,1))])
+    #implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value),
+    #                               offset_term*np.ones((1,1))])
 
     print("selective MLE", sel_MLE)
     return np.squeeze(sel_MLE)
diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index 36d60b914..6980b4e1e 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -8,13 +8,13 @@
 from selection.randomized.selective_MLE import solve_UMVU, solve_barrier_nonneg
 
 
-def test_onedim_lasso(n=200, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000, sigma=1., full=True, rho=0.4, randomizer_scale=1.):
+def test_onedim_lasso(n=200, p=1, signal_fac=1.5, signal=1., s=1, ndraw=5000, burnin=1000, sigma=1., full=True, rho=0.4, randomizer_scale=1.):
     """
     Compare to R randomized lasso
     """
 
     inst, const = gaussian_instance, highdim.gaussian
-    signal = signal_fac * np.sqrt(2 * np.log(p+1.))
+    #signal = signal_fac * np.sqrt(2 * np.log(p+1.))
 
     # X, Y, beta = inst(n=n,
     #                   p=p,
@@ -24,54 +24,73 @@ def test_onedim_lasso(n=200, p=1, signal_fac=1.5, s=1, ndraw=5000, burnin=1000,
     #                   rho=rho,
     #                   sigma=sigma,
     #                   random_signs=True)[:3]
-
-    X = 1./np.sqrt(n) * np.ones((n,1))
-    beta = np.zeros(p)
-    signal = np.atleast_1d(signal)
-    if signal.shape == (1,):
-        beta[:s] = signal[0]
-    else:
-        beta[:s] = np.linspace(signal[0], signal[1], s)
-    beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
-    np.random.shuffle(beta)
-    Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma
-
-    n, p = X.shape
-    #print("covariates X", X)
-
-    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p+1.)) * sigma
-    print("lambda", W)
-
-    conv = const(X,
-                 Y,
-                 W,
-                 randomizer_scale=randomizer_scale * sigma,
-                 ridge_term=0.)
-
-    signs = conv.fit()
-    #print("conjugate_arg from test", (1./9.)*(signs*np.sqrt(n)*np.mean(Y) - W))
-    print("target lin and target offset from test", signs, -W)
-    nonzero = signs != 0
-
-    if nonzero.sum():
-        target_Z = np.sqrt(n) * np.mean(Y)
-        target_transform = (-np.identity(1), np.zeros(1))
-        s = signs
-        opt_transform = (s * np.identity(1), (s * W) * np.ones(1))
-        approx_MLE = solve_UMVU(target_transform,
-                                opt_transform,
-                                target_Z,
-                                np.ones(1),
-                                (sigma**2.) * np.identity(1),
-                                (1./(sigma **2.))* np.identity(1))
-
-        estimate, _, _, pv = conv.selective_MLE(target="full")
-        print(estimate, approx_MLE, 'selective MLE')
-        print(sigma* beta[nonzero], 'truth')
-        print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
-        print(pv)
-
-test_onedim_lasso()
+    while True:
+        X = 1. / np.sqrt(n) * np.ones((n, 1))
+        beta = np.zeros(p)
+        signal = np.atleast_1d(signal)
+        if signal.shape == (1,):
+            beta[:s] = signal[0]
+        else:
+            beta[:s] = np.linspace(signal[0], signal[1], s)
+        #beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
+        #np.random.shuffle(beta)
+
+        Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma
+
+        n, p = X.shape
+
+        W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p + 1.)) * sigma
+
+        conv = const(X,
+                     Y,
+                     W,
+                     randomizer_scale=randomizer_scale * sigma,
+                     ridge_term=0.)
+
+        signs = conv.fit()
+        # print("conjugate_arg from test", (1./9.)*(signs*np.sqrt(n)*np.mean(Y) - W))
+        print("target lin and target offset from test", signs, -W)
+        nonzero = signs != 0
+        if nonzero.sum()>0:
+            break
+
+    target_Z = np.sqrt(n) * np.mean(Y)
+    target_transform = (-np.identity(1), np.zeros(1))
+    s = signs
+    opt_transform = (s * np.identity(1), (s * W) * np.ones(1))
+    approx_MLE = solve_UMVU(target_transform,
+                            opt_transform,
+                            target_Z,
+                            np.ones(1),
+                            (sigma ** 2.) * np.identity(1),
+                            (1. / (sigma ** 2.)) * np.identity(1))
+
+    estimate, _, _, pv = conv.selective_MLE(target="full")
+    print(estimate, approx_MLE, 'selective MLE')
+    print(beta[nonzero], 'truth')
+    print(np.linalg.pinv(X[:, nonzero]).dot(Y), 'relaxed')
+    print(pv)
+
+    return estimate, approx_MLE
+
+if __name__ == "__main__":
+
+    import matplotlib.pyplot as plt
+
+    fac_seq = np.linspace(-6., 6., 100)
+    MLE_now = []
+    MLE_prev = []
+    for i in range(100):
+        test = test_onedim_lasso(n=200, p=1, signal_fac=1.5, signal= fac_seq[i], s=1, ndraw=5000, burnin=1000,
+                                 sigma=1., full=True, rho=0.4,randomizer_scale=1.)
+
+        MLE_now.append(test[0])
+        MLE_prev.append(test[1])
+
+    plt.plot(fac_seq, np.array(MLE_now), label='MLE now')
+    plt.plot(fac_seq, np.array(MLE_prev), 'r--', label='MLE prev')
+    plt.legend()
+    plt.show()
 
 def main(nsim=500):
 

From 278dc10249c9bcb3babc483b46070073719c432d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 6 Mar 2018 10:09:45 -0800
Subject: [PATCH 496/617] removing unused selective_MLE module -- for now
 everything in query

---
 selection/randomized/selective_MLE.py | 154 --------------------------
 1 file changed, 154 deletions(-)
 delete mode 100644 selection/randomized/selective_MLE.py

diff --git a/selection/randomized/selective_MLE.py b/selection/randomized/selective_MLE.py
deleted file mode 100644
index c01e82a80..000000000
--- a/selection/randomized/selective_MLE.py
+++ /dev/null
@@ -1,154 +0,0 @@
-import numpy as np
-import functools
-
-def solve_UMVU(target_transform,
-               opt_transform,
-               target_observed,
-               feasible_point,
-               target_cov,
-               randomizer_precision):
-
-    A, data_offset = target_transform # data_offset = N
-    B, opt_offset = opt_transform     # opt_offset = u
-
-    nopt = B.shape[1]
-    ntarget = A.shape[1]
-
-    # setup joint implied covariance matrix
-
-    target_precision = np.linalg.inv(target_cov)
-
-    implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
-    implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
-    implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
-    implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
-    implied_precision[ntarget:,ntarget:] = B.T.dot(randomizer_precision).dot(B)
-    implied_cov = np.linalg.inv(implied_precision)
-
-    implied_opt = implied_cov[ntarget:,ntarget:]
-    implied_target = implied_cov[:ntarget,:ntarget]
-    implied_cross = implied_cov[:ntarget,ntarget:]
-
-    L = implied_cross.dot(np.linalg.inv(implied_opt))
-    M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
-    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
-
-    conditioned_value = data_offset + opt_offset
-
-    linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
-    offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
-
-    natparam_transform = (linear_term, offset_term)
-    conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
-
-    conditional_precision = implied_precision[ntarget:,ntarget:]
-
-    M_1_inv = np.linalg.inv(M_1)
-    mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
-    mle_transform = (M_1_inv, -M_1_inv.dot(L), mle_offset_term)
-    var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
-                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
-
-    cross_covariance = np.linalg.inv(implied_precision[:ntarget, :ntarget]).dot(implied_precision[:ntarget, ntarget:])
-    var_matrices = (np.linalg.inv(implied_opt), np.linalg.inv(implied_precision[:ntarget,:ntarget]),
-                    cross_covariance,target_precision)
-
-    def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
-                feasible_point, conditional_precision, target_observed):
-
-        param_lin, param_offset = natparam_transform
-        mle_target_lin, mle_soln_lin, mle_offset = mle_transform
-
-        soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
-                                              conditional_precision,
-                                              feasible_point=feasible_point,
-                                              step=1,
-                                              nstep=2000,
-                                              tol=1.e-8)
-
-        selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
-
-        var_target_lin, var_offset = var_transform
-        var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
-        _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
-                                          var_precision,
-                                          feasible_point=None,
-                                          step=1,
-                                          nstep=2000)
-
-        hessian = target_precision.dot(inv_precision_target +
-                                       cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
-
-        return selective_MLE, np.linalg.inv(hessian)
-
-    mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, var_transform, var_matrices,
-                                    feasible_point, conditional_precision)
-    sel_MLE, inv_hessian = mle_partial(target_observed)
-
-    #print("shapes", target_precision.dot(sel_MLE).shape,  A.T.dot(randomizer_precision).shape, offset_term.shape)
-    #implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value),
-    #                               offset_term*np.ones((1,1))])
-
-    print("selective MLE", sel_MLE)
-    return np.squeeze(sel_MLE)
-        #, inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
-
-def solve_barrier_nonneg(conjugate_arg,
-                         precision,
-                         feasible_point=None,
-                         step=1,
-                         nstep=1000,
-                         tol=1.e-8):
-
-    scaling = np.sqrt(np.diag(precision))
-
-    if feasible_point is None:
-        feasible_point = 1. / scaling
-
-    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
-    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
-    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
-
-    current = feasible_point
-    current_value = np.inf
-
-    for itercount in range(nstep):
-        newton_step = grad(current)
-
-        # make sure proposal is feasible
-
-        count = 0
-        while True:
-            count += 1
-            proposal = current - step * newton_step
-            if np.all(proposal > 0):
-                break
-            step *= 0.5
-            if count >= 40:
-                raise ValueError('not finding a feasible point')
-
-        # make sure proposal is a descent
-
-        count = 0
-        while True:
-            proposal = current - step * newton_step
-            proposed_value = objective(proposal)
-            if proposed_value <= current_value:
-                break
-            step *= 0.5
-
-        # stop if relative decrease is small
-
-        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
-            current = proposal
-            current_value = proposed_value
-            break
-
-        current = proposal
-        current_value = proposed_value
-
-        if itercount % 4 == 0:
-            step *= 2
-
-    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
-    return current, current_value, hess
\ No newline at end of file

From e2f1c9f094b4a409cff5237058cb8d0fdbaeaa6f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 6 Mar 2018 10:41:20 -0800
Subject: [PATCH 497/617] using C code for barrier solving

---
 selection/randomized/lasso.py                 |  3 +-
 selection/randomized/query.py                 | 69 ++--------------
 selection/randomized/selective_MLE_utils.pyx  | 30 ++++++-
 .../randomized/tests/test_selective_MLE.py    | 82 +++----------------
 .../tests/test_selective_MLE_onedim.py        | 14 ++--
 5 files changed, 53 insertions(+), 145 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index b2924c5b8..60df7ecfb 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1746,11 +1746,10 @@ def selective_MLE(self,
         # working out conditional law of opt variables given
         # target after decomposing score wrt target
 
-        print(observed_target, cov_target, cov_target_score)
         return self.sampler.selective_MLE(observed_target, 
                                           cov_target, 
                                           cov_target_score, 
-                                          feasible_point=self.observed_opt_state,
+                                          self.observed_opt_state,
                                           solve_args=solve_args)
 
     # Targets of inference
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 418ed5c5b..567b43acb 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -6,7 +6,7 @@
 
 from regreg.affine import power_L
 
-# from .selective_MLE import solve_barrier_nonneg
+from .selective_MLE_utils import solve_barrier_nonneg
 
 from ..distributions.api import discrete_family
 from ..sampling.langevin import projected_langevin
@@ -495,10 +495,10 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
         conjugate_arg = prec_opt.dot(self.affine_con.mean)
 
         feasible_point = np.ones(prec_opt.shape[0])
-        soln, val, hess = solve_barrier_nonneg_(conjugate_arg,
-                                                prec_opt,
-                                                feasible_point=feasible_point,
-                                                **solve_args)
+        val, soln, hess = solve_barrier_nonneg(conjugate_arg,
+                                               prec_opt,
+                                               feasible_point,
+                                               **solve_args)
 
         final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(self.affine_con.mean - soln)))
 
@@ -694,63 +694,4 @@ def naive_pvalues(diag_cov, observed, parameter):
         pvalues[j] = 2 * min(pval, 1-pval)
     return pvalues
 
-def solve_barrier_nonneg_(conjugate_arg,
-                          precision,
-                          feasible_point=None,
-                          step=1,
-                          nstep=1000,
-                          tol=1.e-8):
-
-    scaling = np.sqrt(np.diag(precision))
-
-    if feasible_point is None:
-        feasible_point = 1. / scaling
-
-    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
-    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
-    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
-
-    current = feasible_point
-    current_value = np.inf
-
-    for itercount in range(nstep):
-        newton_step = grad(current)
-
-        # make sure proposal is feasible
-
-        count = 0
-        while True:
-            count += 1
-            proposal = current - step * newton_step
-            if np.all(proposal > 0):
-                break
-            step *= 0.5
-            if count >= 40:
-                raise ValueError('not finding a feasible point')
-
-        # make sure proposal is a descent
-
-        count = 0
-        while True:
-            proposal = current - step * newton_step
-            proposed_value = objective(proposal)
-            if proposed_value <= current_value:
-                break
-            step *= 0.5
-
-        # stop if relative decrease is small
-
-        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
-            current = proposal
-            current_value = proposed_value
-            break
-
-        current = proposal
-        current_value = proposed_value
-
-        if itercount % 4 == 0:
-            step *= 2
-
-    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
-    return current, current_value, hess
 
diff --git a/selection/randomized/selective_MLE_utils.pyx b/selection/randomized/selective_MLE_utils.pyx
index 5149946df..b5d2603d9 100644
--- a/selection/randomized/selective_MLE_utils.pyx
+++ b/selection/randomized/selective_MLE_utils.pyx
@@ -1,7 +1,5 @@
 import warnings
 import numpy as np, cython
-from regreg.api import power_L
-
 cimport numpy as np
 
 DTYPE_float = np.float
@@ -30,7 +28,7 @@ def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient v
                    np.ndarray[DTYPE_float_t, ndim=1] scaling,       # Diagonal scaling matrix for log barrier
                    double initial_step,
                    int max_iter=1000,
-                   double value_tol=1.e-6):
+                   double value_tol=1.e-8):
    
     ndim = precision.shape[0]
 
@@ -45,4 +43,28 @@ def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient v
                            value_tol,
                            initial_step)
 
-    return opt_variable, value
+    barrier_hessian = lambda u, v: (-1./((v + u)**2.) + 1./(u**2.))			  
+    hess = np.linalg.inv(precision + np.diag(barrier_hessian(opt_variable, scaling)))
+    return value, opt_variable, hess
+
+def solve_barrier_nonneg(conjugate_arg,
+                         precision,
+                         feasible_point,
+                         step=1,
+                         max_iter=1000,
+                         tol=1.e-8):
+
+    gradient = np.zeros_like(conjugate_arg)
+    opt_variable = np.asarray(feasible_point)
+    opt_proposed = opt_variable.copy()
+    scaling = np.sqrt(np.diag(precision))
+    
+    return barrier_solve_(gradient,
+                          opt_variable,
+                          opt_proposed,
+                          conjugate_arg,
+                          precision,
+                          scaling,
+                          step,
+                          max_iter=max_iter,
+                          value_tol=tol)
diff --git a/selection/randomized/tests/test_selective_MLE.py b/selection/randomized/tests/test_selective_MLE.py
index 09851c8cf..6e2f38b09 100644
--- a/selection/randomized/tests/test_selective_MLE.py
+++ b/selection/randomized/tests/test_selective_MLE.py
@@ -4,65 +4,7 @@
 from ...tests.decorators import set_seed_iftrue
 from ..selective_MLE_utils import barrier_solve_
 
-def solve_barrier_nonneg(conjugate_arg,
-                         precision,
-                         feasible_point=None,
-                         step=1,
-                         nstep=150,
-                         tol=1.e-8):
-
-    scaling = np.sqrt(np.diag(precision))
-
-    if feasible_point is None:
-        feasible_point = 1. / scaling
-
-    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
-    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
-    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
-
-    current = feasible_point
-    current_value = np.inf
-
-    for itercount in range(nstep):
-        newton_step = grad(current)
-
-        # make sure proposal is feasible
-
-        count = 0
-        while True:
-            count += 1
-            proposal = current - step * newton_step
-            if np.all(proposal > 0):
-                break
-            step *= 0.5
-            if count >= 40:
-                raise ValueError('not finding a feasible point')
-
-        # make sure proposal is a descent
-
-        count = 0
-        while True:
-            proposal = current - step * newton_step
-            proposed_value = objective(proposal)
-            if proposed_value <= current_value:
-                break
-            step *= 0.5
-
-        # stop if relative decrease is small
-
-        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
-            current = proposal
-            current_value = proposed_value
-            break
-
-        current = proposal
-        current_value = proposed_value
-
-        if itercount % 4 == 0:
-            step *= 2
-
-    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
-    return current, current_value, hess
+from .test_selective_MLE_onedim import solve_barrier_nonneg
 
 @set_seed_iftrue(True)
 def test_C_solver():
@@ -72,21 +14,23 @@ def test_C_solver():
     conjugate_arg = np.random.standard_normal(5)
 
 
-    soln1, val1, _ = solve_barrier_nonneg(conjugate_arg,
-                                          precision,
-                                          tol=1.e-12)
+    soln1, val1, hess1 = solve_barrier_nonneg(conjugate_arg,
+                                              precision,
+                                              tol=1.e-12)
 
     grad, opt_val, opt_proposed = np.ones((3, 5))
     scaling = np.sqrt(np.diag(precision))
 
-    soln2, val2 = barrier_solve_(grad,
-                                 opt_val,
-                                 opt_proposed,
-                                 conjugate_arg,
-                                 precision,
-                                 scaling,
-                                 value_tol=1.e-12)
+    val2, soln2, hess2 = barrier_solve_(grad,
+                                        opt_val,
+                                        opt_proposed,
+                                        conjugate_arg,
+                                        precision,
+                                        scaling,
+                                        1.,
+                                        value_tol=1.e-12)
 
     np.testing.assert_allclose(soln1, soln2, atol=1.e-4, rtol=1.e-4)
+    np.testing.assert_allclose(hess1, hess2, atol=1.e-4, rtol=1.e-4)
     assert (np.fabs(val1 - val2) < 1.e-4 * np.fabs(val1))
 
diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index 04be4a293..d8fe49256 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -36,7 +36,8 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=1, randomizer_scale=1):
 
             target_Z = X.T.dot(Y) / np.sqrt((X**2).sum(0))
 
-            estimate, I, Z, pv = conv.sampler.selective_MLE(target_Z, sigma**2 * np.ones((1,1)), -sigma**2 * np.ones((1,1)), None)
+            estimate, I, Z, pv = conv.sampler.selective_MLE(target_Z, sigma**2 * np.ones((1,1)), -sigma**2 * np.ones((1,1)), np.ones((1,)),
+                                                            solve_args={'tol':1.e-12})
 
             target_transform = (-np.identity(1), np.zeros(1))
             s = signs
@@ -48,7 +49,7 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=1, randomizer_scale=1):
                                     target_Z,
                                     np.ones(1),
                                     (sigma ** 2.) * np.identity(1),
-                                    (1. / (sigma ** 2.)) * np.identity(1))
+                                    (1. / (sigma ** 2.)) * np.identity(1), tol=1.e-12)
 
             print(estimate, approx_MLE, 'selective MLE')
             print(beta[nonzero], 'truth')
@@ -63,7 +64,7 @@ def test_agreement(seed=0):
 
     np.random.seed(seed)
 
-    beta_seq = np.linspace(-6., 6., 300)
+    beta_seq = np.hstack([np.linspace(-6., -2., 100), np.linspace(2, 6, 100)])
     MLE_check = []
     MLE_cur = []
     MLE_prev = []
@@ -81,7 +82,7 @@ def test_agreement(seed=0):
     MLE_prev = np.hstack(MLE_prev)
     pivot = np.hstack(pivot)
 
-    np.testing.assert_allclose(MLE_check, MLE_prev)
+    np.testing.assert_allclose(MLE_check, MLE_prev, rtol=1.e-5)
     nt.assert_true(np.linalg.norm(MLE_cur - MLE_prev) / np.linalg.norm(MLE_prev) < 1.e-2)
 
     return beta_seq, MLE_cur, MLE_prev, pivot
@@ -110,7 +111,8 @@ def solve_UMVU(target_transform,
                target_observed,
                feasible_point,
                target_cov,
-               randomizer_precision):
+               randomizer_precision,
+               tol=1.e-8):
 
     A, data_offset = target_transform # data_offset = N
     B, opt_offset = opt_transform     # opt_offset = u
@@ -168,7 +170,7 @@ def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
                                               feasible_point=feasible_point,
                                               step=1,
                                               nstep=2000,
-                                              tol=1.e-8)
+                                              tol=tol)
 
         selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
 

From 76b4eaecf9bec6c3234eaeb2baacf1678bc818ff Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 6 Mar 2018 12:44:59 -0800
Subject: [PATCH 498/617] producing selective MLE intervals

---
 selection/randomized/query.py                 |   8 +-
 .../randomized/tests/test_highdim_lasso.py    |  15 +--
 .../tests/test_selective_MLE_high.py          | 109 ++++++++++++++++++
 3 files changed, 120 insertions(+), 12 deletions(-)
 create mode 100644 selection/randomized/tests/test_selective_MLE_high.py

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 567b43acb..92801be46 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -475,7 +475,7 @@ def sample(self, ndraw, burnin):
                                        ndraw=ndraw,
                                        burnin=burnin)
 
-    def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_point, solve_args={}):
+    def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_point, solve_args={}, alpha=0.1):
         """
         Selective MLE based on approximation of
         CGF.
@@ -509,7 +509,11 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
         Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean))
         pvalues = ndist.cdf(Z_scores)
         pvalues = 2 * np.minimum(pvalues, 1 - pvalues)
-        return final_estimator, observed_info_mean, Z_scores, pvalues
+
+        quantile = ndist.ppf(1 - alpha / 2.)
+        intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)),
+                               final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T
+        return final_estimator, observed_info_mean, Z_scores, pvalues, intervals
 
 class optimization_intervals(object):
 
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index 4a31b8df2..50c15096c 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -9,7 +9,7 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-def test_highdim_lasso(n=200, p=50, signal_fac=1.5, s=5, ndraw=5000, burnin=1000, sigma=3, full=True, rho=0.4, randomizer_scale=1):
+def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1):
     """
     Compare to R randomized lasso
     """
@@ -27,22 +27,17 @@ def test_highdim_lasso(n=200, p=50, signal_fac=1.5, s=5, ndraw=5000, burnin=1000
 
     n, p = X.shape
 
-    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma
+    sigma_ = np.std(Y)
+    W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma_
 
     conv = const(X, 
                  Y, 
                  W, 
-                 randomizer_scale=randomizer_scale * sigma)
+                 randomizer_scale=randomizer_scale * sigma_)
     
     signs = conv.fit()
     nonzero = signs != 0
 
-    estimate, _, _, pv = conv.selective_MLE(target="full")
-    print(estimate, 'selective MLE')
-    print(beta[nonzero], 'truth')
-    print(np.linalg.pinv(X[:,nonzero]).dot(Y), 'relaxed')
-    print(pv[beta[nonzero] == 0], pv[beta[nonzero] != 0])
-
     if full:
         _, pval, intervals = conv.summary(target="full",
                                           ndraw=ndraw,
@@ -103,7 +98,7 @@ def main(nsim=500):
             p0, pA = [], []
         P0.extend(p0)
         PA.extend(pA)
-        print(np.mean(P0), np.std(P0), np.mean(np.array(PA) < 0.05))
+        print(np.mean(P0), np.std(P0), np.mean(np.array(PA) < 0.05), 'null pvalue + power')
     
         if i % 3 == 0 and i > 0:
             U = np.linspace(0, 1, 101)
diff --git a/selection/randomized/tests/test_selective_MLE_high.py b/selection/randomized/tests/test_selective_MLE_high.py
new file mode 100644
index 000000000..76b054f89
--- /dev/null
+++ b/selection/randomized/tests/test_selective_MLE_high.py
@@ -0,0 +1,109 @@
+import numpy as np
+import nose.tools as nt
+import rpy2.robjects as rpy
+from rpy2.robjects import numpy2ri
+rpy.r('library(selectiveInference)')
+
+import selection.randomized.lasso as L; reload(L)
+from selection.randomized.lasso import highdim 
+from selection.tests.instance import gaussian_instance
+import matplotlib.pyplot as plt
+
+def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, randomizer_scale=1):
+    """
+    Compare to R randomized lasso
+    """
+
+    inst, const = gaussian_instance, highdim.gaussian
+    signal = np.sqrt(signal_fac * 2 * np.log(p))
+    X, Y, beta = inst(n=n,
+                      p=p, 
+                      signal=signal, 
+                      s=s, 
+                      equicorrelated=False, 
+                      rho=rho, 
+                      sigma=sigma, 
+                      random_signs=True)[:3]
+
+    n, p = X.shape
+
+    sigma_ = np.std(Y)
+    W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer_scale=randomizer_scale * sigma_)
+    
+    signs = conv.fit()
+    nonzero = signs != 0
+
+    estimate, _, _, pval, intervals = conv.selective_MLE(target="full")
+
+    coverage = (beta[nonzero] > intervals[:,0]) * (beta[nonzero] < intervals[:,1])
+    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage
+
+def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4, randomizer_scale=1):
+    """
+    Compare to R randomized lasso
+    """
+
+    inst, const = gaussian_instance, highdim.gaussian
+    signal = np.sqrt(signal_fac * 2 * np.log(p))
+    X, Y, beta = inst(n=n,
+                      p=p, 
+                      signal=signal, 
+                      s=s, 
+                      equicorrelated=False, 
+                      rho=rho, 
+                      sigma=sigma, 
+                      random_signs=True)[:3]
+
+    n, p = X.shape
+
+    sigma_ = np.std(Y)
+    W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer_scale=randomizer_scale * sigma_)
+    
+    signs = conv.fit()
+    nonzero = signs != 0
+
+    estimate, _, _, pval, intervals = conv.selective_MLE(target="selected")
+
+    beta_target = np.linalg.pinv(X[:,nonzero]).dot(X.dot(beta))
+
+    coverage = (beta_target > intervals[:,0]) * (beta_target < intervals[:,1])
+    return pval[beta_target == 0], pval[beta_target != 0], coverage
+
+def main(nsim=500, full=True):
+
+    P0, PA, cover = [], [], []
+    from statsmodels.distributions import ECDF
+
+    n, p, s = 500, 200, 20
+
+    for i in range(nsim):
+        if full:
+            p0, pA, cover_ = test_full_targets(n=n, p=p, s=s)
+        else:
+            p0, pA, cover_ = test_selected_targets(n=n, p=p, s=s)
+
+        cover.extend(cover_)
+        P0.extend(p0)
+        PA.extend(pA)
+        print(np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.05), np.mean(np.array(PA) < 0.05), np.mean(cover), 'null pvalue + power')
+    
+        if i % 3 == 0 and i > 0:
+            U = np.linspace(0, 1, 101)
+            plt.clf()
+            if len(P0) > 0:
+                plt.plot(U, ECDF(P0)(U))
+            if len(PA) > 0:
+                plt.plot(U, ECDF(PA)(U), 'r')
+            plt.plot([0, 1], [0, 1], 'k--')
+            plt.savefig("plot.pdf")
+    plt.show()

From 94cac3e3f58288c5d167d4852099f495e263d7bc Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 6 Mar 2018 12:54:30 -0800
Subject: [PATCH 499/617] allowed estimate of dispersion

---
 .../tests/test_selective_MLE_high.py          | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/selection/randomized/tests/test_selective_MLE_high.py b/selection/randomized/tests/test_selective_MLE_high.py
index 76b054f89..28990ad4a 100644
--- a/selection/randomized/tests/test_selective_MLE_high.py
+++ b/selection/randomized/tests/test_selective_MLE_high.py
@@ -9,7 +9,7 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, randomizer_scale=1):
+def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, randomizer_scale=1, full_dispersion=True):
     """
     Compare to R randomized lasso
     """
@@ -38,12 +38,16 @@ def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, rand
     signs = conv.fit()
     nonzero = signs != 0
 
-    estimate, _, _, pval, intervals = conv.selective_MLE(target="full")
+    dispersion = None
+    if full_dispersion:
+        dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y)))**2 / (n - p)
+
+    estimate, _, _, pval, intervals = conv.selective_MLE(target="full", dispersion=dispersion)
 
     coverage = (beta[nonzero] > intervals[:,0]) * (beta[nonzero] < intervals[:,1])
     return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage
 
-def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4, randomizer_scale=1):
+def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4, randomizer_scale=1, full_dispersion=True):
     """
     Compare to R randomized lasso
     """
@@ -72,14 +76,18 @@ def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4,
     signs = conv.fit()
     nonzero = signs != 0
 
-    estimate, _, _, pval, intervals = conv.selective_MLE(target="selected")
+    dispersion = None
+    if full_dispersion:
+        dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y)))**2 / (n - p)
+
+    estimate, _, _, pval, intervals = conv.selective_MLE(target="selected", dispersion=dispersion)
 
     beta_target = np.linalg.pinv(X[:,nonzero]).dot(X.dot(beta))
 
     coverage = (beta_target > intervals[:,0]) * (beta_target < intervals[:,1])
     return pval[beta_target == 0], pval[beta_target != 0], coverage
 
-def main(nsim=500, full=True):
+def main(nsim=500, full=True, full_dispersion=False):
 
     P0, PA, cover = [], [], []
     from statsmodels.distributions import ECDF
@@ -88,9 +96,9 @@ def main(nsim=500, full=True):
 
     for i in range(nsim):
         if full:
-            p0, pA, cover_ = test_full_targets(n=n, p=p, s=s)
+            p0, pA, cover_ = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
         else:
-            p0, pA, cover_ = test_selected_targets(n=n, p=p, s=s)
+            p0, pA, cover_ = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
 
         cover.extend(cover_)
         P0.extend(p0)

From f78389c822e649ed58e33f8fc92d5c9bfa1a8232 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 6 Mar 2018 16:58:16 -0800
Subject: [PATCH 500/617] commit changes in test

---
 .../tests/test_selective_MLE_onedim.py        | 335 +++++++++++++-----
 1 file changed, 237 insertions(+), 98 deletions(-)

diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index 6980b4e1e..210a63f9d 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -1,121 +1,260 @@
+import functools
+
 import numpy as np
+from scipy.stats import norm as ndist
+import matplotlib.pyplot as plt
 import nose.tools as nt
 
-import selection.randomized.lasso as L; reload(L)
-from selection.randomized.lasso import highdim
-from selection.tests.instance import gaussian_instance
-import matplotlib.pyplot as plt
-from selection.randomized.selective_MLE import solve_UMVU, solve_barrier_nonneg
+from ..lasso import highdim
+from ...tests.instance import gaussian_instance
+from statsmodels.distributions import ECDF
 
+def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=1, randomizer_scale=1):
 
-def test_onedim_lasso(n=200, p=1, signal_fac=1.5, signal=1., s=1, ndraw=5000, burnin=1000, sigma=1., full=True, rho=0.4, randomizer_scale=1.):
-    """
-    Compare to R randomized lasso
-    """
+    beta = np.array([signal])
+    while True:
+        X = np.random.standard_normal((n, 1))
+        X /= np.sqrt((X**2).sum(0))[None, :]
+        Y = X.dot(beta) + sigma * np.random.standard_normal(n)
 
-    inst, const = gaussian_instance, highdim.gaussian
-    #signal = signal_fac * np.sqrt(2 * np.log(p+1.))
+        conv = highdim.gaussian(X,
+                                Y,
+                                W * np.ones(X.shape[1]),
+                                randomizer_scale=randomizer_scale * sigma,
+                                ridge_term=0.)
 
-    # X, Y, beta = inst(n=n,
-    #                   p=p,
-    #                   signal=signal,
-    #                   s=s,
-    #                   equicorrelated=False,
-    #                   rho=rho,
-    #                   sigma=sigma,
-    #                   random_signs=True)[:3]
-    while True:
-        X = 1. / np.sqrt(n) * np.ones((n, 1))
-        beta = np.zeros(p)
-        signal = np.atleast_1d(signal)
-        if signal.shape == (1,):
-            beta[:s] = signal[0]
-        else:
-            beta[:s] = np.linspace(signal[0], signal[1], s)
-        #beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
-        #np.random.shuffle(beta)
+        signs = conv.fit()
+        nonzero = signs != 0
 
-        Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma
+        if nonzero.sum():
 
-        n, p = X.shape
+            # this is current code where we estimate sigma
 
-        W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p + 1.)) * sigma
+            estimate_cur, I_cur, Z_cur, pv_cur = conv.selective_MLE(target="full")
 
-        conv = const(X,
-                     Y,
-                     W,
-                     randomizer_scale=randomizer_scale * sigma,
-                     ridge_term=0.)
+            # this matches exactly with old code
 
-        signs = conv.fit()
-        # print("conjugate_arg from test", (1./9.)*(signs*np.sqrt(n)*np.mean(Y) - W))
-        print("target lin and target offset from test", signs, -W)
-        nonzero = signs != 0
-        if nonzero.sum()>0:
-            break
+            target_Z = X.T.dot(Y) / np.sqrt((X**2).sum(0))
 
-    target_Z = np.sqrt(n) * np.mean(Y)
-    target_transform = (-np.identity(1), np.zeros(1))
-    s = signs
-    opt_transform = (s * np.identity(1), (s * W) * np.ones(1))
-    approx_MLE = solve_UMVU(target_transform,
-                            opt_transform,
-                            target_Z,
-                            np.ones(1),
-                            (sigma ** 2.) * np.identity(1),
-                            (1. / (sigma ** 2.)) * np.identity(1))
+            estimate, I, Z, pv = conv.sampler.selective_MLE(target_Z, sigma**2 * np.ones((1,1)), -sigma**2 * np.ones((1,1)), np.ones((1,)),
+                                                            solve_args={'tol':1.e-12})
 
-    estimate, _, _, pv = conv.selective_MLE(target="full")
-    print(estimate, approx_MLE, 'selective MLE')
-    print(beta[nonzero], 'truth')
-    print(np.linalg.pinv(X[:, nonzero]).dot(Y), 'relaxed')
-    print(pv)
+            target_transform = (-np.identity(1), np.zeros(1))
+            s = signs
+            opt_transform = (s * np.identity(1), (s * W) * np.ones(1))
+            beta_hat = X.T.dot(Y) / np.sum(X**2, 0)
+            sigma_ = np.linalg.norm(Y - X.dot(beta_hat)) / np.sqrt(n-1)
+            approx_MLE = solve_UMVU(target_transform,
+                                    opt_transform,
+                                    target_Z,
+                                    np.ones(1),
+                                    (sigma ** 2.) * np.identity(1),
+                                    (1. / (sigma ** 2.)) * np.identity(1), tol=1.e-12)
 
-    return estimate, approx_MLE
+            print(estimate, approx_MLE, 'selective MLE')
+            print(beta[nonzero], 'truth')
+            print(np.linalg.pinv(X[:, nonzero]).dot(Y), 'relaxed')
+            print(pv, 'pv')
 
-if __name__ == "__main__":
+            pivot = ndist.cdf((estimate_cur - signal) / np.sqrt(I_cur[0,0]))
+            print(pivot, 'pivot')
+            return estimate, estimate_cur, np.atleast_1d(approx_MLE), pivot
 
-    import matplotlib.pyplot as plt
+def test_agreement(seed=0):
 
-    fac_seq = np.linspace(-6., 6., 100)
-    MLE_now = []
+    np.random.seed(seed)
+
+    beta_seq = np.hstack([np.linspace(-6., -2., 100), np.linspace(2, 6, 100)])
+    MLE_check = []
+    MLE_cur = []
     MLE_prev = []
-    for i in range(100):
-        test = test_onedim_lasso(n=200, p=1, signal_fac=1.5, signal= fac_seq[i], s=1, ndraw=5000, burnin=1000,
-                                 sigma=1., full=True, rho=0.4,randomizer_scale=1.)
+    pivot = []
+    for signal in beta_seq:
+        test = test_onedim_lasso(n=2000, signal=signal, sigma=1.,randomizer_scale=1.)
+
+        MLE_check.append(test[0])
+        MLE_cur.append(test[1])
+        MLE_prev.append(test[2])
+        pivot.append(test[3])
+
+    MLE_check = np.hstack(MLE_check)
+    MLE_cur = np.hstack(MLE_cur)
+    MLE_prev = np.hstack(MLE_prev)
+    pivot = np.hstack(pivot)
+
+    np.testing.assert_allclose(MLE_check, MLE_prev, rtol=1.e-5)
+    nt.assert_true(np.linalg.norm(MLE_cur - MLE_prev) / np.linalg.norm(MLE_prev) < 1.e-2)
+
+    return beta_seq, MLE_cur, MLE_prev, pivot
 
-        MLE_now.append(test[0])
-        MLE_prev.append(test[1])
+def main():
 
-    plt.plot(fac_seq, np.array(MLE_now), label='MLE now')
-    plt.plot(fac_seq, np.array(MLE_prev), 'r--', label='MLE prev')
+    beta_seq, MLE_cur, MLE_prev, pivot = test_agreement()
+
+    plt.figure(num=1)
+
+    plt.plot(beta_seq, np.array(MLE_cur), label='MLE now')
+    plt.plot(beta_seq, np.array(MLE_prev), 'r--', label='MLE prev')
     plt.legend()
-    plt.show()
-
-def main(nsim=500):
-
-    P0, PA = [], []
-    from statsmodels.distributions import ECDF
-
-    n, p = 500, 200
-
-    for i in range(nsim):
-        try:
-            p0, pA = test_highdim_lasso(n=n, p=p, full=True)
-        except:
-            p0, pA = [], []
-        P0.extend(p0)
-        PA.extend(pA)
-        print(np.mean(P0), np.std(P0), np.mean(np.array(PA) < 0.05))
-
-        if i % 3 == 0 and i > 0:
-            U = np.linspace(0, 1, 101)
-            plt.clf()
-            if len(P0) > 0:
-                plt.plot(U, ECDF(P0)(U))
-            if len(PA) > 0:
-                plt.plot(U, ECDF(PA)(U), 'r')
-            plt.plot([0, 1], [0, 1], 'k--')
-            plt.savefig("plot.pdf")
-    plt.show()
 
+    plt.figure(num=2)
+    U = np.linspace(0, 1, 101)
+    plt.plot(U, ECDF(pivot)(U))
+    plt.plot([0,1],[0,1], 'k--')
+
+#####################################################
+
+# Old selection.randomized.selective_MLE module
+
+def solve_UMVU(target_transform,
+               opt_transform,
+               target_observed,
+               feasible_point,
+               target_cov,
+               randomizer_precision,
+               tol=1.e-8):
+
+    A, data_offset = target_transform # data_offset = N
+    B, opt_offset = opt_transform     # opt_offset = u
+
+    nopt = B.shape[1]
+    ntarget = A.shape[1]
+
+    # setup joint implied covariance matrix
+
+    target_precision = np.linalg.inv(target_cov)
+
+    implied_precision = np.zeros((ntarget + nopt, ntarget + nopt))
+    implied_precision[:ntarget,:ntarget] = A.T.dot(randomizer_precision).dot(A) + target_precision
+    implied_precision[:ntarget,ntarget:] = A.T.dot(randomizer_precision).dot(B)
+    implied_precision[ntarget:,:ntarget] = B.T.dot(randomizer_precision).dot(A)
+    implied_precision[ntarget:,ntarget:] = B.T.dot(randomizer_precision).dot(B)
+    implied_cov = np.linalg.inv(implied_precision)
+
+    implied_opt = implied_cov[ntarget:,ntarget:]
+    implied_target = implied_cov[:ntarget,:ntarget]
+    implied_cross = implied_cov[:ntarget,ntarget:]
+
+    L = implied_cross.dot(np.linalg.inv(implied_opt))
+    M_1 = np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(target_precision)
+    M_2 = -np.linalg.inv(implied_precision[:ntarget,:ntarget]).dot(A.T.dot(randomizer_precision))
+
+    conditioned_value = data_offset + opt_offset
+
+    linear_term = implied_precision[ntarget:,ntarget:].dot(implied_cross.T.dot(np.linalg.inv(implied_target)))
+    offset_term = -B.T.dot(randomizer_precision).dot(conditioned_value)
+
+    natparam_transform = (linear_term, offset_term)
+    conditional_natural_parameter = linear_term.dot(target_observed) + offset_term
+
+    conditional_precision = implied_precision[ntarget:,ntarget:]
+
+    M_1_inv = np.linalg.inv(M_1)
+    mle_offset_term = - M_1_inv.dot(M_2.dot(conditioned_value))
+    mle_transform = (M_1_inv, -M_1_inv.dot(L), mle_offset_term)
+    var_transform = (-implied_precision[ntarget:,:ntarget].dot(M_1),
+                     -implied_precision[ntarget:,:ntarget].dot(M_2.dot(conditioned_value)))
+
+    cross_covariance = np.linalg.inv(implied_precision[:ntarget, :ntarget]).dot(implied_precision[:ntarget, ntarget:])
+    var_matrices = (np.linalg.inv(implied_opt), np.linalg.inv(implied_precision[:ntarget,:ntarget]),
+                    cross_covariance,target_precision)
+
+    def mle_map(natparam_transform, mle_transform, var_transform, var_matrices,
+                feasible_point, conditional_precision, target_observed):
+
+        param_lin, param_offset = natparam_transform
+        mle_target_lin, mle_soln_lin, mle_offset = mle_transform
+
+        soln, value, _ = solve_barrier_nonneg(param_lin.dot(target_observed) + param_offset,
+                                              conditional_precision,
+                                              feasible_point=feasible_point,
+                                              step=1,
+                                              nstep=2000,
+                                              tol=tol)
+
+        selective_MLE = mle_target_lin.dot(target_observed) + mle_soln_lin.dot(soln) + mle_offset
+
+        var_target_lin, var_offset = var_transform
+        var_precision, inv_precision_target, cross_covariance, target_precision =  var_matrices
+        _, _, hess = solve_barrier_nonneg(var_target_lin.dot(selective_MLE) + var_offset + mle_offset,
+                                          var_precision,
+                                          feasible_point=None,
+                                          step=1,
+                                          nstep=2000)
+
+        hessian = target_precision.dot(inv_precision_target +
+                                       cross_covariance.dot(hess).dot(cross_covariance.T)).dot(target_precision)
+
+        return selective_MLE, np.linalg.inv(hessian)
+
+    mle_partial = functools.partial(mle_map, natparam_transform, mle_transform, var_transform, var_matrices,
+                                    feasible_point, conditional_precision)
+    sel_MLE, inv_hessian = mle_partial(target_observed)
+
+    #print("shapes", target_precision.dot(sel_MLE).shape,  A.T.dot(randomizer_precision).shape, offset_term.shape)
+    #implied_parameter = np.hstack([target_precision.dot(sel_MLE)-A.T.dot(randomizer_precision).dot(conditioned_value),
+    #                               offset_term*np.ones((1,1))])
+
+    print("selective MLE", sel_MLE)
+    return np.squeeze(sel_MLE)
+        #, inv_hessian, mle_partial, implied_cov, implied_cov.dot(implied_parameter), mle_transform
+
+def solve_barrier_nonneg(conjugate_arg,
+                         precision,
+                         feasible_point=None,
+                         step=1,
+                         nstep=1000,
+                         tol=1.e-8):
+
+    scaling = np.sqrt(np.diag(precision))
+
+    if feasible_point is None:
+        feasible_point = 1. / scaling
+
+    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum()
+    grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u)
+    barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.))
+
+    current = feasible_point
+    current_value = np.inf
+
+    for itercount in range(nstep):
+        newton_step = grad(current)
+
+        # make sure proposal is feasible
+
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * newton_step
+            if np.all(proposal > 0):
+                break
+            step *= 0.5
+            if count >= 40:
+                raise ValueError('not finding a feasible point')
+
+        # make sure proposal is a descent
+
+        count = 0
+        while True:
+            proposal = current - step * newton_step
+            proposed_value = objective(proposal)
+            if proposed_value <= current_value:
+                break
+            step *= 0.5
+
+        # stop if relative decrease is small
+
+        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+            current = proposal
+            current_value = proposed_value
+            break
+
+        current = proposal
+        current_value = proposed_value
+
+        if itercount % 4 == 0:
+            step *= 2
+
+    hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
+    return current, current_value, hess

From 1b4c1ae1162bb2e96694f83dfef108b3adc91f09 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 8 Mar 2018 17:41:55 -0800
Subject: [PATCH 501/617] starting to work on sqrtlasso

---
 selection/randomized/lasso.py                 | 93 +++++++++++++------
 .../randomized/tests/test_highdim_lasso.py    | 44 ++++++++-
 2 files changed, 110 insertions(+), 27 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 60df7ecfb..085de9d16 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -9,6 +9,7 @@
 import regreg.affine as ra
 
 from ..constraints.affine import constraints
+from ..algorithms.sqrt_lasso import solve_sqrt_lasso, choose_lambda
 
 from .query import (query, 
                     multiple_queries,
@@ -1397,7 +1398,8 @@ def __init__(self,
                  loglike, 
                  feature_weights,
                  ridge_term,
-                 randomizer_scale):
+                 randomizer_scale,
+                 perturb=None):
         r"""
 
         Create a new post-selection object for the LASSO problem
@@ -1418,6 +1420,9 @@ def __init__(self,
         randomizer_scale : float
             Scale for IID components of randomization.
 
+        perturb : np.ndarray
+            Random perturbation subtracted as a linear
+            term in the objective function.
 
         """
 
@@ -1431,7 +1436,7 @@ def __init__(self,
         self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
         self.ridge_term = ridge_term
         self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
-
+        self._initial_omega = perturb # random perturbation
 
     def fit(self, 
             solve_args={'tol':1.e-12, 'min_its':50}, 
@@ -1455,10 +1460,11 @@ def fit(self,
 
         p = self.nfeature
 
-        if perturb is None:
-            perturb = self.randomizer.sample()
         self._initial_omega = perturb
-        quad = rr.identity_quadratic(self.ridge_term, 0, -perturb)
+        if self._initial_omega is None:
+            self._initial_omega = self.randomizer.sample()
+
+        quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega)
         problem = rr.simple_problem(self.loglike, self.penalty)
         self.initial_soln = problem.solve(quad, **solve_args)
 
@@ -1841,11 +1847,9 @@ def gaussian(X,
                  Y, 
                  feature_weights, 
                  sigma=1.,
-                 parametric_cov_estimator=False,
                  quadratic=None,
                  ridge_term=None,
-                 randomizer_scale=None,
-                 randomizer='gaussian'):
+                 randomizer_scale=None):
         r"""
         Squared-error LASSO with feature weights.
 
@@ -1910,7 +1914,7 @@ def gaussian(X,
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
         return highdim(loglike, np.asarray(feature_weights) / sigma**2,
-                     ridge_term, randomizer_scale)
+                       ridge_term, randomizer_scale)
 
 
     @staticmethod
@@ -1918,10 +1922,8 @@ def logistic(X,
                  successes, 
                  feature_weights, 
                  trials=None,
-                 parametric_cov_estimator=False,
                  quadratic=None,
                  ridge_term=None,
-                 randomizer='gaussian',
                  randomizer_scale=None):
         r"""
         Logistic LASSO with feature weights.
@@ -1997,10 +1999,8 @@ def coxph(X,
               times, 
               status, 
               feature_weights,
-              parametric_cov_estimator=False,
               quadratic=None,
               ridge_term=None,
-              randomizer='gaussian',
               randomizer_scale=None):
         r"""
         Cox proportional hazards LASSO with feature weights.
@@ -2080,11 +2080,9 @@ def coxph(X,
     def poisson(X, 
                 counts, 
                 feature_weights,
-                parametric_cov_estimator=False,
                 quadratic=None,
                 ridge_term=None,
-                randomizer_scale=None,
-                randomizer='gaussian'):
+                randomizer_scale=None):
         r"""
         Poisson log-linear LASSO with feature weights.
 
@@ -2157,11 +2155,9 @@ def sqrt_lasso(X,
                    Y, 
                    feature_weights, 
                    quadratic=None,
-                   parametric_cov_estimator=False,
-                   sigma_estimate='truncated',
-                   solve_args={'min_its':200},
+                   ridge_term=None,
                    randomizer_scale=None,
-                   randomizer='gaussian'):
+                   solve_args={'min_its':200}):
         r"""
         Use sqrt-LASSO to choose variables.
 
@@ -2199,11 +2195,6 @@ def sqrt_lasso(X,
             used to estimate covariance for inference
             in second stage.
 
-        sigma_estimate : str
-            One of 'truncated' or 'OLS'. Method
-            used to estimate $\sigma$ when using
-            parametric covariance.
-
         solve_args : dict
             Arguments passed to solver.
 
@@ -2233,5 +2224,55 @@ def sqrt_lasso(X,
 
         """
 
-        raise NotImplementedError
+        n, p = X.shape
+
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(loglike.shape) * feature_weights
+
+        mean_diag = np.mean((X**2).sum(0))
+        if ridge_term is None:
+            ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.sqrt(n / (n - 1.))
+
+        ridge_term = 0.
+
+        perturb = np.random.standard_normal(p) * randomizer_scale
+        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0) # a ridge + linear term
+
+        if quadratic is not None:
+            totalQ = randomQ + quadratic
+        else:
+            totalQ = randomQ
+
+        soln, sqrt_loss = solve_sqrt_lasso(X, Y, weights=feature_weights, quadratic=totalQ, solve_args={'min_its':1000, 'tol':1.e-12})
+        active_set = (soln != 0)
+        X_A = X[:,active_set]
+        unrestricted_soln = np.linalg.pinv(X_A).dot(Y)
+#        sigma_hat = np.linalg.norm(Y - X_A.dot(unrestricted_soln)) / np.sqrt(n - active_set.sum())
+        denom = np.linalg.norm(Y - X.dot(soln))
+        subgrad_ = perturb - X.T.dot(X.dot(soln) - Y) / denom
+        coef, center, linear_term, cons = totalQ.coef, totalQ.center, totalQ.linear_term, totalQ.constant_term
+        rescaledQ = rr.identity_quadratic(coef * denom,
+                                          center,
+                                          linear_term * denom,
+                                          cons * denom)
+
+        loglike = rr.glm.gaussian(X, Y, coef=1., quadratic=rescaledQ)
+        
+        # sanity check
+
+        new_weights = feature_weights * denom
+        pen = rr.weighted_l1norm(new_weights, lagrange=1.)
+        prob = rr.simple_problem(loglike, pen)
+        soln2 = prob.solve(quadratic=rescaledQ, min_its=500, tol=1.e-12)
+
+        stop
+
+        return highdim(loglike, np.asarray(feature_weights),
+                       ridge_term * denom, 
+                       randomizer_scale * denom, 
+                       perturb=perturb * denom)
+
 
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index 50c15096c..056fdefdd 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -7,9 +7,10 @@
 import selection.randomized.lasso as L; reload(L)
 from selection.randomized.lasso import highdim 
 from selection.tests.instance import gaussian_instance
+from selection.algorithms.sqrt_lasso import choose_lambda
 import matplotlib.pyplot as plt
 
-def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1):
+def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000):
     """
     Compare to R randomized lasso
     """
@@ -51,6 +52,47 @@ def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rh
 
     return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
 
+def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000):
+    """
+    Compare to R randomized lasso
+    """
+
+    inst, const = gaussian_instance, highdim.sqrt_lasso
+    signal = np.sqrt(signal_fac * np.log(p))
+    X, Y, beta = inst(n=n,
+                      p=p, 
+                      signal=signal, 
+                      s=s, 
+                      equicorrelated=False, 
+                      rho=rho, 
+                      sigma=sigma, 
+                      random_signs=True)[:3]
+
+    n, p = X.shape
+
+    W = np.ones(X.shape[1]) * choose_lambda(X) * 0.5
+
+    conv = const(X, 
+                 Y, 
+                 W, 
+                 randomizer_scale=randomizer_scale / np.sqrt(n))
+    
+    signs = conv.fit()
+    nonzero = signs != 0
+
+    if full:
+        _, pval, intervals = conv.summary(target="full",
+                                          ndraw=ndraw,
+                                          burnin=burnin, 
+                                          compute_intervals=False)
+    else:
+        _, pval, intervals = conv.summary(target="selected",
+                                          ndraw=ndraw,
+                                          burnin=burnin, 
+                                          compute_intervals=False)
+
+    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
+
 def test_compareR(n=200, p=10, signal=np.sqrt(4) * np.sqrt(2 * np.log(10)), s=5, ndraw=5000, burnin=1000, param=True, sigma=3):
     """
     Compare to R randomized lasso

From ece9a1d7411efbd043cc855f9e3682dfa5d3c57a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 9 Mar 2018 09:22:19 -0800
Subject: [PATCH 502/617] BF: the skinny problem doesn't work with perturbation
 without modification

---
 selection/algorithms/sqrt_lasso.py               |  7 +++++--
 selection/randomized/lasso.py                    | 16 ++++++++++------
 selection/randomized/tests/test_highdim_lasso.py |  2 +-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/selection/algorithms/sqrt_lasso.py b/selection/algorithms/sqrt_lasso.py
index e1f99face..783bd8a23 100644
--- a/selection/algorithms/sqrt_lasso.py
+++ b/selection/algorithms/sqrt_lasso.py
@@ -239,7 +239,7 @@ def l2norm_glm(X,
                   initial=initial,
                   offset=offset)
 
-def solve_sqrt_lasso(X, Y, weights=None, initial=None, quadratic=None, solve_args={}):
+def solve_sqrt_lasso(X, Y, weights=None, initial=None, quadratic=None, solve_args={}, force_fat=False):
     """
 
     Solve the square-root LASSO optimization problem:
@@ -273,7 +273,7 @@ def solve_sqrt_lasso(X, Y, weights=None, initial=None, quadratic=None, solve_arg
         A quadratic term added to objective function.
     """
     n, p = X.shape
-    if n > p:
+    if n > p and not force_fat:
         return solve_sqrt_lasso_skinny(X, Y, weights=weights, initial=initial, quadratic=quadratic, solve_args=solve_args)
     else:
         return solve_sqrt_lasso_fat(X, Y, weights=weights, initial=initial, quadratic=quadratic, solve_args=solve_args)
@@ -449,6 +449,9 @@ def solve_sqrt_lasso_skinny(X, Y, weights=None, initial=None, quadratic=None, so
 
     soln = problem.solve(new_quadratic, **solve_args)
     _loss = sqlasso_objective(X, Y)
+    subgrad2 = _loss.smooth_objective(soln[:-1], 'grad') + new_quadratic.objective(soln, 'grad')[:-1]
+    subgrad = loss.smooth_objective(soln, 'grad') + new_quadratic.objective(soln, 'grad')
+    print(subgrad[soln != 0])
     return soln[:-1], _loss
 
 def estimate_sigma(observed, truncated_df, lower_bound, upper_bound, untruncated_df=0, factor=3, npts=50, nsample=2000):
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 085de9d16..a8c42cf0a 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -2246,12 +2246,18 @@ def sqrt_lasso(X,
         else:
             totalQ = randomQ
 
-        soln, sqrt_loss = solve_sqrt_lasso(X, Y, weights=feature_weights, quadratic=totalQ, solve_args={'min_its':1000, 'tol':1.e-12})
+        soln, sqrt_loss = solve_sqrt_lasso(X, 
+                                           Y, 
+                                           weights=feature_weights, 
+                                           quadratic=totalQ, 
+                                           solve_args={'min_its':1000, 'tol':1.e-12},
+                                           force_fat=True)
         active_set = (soln != 0)
         X_A = X[:,active_set]
         unrestricted_soln = np.linalg.pinv(X_A).dot(Y)
-#        sigma_hat = np.linalg.norm(Y - X_A.dot(unrestricted_soln)) / np.sqrt(n - active_set.sum())
+
         denom = np.linalg.norm(Y - X.dot(soln))
+
         subgrad_ = perturb - X.T.dot(X.dot(soln) - Y) / denom
         coef, center, linear_term, cons = totalQ.coef, totalQ.center, totalQ.linear_term, totalQ.constant_term
         rescaledQ = rr.identity_quadratic(coef * denom,
@@ -2259,16 +2265,14 @@ def sqrt_lasso(X,
                                           linear_term * denom,
                                           cons * denom)
 
-        loglike = rr.glm.gaussian(X, Y, coef=1., quadratic=rescaledQ)
+        loglike = rr.glm.gaussian(X, Y, coef=1.)
         
         # sanity check
 
         new_weights = feature_weights * denom
         pen = rr.weighted_l1norm(new_weights, lagrange=1.)
         prob = rr.simple_problem(loglike, pen)
-        soln2 = prob.solve(quadratic=rescaledQ, min_its=500, tol=1.e-12)
-
-        stop
+        soln2 = prob.solve(quadratic=rescaledQ, min_its=1000, tol=1.e-12)
 
         return highdim(loglike, np.asarray(feature_weights),
                        ridge_term * denom, 
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index 056fdefdd..aec64c9e0 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -70,7 +70,7 @@ def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=Tru
 
     n, p = X.shape
 
-    W = np.ones(X.shape[1]) * choose_lambda(X) * 0.5
+    W = np.ones(X.shape[1]) * choose_lambda(X)
 
     conv = const(X, 
                  Y, 

From ae30a14938739456aac2ae94fffc0df838e64fd1 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 9 Mar 2018 10:27:36 -0800
Subject: [PATCH 503/617] NF: randomized sqrtLASSO implemented for highdim and
 general, only tested for highdim (as others)

---
 selection/algorithms/sqrt_lasso.py            |   3 -
 selection/randomized/lasso.py                 | 207 +++++++-----------
 .../randomized/tests/test_highdim_lasso.py    |  59 ++++-
 3 files changed, 123 insertions(+), 146 deletions(-)

diff --git a/selection/algorithms/sqrt_lasso.py b/selection/algorithms/sqrt_lasso.py
index 783bd8a23..e29409892 100644
--- a/selection/algorithms/sqrt_lasso.py
+++ b/selection/algorithms/sqrt_lasso.py
@@ -449,9 +449,6 @@ def solve_sqrt_lasso_skinny(X, Y, weights=None, initial=None, quadratic=None, so
 
     soln = problem.solve(new_quadratic, **solve_args)
     _loss = sqlasso_objective(X, Y)
-    subgrad2 = _loss.smooth_objective(soln[:-1], 'grad') + new_quadratic.objective(soln, 'grad')[:-1]
-    subgrad = loss.smooth_objective(soln, 'grad') + new_quadratic.objective(soln, 'grad')
-    print(subgrad[soln != 0])
     return soln[:-1], _loss
 
 def estimate_sigma(observed, truncated_df, lower_bound, upper_bound, untruncated_df=0, factor=3, npts=50, nsample=2000):
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index a8c42cf0a..47b4752ac 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -669,7 +669,8 @@ def __init__(self,
                  ridge_term,
                  randomizer_scale,
                  randomizer='gaussian',
-                 parametric_cov_estimator=False):
+                 parametric_cov_estimator=False,
+                 perturb=None):
         r"""
 
         Create a new post-selection object for the LASSO problem
@@ -716,6 +717,7 @@ def __init__(self,
 
         self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
 
+        self._initial_omega = perturb
 
     def fit(self, 
             solve_args={'tol':1.e-12, 'min_its':50}, 
@@ -738,12 +740,15 @@ def fit(self,
              
         """
 
+        if perturb is not None:
+            self._initial_omega = perturb
+
         p = self.nfeature
         if self.parametric_cov_estimator==True:
             self._view = glm_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
         else:
             self._view = glm_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
-        self._view.solve(nboot=nboot, perturb=perturb, solve_args=solve_args)
+        self._view.solve(nboot=nboot, perturb=self._initial_omega, solve_args=solve_args)
 
         self.signs = np.sign(self._view.initial_soln)
         self.selection_variable = self._view.selection_variable
@@ -875,7 +880,8 @@ def gaussian(X,
                  quadratic=None,
                  ridge_term=None,
                  randomizer_scale=None,
-                 randomizer='gaussian'):
+                 randomizer='gaussian',
+                 perturb=None):
         r"""
         Squared-error LASSO with feature weights.
 
@@ -939,9 +945,13 @@ def gaussian(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        return lasso(loglike, np.asarray(feature_weights) / sigma**2,
-                     ridge_term, randomizer_scale, randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator)
+        return lasso(loglike, 
+                     np.asarray(feature_weights) / sigma**2,
+                     ridge_term, 
+                     randomizer_scale, 
+                     randomizer=randomizer,
+                     parametric_cov_estimator=parametric_cov_estimator,
+                     perturb=perturb)
 
     @staticmethod
     def logistic(X, 
@@ -952,7 +962,8 @@ def logistic(X,
                  quadratic=None,
                  ridge_term=None,
                  randomizer='gaussian',
-                 randomizer_scale=None):
+                 randomizer_scale=None,
+                 perturb=None):
         r"""
         Logistic LASSO with feature weights.
 
@@ -1023,7 +1034,8 @@ def logistic(X,
                      ridge_term, 
                      randomizer_scale,
                      parametric_cov_estimator=parametric_cov_estimator,
-                     randomizer=randomizer)
+                     randomizer=randomizer,
+                     perturb=perturb)
 
     @staticmethod
     def coxph(X, 
@@ -1034,7 +1046,8 @@ def coxph(X,
               quadratic=None,
               ridge_term=None,
               randomizer='gaussian',
-              randomizer_scale=None):
+              randomizer_scale=None,
+              perturb=None):
         r"""
         Cox proportional hazards LASSO with feature weights.
 
@@ -1109,7 +1122,8 @@ def coxph(X,
                      ridge_term,
                      randomizer_scale, 
                      randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator)
+                     parametric_cov_estimator=parametric_cov_estimator,
+                     perturb=perturb)
 
     @staticmethod
     def poisson(X, 
@@ -1119,7 +1133,8 @@ def poisson(X,
                 quadratic=None,
                 ridge_term=None,
                 randomizer_scale=None,
-                randomizer='gaussian'):
+                randomizer='gaussian',
+                perturb=None):
         r"""
         Poisson log-linear LASSO with feature weights.
 
@@ -1187,7 +1202,8 @@ def poisson(X,
                      ridge_term,
                      randomizer_scale, 
                      randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator)
+                     parametric_cov_estimator=parametric_cov_estimator,
+                     perturb=perturb)
 
     @staticmethod
     def sqrt_lasso(X, 
@@ -1198,7 +1214,7 @@ def sqrt_lasso(X,
                    sigma_estimate='truncated',
                    solve_args={'min_its':200},
                    randomizer_scale=None,
-                   randomizer='gaussian'):
+                   perturb=None):
         r"""
         Use sqrt-LASSO to choose variables.
 
@@ -1250,9 +1266,6 @@ def sqrt_lasso(X,
         randomizer_scale : float
             Scale for IID components of randomizer.
 
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
 
@@ -1270,109 +1283,49 @@ def sqrt_lasso(X,
 
         """
 
-        raise NotImplementedError
-
         n, p = X.shape
 
-        # scale for randomization seems kind of meaningless here...
-
-        mean_diag = np.mean((X**2).sum(0))
-        ridge_term = (np.std(Y)**2 * mean_diag / np.sqrt(n)) * n / (n - 1.)
-        randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
-
         if np.asarray(feature_weights).shape == ():
-            feature_weights = np.ones(p) * feature_weights
-        feature_weights = np.asarray(feature_weights)
-
-        # TODO: refits sqrt lasso more than once -- make an override for avoiding refitting?
-
-        soln = solve_sqrt_lasso(X, Y, weights=feature_weights, quadratic=quadratic, solve_args=solve_args)[0]
-
-        # find active set, and estimate of sigma
-
-        active = (soln != 0)
-        nactive = active.sum()
-
-        if nactive:
-
-            subgrad = np.sign(soln[active]) * feature_weights[active]
-            X_E = X[:,active]
-            X_Ei = np.linalg.pinv(X_E)
-            sigma_E = np.linalg.norm(Y - X_E.dot(X_Ei.dot(Y))) / np.sqrt(n - nactive)
-            multiplier = np.sqrt((n - nactive) / (1 - np.linalg.norm(X_Ei.T.dot(subgrad))**2))
-
-            # check truncation interval for sigma_E
-
-            # the KKT conditions imply an inequality like
-            # \hat{\sigma}_E \cdot LHS \leq RHS
-
-            penalized = feature_weights[active] != 0
-
-            if penalized.sum():
-                D_E = np.sign(soln[active][penalized]) # diagonal matrix of signs
-                LHS = D_E * np.linalg.solve(X_E.T.dot(X_E), subgrad)[penalized]
-                RHS = D_E * X_Ei.dot(Y)[penalized] 
-
-                ratio = RHS / LHS
-
-                group1 = LHS > 0
-                upper_bound = np.inf
-                if group1.sum():
-                    upper_bound = min(upper_bound, np.min(ratio[group1])) # necessarily these will have RHS > 0
+            feature_weights = np.ones(loglike.shape) * feature_weights
 
-                group2 = (LHS <= 0) * (RHS <= 0) # we can ignore the other possibility since this gives a lower bound of 0
-                lower_bound = 0
-                if group2.sum():
-                    lower_bound = max(lower_bound, np.max(ratio[group2]))
+        mean_diag = np.mean((X**2).sum(0))
+        if ridge_term is None:
+            ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
 
-                upper_bound /= multiplier
-                lower_bound /= multiplier
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.sqrt(n / (n - 1.))
 
-            else:
-                lower_bound = 0
-                upper_bound = np.inf
-
-            _sigma_estimator_args = (sigma_E, 
-                                     n - nactive,
-                                     lower_bound, 
-                                     upper_bound)
-
-            if sigma_estimate == 'truncated':
-                _sigma_hat = estimate_sigma(*_sigma_estimator_args)
-            elif sigma_estimate == 'OLS':
-                _sigma_hat = sigma_E
-            else:
-                raise ValueError('sigma_estimate must be one of ["truncated", "OLS"]')
-        else:
-            _sigma_hat = np.linalg.norm(Y) / np.sqrt(n)
-            multiplier = np.sqrt(n)
-            sigma_E = _sigma_hat
+        if perturb is None:
+            perturb = np.random.standard_normal(p) * randomizer_scale
 
-        # XXX how should quadratic be changed?
-        # multiply everything by sigma_E?
+        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0) # a ridge + linear term
 
         if quadratic is not None:
-            qc = quadratic.collapsed()
-            qc.coef *= np.sqrt(n - nactive) / sigma_E
-            qc.linear_term *= np.sqrt(n - nactive) / sigma_E
-            quadratic = qc
+            totalQ = randomQ + quadratic
+        else:
+            totalQ = randomQ
 
-        loglike = rr.glm.gaussian(X, Y, quadratic=quadratic)
+        soln, sqrt_loss = solve_sqrt_lasso(X, 
+                                           Y, 
+                                           weights=feature_weights, 
+                                           quadratic=totalQ, 
+                                           solve_args=solve_args,
+                                           force_fat=True)
 
-        L = lasso(loglike, feature_weights * multiplier * sigma_E,
-                  parametric_cov_estimator=parametric_cov_estimator,
-                  ignore_inactive_constraints=True)
+        denom = np.linalg.norm(Y - X.dot(soln))
 
-        # these arguments are reused for data carving
+        loglike = rr.glm.gaussian(X, Y)
+        
+        raise NotImplementedError('lasso_view needs to be modified so that the initial randomization can be set at construction time')
 
-        if nactive:
-            L._sigma_hat = _sigma_hat
-            L._sigma_estimator_args = _sigma_estimator_args
-            L._weight_multiplier = multiplier * sigma_E
-            L._multiplier = multiplier
-            L.lasso_solution = soln
+        return lasso(loglike, 
+                     np.asarray(feature_weights) * denom, 
+                     ridge_term * denom, 
+                     randomizer_scale * denom, 
+                     randomizer='gaussian',
+                     parametric_cov_estimator=parametric_cov_estimator,
+                     perturb=perturb)
 
-        return L
 
 #### High dimensional version
 #### - parametric covariance
@@ -1460,11 +1413,13 @@ def fit(self,
 
         p = self.nfeature
 
-        self._initial_omega = perturb
+        # take a new perturbation if supplied
+        if perturb is not None:
+            self._initial_omega = perturb
         if self._initial_omega is None:
             self._initial_omega = self.randomizer.sample()
 
-        quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega)
+        quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0)
         problem = rr.simple_problem(self.loglike, self.penalty)
         self.initial_soln = problem.solve(quad, **solve_args)
 
@@ -2157,7 +2112,8 @@ def sqrt_lasso(X,
                    quadratic=None,
                    ridge_term=None,
                    randomizer_scale=None,
-                   solve_args={'min_its':200}):
+                   solve_args={'min_its':200},
+                   perturb=None):
         r"""
         Use sqrt-LASSO to choose variables.
 
@@ -2236,9 +2192,9 @@ def sqrt_lasso(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.sqrt(n / (n - 1.))
 
-        ridge_term = 0.
+        if perturb is None:
+            perturb = np.random.standard_normal(p) * randomizer_scale
 
-        perturb = np.random.standard_normal(p) * randomizer_scale
         randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0) # a ridge + linear term
 
         if quadratic is not None:
@@ -2250,33 +2206,18 @@ def sqrt_lasso(X,
                                            Y, 
                                            weights=feature_weights, 
                                            quadratic=totalQ, 
-                                           solve_args={'min_its':1000, 'tol':1.e-12},
+                                           solve_args=solve_args,
                                            force_fat=True)
-        active_set = (soln != 0)
-        X_A = X[:,active_set]
-        unrestricted_soln = np.linalg.pinv(X_A).dot(Y)
 
         denom = np.linalg.norm(Y - X.dot(soln))
-
-        subgrad_ = perturb - X.T.dot(X.dot(soln) - Y) / denom
-        coef, center, linear_term, cons = totalQ.coef, totalQ.center, totalQ.linear_term, totalQ.constant_term
-        rescaledQ = rr.identity_quadratic(coef * denom,
-                                          center,
-                                          linear_term * denom,
-                                          cons * denom)
-
-        loglike = rr.glm.gaussian(X, Y, coef=1.)
+        loglike = rr.glm.gaussian(X, Y)
         
-        # sanity check
+        obj = highdim(loglike, np.asarray(feature_weights) * denom,
+                      ridge_term * denom, 
+                      randomizer_scale * denom, 
+                      perturb=perturb * denom)
+        obj._sqrt_soln = soln
 
-        new_weights = feature_weights * denom
-        pen = rr.weighted_l1norm(new_weights, lagrange=1.)
-        prob = rr.simple_problem(loglike, pen)
-        soln2 = prob.solve(quadratic=rescaledQ, min_its=1000, tol=1.e-12)
-
-        return highdim(loglike, np.asarray(feature_weights),
-                       ridge_term * denom, 
-                       randomizer_scale * denom, 
-                       perturb=perturb * denom)
+        return obj
 
 
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index aec64c9e0..e3f18c919 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -1,13 +1,18 @@
+from __future__ import division, print_function
+
 import numpy as np
 import nose.tools as nt
+
+import regreg.api as rr
+
 import rpy2.robjects as rpy
 from rpy2.robjects import numpy2ri
 rpy.r('library(selectiveInference)')
 
 import selection.randomized.lasso as L; reload(L)
-from selection.randomized.lasso import highdim 
-from selection.tests.instance import gaussian_instance
-from selection.algorithms.sqrt_lasso import choose_lambda
+from ..lasso import highdim 
+from ...tests.instance import gaussian_instance
+from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso
 import matplotlib.pyplot as plt
 
 def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000):
@@ -52,13 +57,14 @@ def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rh
 
     return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
 
-def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000):
+def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1., ndraw=5000, burnin=1000, 
+                            ridge_term=None, compare_to_lasso=True):
     """
     Compare to R randomized lasso
     """
 
     inst, const = gaussian_instance, highdim.sqrt_lasso
-    signal = np.sqrt(signal_fac * np.log(p))
+    signal = np.sqrt(signal_fac * 2 * np.log(p))
     X, Y, beta = inst(n=n,
                       p=p, 
                       signal=signal, 
@@ -68,18 +74,48 @@ def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=Tru
                       sigma=sigma, 
                       random_signs=True)[:3]
 
-    n, p = X.shape
+    if ridge_term is None:
+        mean_diag = np.mean((X**2).sum(0))
+        ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+
+    W = np.ones(X.shape[1]) * choose_lambda(X) * 0.7
 
-    W = np.ones(X.shape[1]) * choose_lambda(X)
+    perturb = np.random.standard_normal(p) * randomizer_scale / np.sqrt(n)
 
     conv = const(X, 
                  Y, 
                  W, 
-                 randomizer_scale=randomizer_scale / np.sqrt(n))
+                 randomizer_scale=randomizer_scale / np.sqrt(n),
+                 perturb=perturb,
+                 ridge_term=ridge_term)
     
     signs = conv.fit()
     nonzero = signs != 0
 
+    # sanity check
+
+    if compare_to_lasso:
+        q_term = rr.identity_quadratic(ridge_term, 0, -perturb, 0)
+
+        soln2, sqrt_loss = solve_sqrt_lasso(X, Y, W, solve_args={'min_its':1000}, quadratic=q_term, force_fat=True)
+        soln = conv.initial_soln
+
+        denom = np.linalg.norm(Y - X.dot(soln))
+        new_weights = W * denom
+        loss = rr.glm.gaussian(X, Y)
+        pen = rr.weighted_l1norm(new_weights, lagrange=1.)
+        prob = rr.simple_problem(loss, pen)
+
+        rescaledQ = rr.identity_quadratic(ridge_term * denom,
+                                          0,
+                                          -perturb * denom,
+                                          0)
+
+        soln3 = prob.solve(quadratic=rescaledQ, min_its=1000, tol=1.e-12)
+        np.testing.assert_allclose(conv._initial_omega, perturb * denom)
+        np.testing.assert_allclose(soln, soln2)
+        np.testing.assert_allclose(soln, soln3)
+
     if full:
         _, pval, intervals = conv.summary(target="full",
                                           ndraw=ndraw,
@@ -126,7 +162,7 @@ def test_compareR(n=200, p=10, signal=np.sqrt(4) * np.sqrt(2 * np.log(10)), s=5,
     assert np.linalg.norm(conv.sampler.affine_con.mean - cond_mean[:,0]) / np.linalg.norm(cond_mean[:,0]) < 1.e-3
 
 
-def main(nsim=500):
+def main(nsim=500, sqrt=False, full=True):
 
     P0, PA = [], []
     from statsmodels.distributions import ECDF
@@ -135,7 +171,10 @@ def main(nsim=500):
 
     for i in range(nsim):
         try:
-            p0, pA = test_highdim_lasso(n=n, p=p, full=True)
+            if not sqrt:
+                p0, pA = test_highdim_lasso(n=n, p=p, full=full)
+            else:
+                p0, pA = test_sqrt_highdim_lasso(n=n, p=p, full=full, compare_to_lasso=False)
         except:
             p0, pA = [], []
         P0.extend(p0)

From a1ecae0f7684a6318ae358dd854dbf4349a9d58c Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 9 Mar 2018 12:27:03 -0800
Subject: [PATCH 504/617] BF: fixing ridge scale of sqrtLASSO, cleaning up some
 ratios of sqrt n,n-1

---
 selection/randomized/lasso.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 47b4752ac..b4d60f8a5 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -940,7 +940,7 @@ def gaussian(X,
 
         mean_diag = np.mean((X**2).sum(0))
         if ridge_term is None:
-            ridge_term = (np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
@@ -1025,7 +1025,7 @@ def logistic(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = (np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 
@@ -1112,7 +1112,7 @@ def coxph(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = (np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
@@ -1192,7 +1192,7 @@ def poisson(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = (np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.))
@@ -1290,7 +1290,7 @@ def sqrt_lasso(X,
 
         mean_diag = np.mean((X**2).sum(0))
         if ridge_term is None:
-            ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.sqrt(n / (n - 1.))
@@ -1863,7 +1863,7 @@ def gaussian(X,
 
         mean_diag = np.mean((X**2).sum(0))
         if ridge_term is None:
-            ridge_term = (np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
@@ -1941,7 +1941,7 @@ def logistic(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = (np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 
@@ -2021,7 +2021,7 @@ def coxph(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = (np.std(times) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
@@ -2095,7 +2095,7 @@ def poisson(X,
         mean_diag = np.mean((X**2).sum(0))
 
         if ridge_term is None:
-            ridge_term = (np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n-1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.))
@@ -2183,14 +2183,14 @@ def sqrt_lasso(X,
         n, p = X.shape
 
         if np.asarray(feature_weights).shape == ():
-            feature_weights = np.ones(loglike.shape) * feature_weights
+            feature_weights = np.ones(p) * feature_weights
 
         mean_diag = np.mean((X**2).sum(0))
         if ridge_term is None:
-            ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.))
+            ridge_term = np.sqrt(mean_diag) / (n - 1)
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.sqrt(n / (n - 1.))
+            randomizer_scale = 0.5 * np.sqrt(mean_diag) / np.sqrt(n-1)
 
         if perturb is None:
             perturb = np.random.standard_normal(p) * randomizer_scale

From 41b5eb198f469116c4f810ca3f74cf337b1c5397 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 13 Mar 2018 13:12:08 -0700
Subject: [PATCH 505/617] including line search for debiasing matrix, not
 tested yet

---
 selection/algorithms/debiased_lasso.py        | 109 ++++++++++++++++++
 selection/algorithms/debiased_lasso_utils.pyx |  60 +++++++++-
 2 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index b7976c1d5..c4d4dbab2 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -6,6 +6,115 @@
 
 #from .debiased_lasso_utils import solve_wide_
 from ..constraints.affine import constraints
+from .debiased_lasso_utils import solve_wide_
+
+def debiasing_row(X,
+                  j, 
+                  delta=None,
+                  linesearch=True,     # do a linesearch?
+                  scaling_factor=1.5,  # multiplicative factor for linesearch
+                  max_active=None,     # how big can active set get?
+                  max_try=10,          # how many steps in linesearch?
+                  warn_kkt=FALSE,      # warn if KKT does not seem to be satisfied?
+                  max_iter=50,         # how many iterations for each optimization problem
+                  kkt_stop=True,       # stop based on KKT conditions?
+                  parameter_stop=True, # stop based on relative convergence of parameter?
+                  objective_stop=True, # stop based on relative decrease in objective?
+                  kkt_tol=1.e-4,       # tolerance for the KKT conditions
+                  parameter_tol=1.e-4, # tolerance for relative convergence of parameter
+                  objective_tol=1.e-4  # tolerance for relative decrease in objective
+                  ):
+    """
+    Find a row of debiasing matrix using line search of
+    Javanmard and Montanari.
+
+    """
+
+    n, p = X.shape
+
+    if max_active is None:
+      max_active = min(n, p)
+
+    soln = np.zeros(p)
+    ever_active = np.zeros(p, np.int)
+    ever_active[0] = row
+    nactive = 1
+
+    linear_func = np.zeros(p)
+    linear_func[row] = -1
+    gradient = linear_func.copy()
+
+    counter_idx = 1
+    incr = 0;
+
+    last_output = None
+
+    Xsoln = np.zeros(n) # X\hat{\beta}
+
+    while (counter_idx < max_try):
+
+        result = solve_wide_(Xinfo,                      # this is a design matrix
+                             as.numeric(rep(bound, p)),  # vector of Lagrange multipliers
+                             0,                          # ridge_term 
+                             max_iter, 
+                             soln, 
+                             linear_func, 
+                             gradient, 
+                             Xsoln,
+                             ever_active, 
+                             nactive, 
+                             kkt_tol, 
+                             objective_tol, 
+                             parameter_tol,
+                             max_active,
+                             kkt_stop,
+                             objective_stop,	
+                             parameter_stop)
+
+      iter = result$iter
+
+      # Logic for whether we should continue the line search
+
+      if not linesearch: break
+
+      if counter_idx == 1:
+          if iter == (max_iter+1):
+              incr = 1 # was the original problem feasible? 1 if not
+          else:
+              incr = 0 # original problem was feasible
+
+      if incr == 1: # trying to find a feasible point
+         if iter < (max_iter+1) and counter_idx > 1:
+             break
+         bound = bound * scaling_factor;
+      else if iter == (max_iter + 1) and counter_idx > 1:
+            result = last_output # problem seems infeasible because we didn't solve it
+   	    break               # so we revert to previously found solution
+      
+      bound = bound / scaling_factor
+
+      # If the active set has grown to a certain size
+      # then we stop, presuming problem has become
+      # infeasible.
+
+      # We revert to the previous solution
+	
+      if result['max_active_check']:
+	  result = last_output
+	  break
+      
+      counter_idx += 1
+      last_output = {'soln':result['soln'],
+                     'kkt_check':result['kkt_check']}
+
+    # Check feasibility
+
+    if warn_kkt and not result$kkt_check:
+        warning("Solution for row of M does not seem to be feasible")
+
+    return {'soln':result['soln'],
+            'kkt_check':result['kkt_check'],
+            'gradient':result['gradient']}
 
 def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):
     """
diff --git a/selection/algorithms/debiased_lasso_utils.pyx b/selection/algorithms/debiased_lasso_utils.pyx
index d0992cd09..8bd2b37e0 100644
--- a/selection/algorithms/debiased_lasso_utils.pyx
+++ b/selection/algorithms/debiased_lasso_utils.pyx
@@ -32,7 +32,26 @@ cdef extern from "debias.h":
                   int objective_stop,         # Break based on convergence of objective value? #
                   int parameter_stop)         # Break based on parameter convergence? #
 
-
+   int check_KKT_wide(double *theta_ptr,        # current theta #
+                      double *gradient_ptr,     # X^TX/ncase times theta + linear_func#
+                      double *X_theta_ptr,      # Current fitted values #
+                      double *X_ptr,            # Sqrt of non-neg def matrix -- X^TX/ncase = nndef #
+                      double *linear_func_ptr,  # Linear term in objective #   
+                      int *need_update_ptr,     # Which coordinates need to be updated? #
+                      int nfeature,             # how many columns in X #
+                      int ncase,                # how many rows in X #
+                      double *bound_ptr,        # Lagrange multiplers for \ell_1 #
+                      double ridge_term,        # Ridge / ENet term #
+                      double tol)               # precision for checking KKT conditions #        
+   
+   void update_gradient_wide(double *gradient_ptr,     # X^TX/ncase times theta + linear_func #
+                             double *X_theta_ptr,      # Current fitted values #
+                             double *X_ptr,            # Sqrt of non-neg def matrix -- X^TX/ncase = nndef #
+                             double *linear_func_ptr,  # Linear term in objective #   
+                             int *need_update_ptr,     # Which coordinates need to be updated? #
+                             int nfeature,             # how many columns in X #
+                             int ncase)                # how many rows in X #
+   
 def solve_wide_(np.ndarray[DTYPE_float_t, ndim=2] X,            # Sqrt of non-neg def matrix -- X^TX/ncase = nndef 
                 np.ndarray[DTYPE_float_t, ndim=1] X_theta,      # Fitted values   #
                 np.ndarray[DTYPE_float_t, ndim=1] linear_func,  # Linear term in objective #
@@ -79,3 +98,42 @@ def solve_wide_(np.ndarray[DTYPE_float_t, ndim=2] X,            # Sqrt of non-ne
                parameter_stop,
                objective_stop)
 
+    # Check whether feasible
+
+    ncase = X.shape[0]
+    nfeature = X.shape[1]
+
+    kkt_check = check_KKT_wide(<double *>theta.data,
+                                <double *>gradient.data,
+                                <double *>X_theta.data,
+                                <double *>X.data,
+                                <double *>linear_func.data,
+                                <int *>need_update.data,
+                                ncase,
+                                nfeature,
+                                <double *>bound.data,
+                                ridge_term,
+                                kkt_tol)
+
+    max_active_check = nactive[0] >= max_active
+
+    # Make sure gradient is updated -- essentially a matrix multiply
+
+    update_gradient_wide(<double *>gradient.data,
+                          <double *>X_theta.data,
+                          <double *>X.data,
+                          <double *>linear_func.data,
+                          <int *>need_update.data,
+                          ncase,
+                          nfeature)
+
+    return {'soln':theta,
+            'gradient':gradient,
+            'X_theta':X_theta,
+            'linear_func':linear_func,
+            'iter':iter,
+            'kkt_check':kkt_check,
+            'ever_active':ever_active,
+            'nactive':nactive,
+            'max_active_check':max_active_check}
+              

From df99f81a0258278d01c121e31e5dd6cd5f0d06e3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 13 Mar 2018 16:15:32 -0700
Subject: [PATCH 506/617] WIP: trying to match with previous C function call

---
 selection/algorithms/debiased_lasso.py        | 220 ++++++++++--------
 selection/algorithms/debiased_lasso_utils.pyx |  67 +++---
 .../algorithms/tests/test_debiased_lasso.py   |  24 +-
 3 files changed, 168 insertions(+), 143 deletions(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index c4d4dbab2..6baff7bf7 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -1,29 +1,32 @@
+from warnings import warn
+
 import numpy as np
+from scipy.stats import norm as ndist
+
 from regreg.api import (quadratic_loss,
                         identity_quadratic,
                         l1norm,
                         simple_problem)
 
-#from .debiased_lasso_utils import solve_wide_
 from ..constraints.affine import constraints
 from .debiased_lasso_utils import solve_wide_
 
-def debiasing_row(X,
-                  j, 
-                  delta=None,
-                  linesearch=True,     # do a linesearch?
-                  scaling_factor=1.5,  # multiplicative factor for linesearch
-                  max_active=None,     # how big can active set get?
-                  max_try=10,          # how many steps in linesearch?
-                  warn_kkt=FALSE,      # warn if KKT does not seem to be satisfied?
-                  max_iter=50,         # how many iterations for each optimization problem
-                  kkt_stop=True,       # stop based on KKT conditions?
-                  parameter_stop=True, # stop based on relative convergence of parameter?
-                  objective_stop=True, # stop based on relative decrease in objective?
-                  kkt_tol=1.e-4,       # tolerance for the KKT conditions
-                  parameter_tol=1.e-4, # tolerance for relative convergence of parameter
-                  objective_tol=1.e-4  # tolerance for relative decrease in objective
-                  ):
+def debiasing_matrix(X,
+                     rows, 
+                     bound=None,
+                     linesearch=True,     # do a linesearch?
+                     scaling_factor=1.5,  # multiplicative factor for linesearch
+                     max_active=None,     # how big can active set get?
+                     max_try=10,          # how many steps in linesearch?
+                     warn_kkt=False,      # warn if KKT does not seem to be satisfied?
+                     max_iter=50,         # how many iterations for each optimization problem
+                     kkt_stop=True,       # stop based on KKT conditions?
+                     parameter_stop=True, # stop based on relative convergence of parameter?
+                     objective_stop=True, # stop based on relative decrease in objective?
+                     kkt_tol=1.e-4,       # tolerance for the KKT conditions
+                     parameter_tol=1.e-4, # tolerance for relative convergence of parameter
+                     objective_tol=1.e-4  # tolerance for relative decrease in objective
+                     ):
     """
     Find a row of debiasing matrix using line search of
     Javanmard and Montanari.
@@ -33,88 +36,105 @@ def debiasing_row(X,
     n, p = X.shape
 
     if max_active is None:
-      max_active = min(n, p)
+        max_active = max(50, 0.3 * n)
 
-    soln = np.zeros(p)
-    ever_active = np.zeros(p, np.int)
-    ever_active[0] = row
-    nactive = 1
+    rows = np.atleast_1d(rows)
+    M = np.zeros((len(rows), p))
 
-    linear_func = np.zeros(p)
-    linear_func[row] = -1
-    gradient = linear_func.copy()
+    nndef_diag = (X**2).sum(0) / n
+
+    for idx, row in enumerate(rows):
+
+        soln = np.zeros(p)
+        soln_old = np.zeros(p)
+        ever_active = np.zeros(p, np.int)
+        ever_active[0] = row
+        nactive = np.array([1], np.int)
+
+        linear_func = np.zeros(p)
+        linear_func[row] = -1
+        gradient = linear_func.copy()
+
+        counter_idx = 1
+        incr = 0;
+
+        last_output = None
+
+        Xsoln = np.zeros(n) # X\hat{\beta}
+
+        bound_vec = np.zeros(p) * bound
+        ridge_term = 0
+
+        need_update = np.zeros(p, np.int)
+
+        while (counter_idx < max_try):
 
-    counter_idx = 1
-    incr = 0;
-
-    last_output = None
-
-    Xsoln = np.zeros(n) # X\hat{\beta}
-
-    while (counter_idx < max_try):
-
-        result = solve_wide_(Xinfo,                      # this is a design matrix
-                             as.numeric(rep(bound, p)),  # vector of Lagrange multipliers
-                             0,                          # ridge_term 
-                             max_iter, 
-                             soln, 
-                             linear_func, 
-                             gradient, 
-                             Xsoln,
-                             ever_active, 
-                             nactive, 
-                             kkt_tol, 
-                             objective_tol, 
-                             parameter_tol,
-                             max_active,
-                             kkt_stop,
-                             objective_stop,	
-                             parameter_stop)
-
-      iter = result$iter
-
-      # Logic for whether we should continue the line search
-
-      if not linesearch: break
-
-      if counter_idx == 1:
-          if iter == (max_iter+1):
-              incr = 1 # was the original problem feasible? 1 if not
-          else:
-              incr = 0 # original problem was feasible
-
-      if incr == 1: # trying to find a feasible point
-         if iter < (max_iter+1) and counter_idx > 1:
-             break
-         bound = bound * scaling_factor;
-      else if iter == (max_iter + 1) and counter_idx > 1:
-            result = last_output # problem seems infeasible because we didn't solve it
-   	    break               # so we revert to previously found solution
-      
-      bound = bound / scaling_factor
-
-      # If the active set has grown to a certain size
-      # then we stop, presuming problem has become
-      # infeasible.
-
-      # We revert to the previous solution
-	
-      if result['max_active_check']:
-	  result = last_output
-	  break
-      
-      counter_idx += 1
-      last_output = {'soln':result['soln'],
-                     'kkt_check':result['kkt_check']}
-
-    # Check feasibility
-
-    if warn_kkt and not result$kkt_check:
-        warning("Solution for row of M does not seem to be feasible")
-
-    return {'soln':result['soln'],
-            'kkt_check':result['kkt_check'],
-            'gradient':result['gradient']}
+            print(soln)
+            result = solve_wide_(X,                          # this is a design matrix
+                                 Xsoln,
+                                 linear_func,
+                                 nndef_diag,
+                                 gradient,
+                                 need_update,
+                                 ever_active, 
+                                 nactive,
+                                 bound_vec,
+                                 ridge_term,
+                                 soln,
+                                 soln_old,
+                                 max_iter,
+                                 kkt_tol,
+                                 objective_tol,
+                                 parameter_tol,
+                                 max_active,
+                                 kkt_stop,
+                                 objective_stop,
+                                 parameter_stop)
+
+            niter = result['iter']
+
+            # Logic for whether we should continue the line search
+
+            if not linesearch: break
+
+            if counter_idx == 1:
+                if niter == (max_iter+1):
+                    incr = 1 # was the original problem feasible? 1 if not
+                else:
+                    incr = 0 # original problem was feasible
+                    
+            if incr == 1: # trying to find a feasible point
+                if niter < (max_iter+1) and counter_idx > 1:
+                    break
+                bound = bound * scaling_factor;
+            elif niter == (max_iter + 1) and counter_idx > 1:
+                result = last_output # problem seems infeasible because we didn't solve it
+                break               # so we revert to previously found solution
+
+            bound = bound / scaling_factor
+
+            # If the active set has grown to a certain size
+            # then we stop, presuming problem has become
+            # infeasible.
+            
+            # We revert to the previous solution
+
+            if result['max_active_check']:
+                result = last_output
+                break
+
+            counter_idx += 1
+            last_output = {'soln':result['soln'],
+                           'kkt_check':result['kkt_check']}
+
+            # Check feasibility
+
+            if warn_kkt and not result['kkt_check']:
+                warn("Solution for row of M does not seem to be feasible")
+
+            M[idx] = result['soln'] * 1.
+
+    return M
 
 def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):
     """
@@ -301,8 +321,12 @@ def debiased_lasso_inference(lasso_obj, variables, delta):
 
     intervals = []
     pvalues = []
-    for var in variables:
-        theta_var = _find_row_approx_inverse(H, var, delta)
+
+    approx_inverse = debiasing_matrix(H, variables, delta)
+
+    for Midx, var in enumerate(variables):
+
+        theta_var = approx_inverse[Midx]
 
         # express target in pair (\hat{\beta}_A, G_I)
         eta = np.zeros_like(theta_var)
diff --git a/selection/algorithms/debiased_lasso_utils.pyx b/selection/algorithms/debiased_lasso_utils.pyx
index 8bd2b37e0..09e46fcde 100644
--- a/selection/algorithms/debiased_lasso_utils.pyx
+++ b/selection/algorithms/debiased_lasso_utils.pyx
@@ -75,28 +75,28 @@ def solve_wide_(np.ndarray[DTYPE_float_t, ndim=2] X,            # Sqrt of non-ne
                 int objective_stop,                             # Break based on convergence of objective value? #
                 int parameter_stop):                            # Break based on parameter convergence? #
 
-    solve_wide(<double *>X.data,
-               <double *>X_theta.data,
-               <double *>linear_func.data,
-               <double *>nndef_diag.data,
-               <double *>gradient.data,
-               <int *>need_update.data,
-               <int *>ever_active.data,
-               <int *>nactive.data,
-	       <int>X.shape[0],
-	       <int>X.shape[1],
-               <double *>bound.data,
-               ridge_term,
-               <double *>theta.data,
-               <double *>theta_old.data,
-               maxiter,
-               kkt_tol,
-               parameter_tol,
-               objective_tol,
-               max_active,
-               kkt_stop,
-               parameter_stop,
-               objective_stop)
+    niter = solve_wide(<double *>X.data,
+                        <double *>X_theta.data,
+                        <double *>linear_func.data,
+                        <double *>nndef_diag.data,
+                        <double *>gradient.data,
+                        <int *>need_update.data,
+                        <int *>ever_active.data,
+                        <int *>nactive.data,
+                        <int>X.shape[0],
+                        <int>X.shape[1],
+                        <double *>bound.data,
+                        ridge_term,
+                        <double *>theta.data,
+                        <double *>theta_old.data,
+                        maxiter,
+                        kkt_tol,
+                        parameter_tol,
+                        objective_tol,
+                        max_active,
+                        kkt_stop,
+                        parameter_stop,
+                        objective_stop)
 
     # Check whether feasible
 
@@ -104,16 +104,17 @@ def solve_wide_(np.ndarray[DTYPE_float_t, ndim=2] X,            # Sqrt of non-ne
     nfeature = X.shape[1]
 
     kkt_check = check_KKT_wide(<double *>theta.data,
-                                <double *>gradient.data,
-                                <double *>X_theta.data,
-                                <double *>X.data,
-                                <double *>linear_func.data,
-                                <int *>need_update.data,
-                                ncase,
-                                nfeature,
-                                <double *>bound.data,
-                                ridge_term,
-                                kkt_tol)
+                               <double *>gradient.data,
+                               <double *>X_theta.data,
+                               <double *>X.data,
+                               <double *>linear_func.data,
+                               <int *>need_update.data,
+                               ncase,
+                               nfeature,
+                               <double *>bound.data,
+                               ridge_term,
+                               kkt_tol)
+    print(kkt_check, 'kkt')
 
     max_active_check = nactive[0] >= max_active
 
@@ -131,7 +132,7 @@ def solve_wide_(np.ndarray[DTYPE_float_t, ndim=2] X,            # Sqrt of non-ne
             'gradient':gradient,
             'X_theta':X_theta,
             'linear_func':linear_func,
-            'iter':iter,
+            'iter':niter,
             'kkt_check':kkt_check,
             'ever_active':ever_active,
             'nactive':nactive,
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index 1746594eb..bde24b0ea 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -2,14 +2,13 @@
 import nose.tools as nt
 import numpy.testing.decorators as dec
 
-from selection.tests.instance import gaussian_instance as instance
-import selection.tests.reports as reports
+from ...tests.instance import gaussian_instance as instance
 
-from selection.algorithms.lasso import lasso 
-from selection.algorithms.debiased_lasso import (debiased_lasso_inference,
-                                                 _find_row_approx_inverse,
-                                                 _find_row_approx_inverse_X)
-import regreg.api as rr
+from ..lasso import lasso 
+from ..debiased_lasso import (debiased_lasso_inference,
+                              _find_row_approx_inverse,
+                              _find_row_approx_inverse_X,
+                              debiasing_matrix)
 
 def test_gaussian(n=100, p=20):
 
@@ -41,7 +40,8 @@ def test_approx_inverse(n=50, p=100):
     soln = _find_row_approx_inverse(S, j, delta, solve_args={'min_its':500, 'tol':1.e-14, 'max_its':1000} )
 
     soln_C = _find_row_approx_inverse_X(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, maxiter=1000, objective_tol=1.e-14)
-
+    soln_C2 = debiasing_matrix(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, max_iter=1000, objective_tol=1.e-14, linesearch=False)
+    stop
     basis_vector = np.zeros(p)
     basis_vector[j] = 1.
 
@@ -49,7 +49,7 @@ def test_approx_inverse(n=50, p=100):
 
     U = - S.dot(-soln) - basis_vector
 
-    yield nt.assert_true, np.fabs(U).max() < delta * 1.001
-    yield nt.assert_equal, np.sign(U[j]), -np.sign(soln[j])
-    yield nt.assert_raises, ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta
-    yield np.testing.assert_allclose, soln, soln_C, 1.e-3
+    #yield nt.assert_true, np.fabs(U).max() < delta * 1.001
+    #yield nt.assert_equal, np.sign(U[j]), -np.sign(soln[j])
+    #yield nt.assert_raises, ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta
+    #yield np.testing.assert_allclose, soln, soln_C, 1.e-3

From e0a819c79b751a55d0a4bc0b51ac49a66385d59d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 14 Mar 2018 07:36:01 -0700
Subject: [PATCH 507/617] updates to C software

---
 C-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/C-software b/C-software
index fc60f471e..aca77f1e3 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit fc60f471ecd5fc40b822ee36d46b1a5aaf7ce7e8
+Subproject commit aca77f1e320dafba6041c4dc44cf9ffc049edec8

From 065ff202d8dd3b7748212ff3ee2ce707cf13ad33 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 14 Mar 2018 13:44:35 -0700
Subject: [PATCH 508/617] NF: finished linesearch for debiasing matrix, tested
 with R comparison

---
 selection/algorithms/debiased_lasso.py        | 63 +++---------
 selection/algorithms/debiased_lasso_utils.pyx |  1 -
 .../algorithms/tests/test_debiased_lasso.py   | 95 +++++++++++++++++--
 3 files changed, 101 insertions(+), 58 deletions(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 6baff7bf7..72a3798ed 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -3,11 +3,6 @@
 import numpy as np
 from scipy.stats import norm as ndist
 
-from regreg.api import (quadratic_loss,
-                        identity_quadratic,
-                        l1norm,
-                        simple_problem)
-
 from ..constraints.affine import constraints
 from .debiased_lasso_utils import solve_wide_
 
@@ -35,6 +30,9 @@ def debiasing_matrix(X,
 
     n, p = X.shape
 
+    if bound is None:
+        bound = (1./np.sqrt(n)) * ndist.ppf(1.-(0.1/(p**2)))
+
     if max_active is None:
         max_active = max(50, 0.3 * n)
 
@@ -48,7 +46,7 @@ def debiasing_matrix(X,
         soln = np.zeros(p)
         soln_old = np.zeros(p)
         ever_active = np.zeros(p, np.int)
-        ever_active[0] = row
+        ever_active[0] = row + 1 # C code is 1-based
         nactive = np.array([1], np.int)
 
         linear_func = np.zeros(p)
@@ -62,15 +60,14 @@ def debiasing_matrix(X,
 
         Xsoln = np.zeros(n) # X\hat{\beta}
 
-        bound_vec = np.zeros(p) * bound
         ridge_term = 0
 
         need_update = np.zeros(p, np.int)
 
         while (counter_idx < max_try):
+            bound_vec = np.ones(p) * bound
 
-            print(soln)
-            result = solve_wide_(X,                          # this is a design matrix
+            result = solve_wide_(X,       
                                  Xsoln,
                                  linear_func,
                                  nndef_diag,
@@ -96,6 +93,8 @@ def debiasing_matrix(X,
             # Logic for whether we should continue the line search
 
             if not linesearch: break
+#                M[idx] = result['soln'].copy()
+#                break
 
             if counter_idx == 1:
                 if niter == (max_iter+1):
@@ -132,49 +131,13 @@ def debiasing_matrix(X,
             if warn_kkt and not result['kkt_check']:
                 warn("Solution for row of M does not seem to be feasible")
 
-            M[idx] = result['soln'] * 1.
-
-    return M
-
-def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):
-    """
-
-    Find an approximation of j-th row of inverse of Sigma.
-
-    Solves the problem
-
-    .. math::
-
-        \text{min}_{\theta} \frac{1}{2} \theta^TS\theta
-
-    subject to $\|\Sigma \hat{\theta} - e_j\|_{\infty} \leq \delta$ with
-    $e_j$ the $j$-th elementary basis vector and `S` as $\Sigma$, 
-    and `delta` as $\delta$.
-
-    Described in Table 1, display (4) of https://arxiv.org/pdf/1306.3171.pdf
-
-    """
-    p = Sigma.shape[0]
-    elem_basis = np.zeros(p, np.float)
-    elem_basis[j] = 1.
-    loss = quadratic_loss(p, Q=Sigma)
-    penalty = l1norm(p, lagrange=delta)
-    iq = identity_quadratic(0, 0, elem_basis, 0)
-    problem = simple_problem(loss, penalty)
-    dual_soln = problem.solve(iq, **solve_args)
-
-    soln = -dual_soln
-
-    # check feasibility -- if it fails miserably
-    # presume delta was too small
-
-    feasibility_gap = np.fabs(Sigma.dot(soln) - elem_basis).max()
-    if feasibility_gap > (1.01) * delta:
-        raise ValueError('does not seem to be a feasible point -- try increasing delta')
+        M[idx] = result['soln'] * 1.
 
-    return soln
+    return np.squeeze(M)
 
-def _find_row_approx_inverse_X(X, j, delta, 
+def _find_row_approx_inverse_X(X, 
+                               j, 
+                               delta, 
                                maxiter=50,
                                kkt_tol=1.e-4,
                                objective_tol=1.e-4,
diff --git a/selection/algorithms/debiased_lasso_utils.pyx b/selection/algorithms/debiased_lasso_utils.pyx
index 09e46fcde..e46a20c7f 100644
--- a/selection/algorithms/debiased_lasso_utils.pyx
+++ b/selection/algorithms/debiased_lasso_utils.pyx
@@ -114,7 +114,6 @@ def solve_wide_(np.ndarray[DTYPE_float_t, ndim=2] X,            # Sqrt of non-ne
                                <double *>bound.data,
                                ridge_term,
                                kkt_tol)
-    print(kkt_check, 'kkt')
 
     max_active_check = nactive[0] >= max_active
 
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index bde24b0ea..fc19283fa 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -6,10 +6,22 @@
 
 from ..lasso import lasso 
 from ..debiased_lasso import (debiased_lasso_inference,
-                              _find_row_approx_inverse,
                               _find_row_approx_inverse_X,
                               debiasing_matrix)
 
+# for regreg implementation comparison
+
+from regreg.api import (quadratic_loss,
+                        identity_quadratic,
+                        l1norm,
+                        simple_problem)
+
+# to compare to R code
+
+import rpy2.robjects as rpy
+from rpy2.robjects import numpy2ri
+rpy.r('library(selectiveInference)')
+
 def test_gaussian(n=100, p=20):
 
     X, y, beta = instance(n=n, p=p, sigma=1.)[:3]
@@ -41,15 +53,84 @@ def test_approx_inverse(n=50, p=100):
 
     soln_C = _find_row_approx_inverse_X(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, maxiter=1000, objective_tol=1.e-14)
     soln_C2 = debiasing_matrix(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, max_iter=1000, objective_tol=1.e-14, linesearch=False)
-    stop
+
+    # make sure linesearch terminates
+
+    debiasing_matrix(X, j, delta, linesearch=True)
+
     basis_vector = np.zeros(p)
     basis_vector[j] = 1.
 
     nt.assert_true(np.fabs(S.dot(soln) - basis_vector).max() < delta * 1.001)
-
+    
     U = - S.dot(-soln) - basis_vector
 
-    #yield nt.assert_true, np.fabs(U).max() < delta * 1.001
-    #yield nt.assert_equal, np.sign(U[j]), -np.sign(soln[j])
-    #yield nt.assert_raises, ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta
-    #yield np.testing.assert_allclose, soln, soln_C, 1.e-3
+    yield np.testing.assert_allclose, soln_C, soln_C2
+    yield nt.assert_true, np.fabs(U).max() < delta * 1.001
+    yield nt.assert_equal, np.sign(U[j]), -np.sign(soln[j])
+    yield nt.assert_raises, ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta
+    yield np.testing.assert_allclose, soln, soln_C, 1.e-3
+
+def test_compareR(n=50, p=100):
+    """Compare debiasing_matrix (with linesearch) to R's debiasingMatrix for one row."""
+    # n and p are taken from the arguments so callers can vary the problem size
+    X = np.random.standard_normal((n, p))
+    j = 5  # 0-based row index on the Python side; R receives j+1
+
+    # add column j to columns 3 and 10 so those columns are correlated with it
+    X[:,3] = X[:,3] + X[:,j]
+    X[:,10] = X[:,10] + X[:,j]
+
+    numpy2ri.activate()  # numpy <-> R conversion is needed for the assign calls
+    try:
+        rpy.r.assign('X', X)
+        rpy.r.assign('j', j+1)
+        rpy.r('soln = selectiveInference:::debiasingMatrix(X, TRUE, nrow(X), j)')
+        soln_R = np.squeeze(np.asarray(rpy.r('soln')))
+    finally:
+        numpy2ri.deactivate()  # always undo activate() so conversion state does not leak
+
+    soln_py = debiasing_matrix(X, j, linesearch=True)
+
+    np.testing.assert_allclose(soln_R, soln_py)
+    
+## regreg implementation
+
+def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):
+    r"""
+    Find an approximation of j-th row of inverse of Sigma.
+
+    Solves the problem
+
+    .. math::
+
+        \text{min}_{\theta} \frac{1}{2} \theta^TS\theta
+
+    subject to $\|\Sigma \hat{\theta} - e_j\|_{\infty} \leq \delta$ with
+    $e_j$ the $j$-th elementary basis vector, $S = \Sigma$ given by `Sigma`,
+    and `delta` as $\delta$.
+
+    Raises ValueError when the feasibility gap exceeds 1.01 * delta.
+
+    Described in Table 1, display (4) of https://arxiv.org/pdf/1306.3171.pdf
+    """
+    p = Sigma.shape[0]
+    elem_basis = np.zeros(p, float)  # builtin float: the np.float alias no longer exists in NumPy
+    elem_basis[j] = 1.
+    loss = quadratic_loss(p, Q=Sigma)
+    penalty = l1norm(p, lagrange=delta)
+    iq = identity_quadratic(0, 0, elem_basis, 0)
+    problem = simple_problem(loss, penalty)
+    dual_soln = problem.solve(iq, **solve_args)  # solve_args is only read, never mutated
+
+    soln = -dual_soln
+
+    # check feasibility -- if it fails miserably
+    # presume delta was too small
+
+    feasibility_gap = np.fabs(Sigma.dot(soln) - elem_basis).max()
+    if feasibility_gap > (1.01) * delta:
+        raise ValueError('does not seem to be a feasible point -- try increasing delta')
+
+    return soln
+

From 82bb8cbad60f42e73763091439776805ddcef09a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 14 Mar 2018 15:44:14 -0700
Subject: [PATCH 509/617] NF: debiased lasso targets, a little anticonservative

---
 selection/randomized/lasso.py                 | 52 ++++++++++++++-----
 .../randomized/tests/test_highdim_lasso.py    | 38 ++++++--------
 2 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index b4d60f8a5..8358f7b8b 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -22,6 +22,7 @@
 from .glm import (pairs_bootstrap_glm,
                   glm_nonparametric_bootstrap,
                   glm_parametric_covariance)
+from ..algorithms.debiased_lasso import debiasing_matrix
 
 class lasso_view(query):
 
@@ -1614,10 +1615,10 @@ def summary(self,
 
         if target == 'selected':
             observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
-        elif target == 'full':
+        else:
             X, y = self.loglike.data
             n, p = X.shape
-            if n > p:
+            if n > p and target == 'full':
                 observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features, dispersion=dispersion)
             else:
                 observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features, dispersion=dispersion)
@@ -1761,9 +1762,9 @@ def full_targets(self, features=None, dispersion=None):
 
         if features is None:
             features = self._overall
-        features_b = np.zeros(self._overall.shape, np.bool)
-        features_b[features] = True
-        features = features_b
+        features_bool = np.zeros(self._overall.shape, np.bool)
+        features_bool[features] = True
+        features = features_bool
 
         X, y = self.loglike.data
         n, p = X.shape
@@ -1785,17 +1786,41 @@ def full_targets(self, features=None, dispersion=None):
         alternatives = ['twosided'] * features.sum()
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
-    def debiased_targets(self, dispersion=None):
-        
-        raise NotImplementedError
+    def debiased_targets(self, features=None, dispersion=None, **debiasing_args):
 
-        if not hasattr(self, "_debiased_targets"):
-            X, y = self.loglike.data
-            n, p = X.shape
+        if features is None:
+            features = self._overall
+        features_bool = np.zeros(self._overall.shape, np.bool)
+        features_bool[features] = True
+        features = features_bool
 
-            self._debiased_targets = observed_target, cov_target, crosscov_target_score
+        X, y = self.loglike.data
+        n, p = X.shape
 
-        return self._debiased_targets
+        # target is one-step estimator
+
+        G = self.loglike.smooth_objective(self.initial_soln, 'grad')
+        Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(self._W)[:, None], 
+                                                  np.nonzero(features)[0],
+                                                  **debiasing_args)) / n
+        observed_target = self.initial_soln[features] - Qinv_hat.dot(G)
+        if p > n:
+            M1 = Qinv_hat.dot(X.T)
+            cov_target = (M1 * self._W[None,:]).dot(M1.T)
+            crosscov_target_score = -(M1 * self._W[None,:]).dot(X).T
+        else:
+            Qfull = X.T.dot(self._W[:, None] * X)
+            cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T))
+            crosscov_target_score = -Qinv_hat.dot(Qfull).T
+
+        if dispersion is None: # use Pearson's X^2
+            Xfeat = X[:,features]
+            Qrelax = Xfeat.T.dot(self._W[:, None] * Xfeat)
+            relaxed_soln = self.initial_soln[features] - np.linalg.inv(Qrelax).dot(G[features])
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / self._W).sum() / (n - features.sum()) 
+
+        alternatives = ['twosided'] * features.sum()
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     @staticmethod
     def gaussian(X, 
@@ -1871,7 +1896,6 @@ def gaussian(X,
         return highdim(loglike, np.asarray(feature_weights) / sigma**2,
                        ridge_term, randomizer_scale)
 
-
     @staticmethod
     def logistic(X, 
                  successes, 
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index e3f18c919..5fd3232f7 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -9,13 +9,12 @@
 from rpy2.robjects import numpy2ri
 rpy.r('library(selectiveInference)')
 
-import selection.randomized.lasso as L; reload(L)
 from ..lasso import highdim 
 from ...tests.instance import gaussian_instance
 from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso
 import matplotlib.pyplot as plt
 
-def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000):
+def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, target='full', rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000):
     """
     Compare to R randomized lasso
     """
@@ -44,19 +43,17 @@ def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rh
     signs = conv.fit()
     nonzero = signs != 0
 
-    if full:
-        _, pval, intervals = conv.summary(target="full",
-                                          ndraw=ndraw,
-                                          burnin=burnin, 
-                                          compute_intervals=False)
-    else:
-        _, pval, intervals = conv.summary(target="selected",
-                                          ndraw=ndraw,
-                                          burnin=burnin, 
-                                          compute_intervals=False)
-
+    _, pval, intervals = conv.summary(target=target,
+                                      ndraw=ndraw,
+                                      burnin=burnin, 
+                                      compute_intervals=False)
+        
     return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0]
 
+def test_all_targets(n=100, p=20, signal_fac=1.5, s=5, sigma=3, rho=0.4):
+    for target in ['full', 'selected', 'debiased']:  # forward each target; previously the loop variable was ignored
+        test_highdim_lasso(n=n, p=p, signal_fac=signal_fac, s=s, sigma=sigma, rho=rho, target=target)
+
 def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=True, rho=0.4, randomizer_scale=1., ndraw=5000, burnin=1000, 
                             ridge_term=None, compare_to_lasso=True):
     """
@@ -162,20 +159,18 @@ def test_compareR(n=200, p=10, signal=np.sqrt(4) * np.sqrt(2 * np.log(10)), s=5,
     assert np.linalg.norm(conv.sampler.affine_con.mean - cond_mean[:,0]) / np.linalg.norm(cond_mean[:,0]) < 1.e-3
 
 
-def main(nsim=500, sqrt=False, full=True):
+def main(nsim=500, n=500, p=200, sqrt=False, target='full', sigma=3):
 
     P0, PA = [], []
     from statsmodels.distributions import ECDF
 
-    n, p = 500, 200
-
     for i in range(nsim):
-        try:
+        if True: # try:
             if not sqrt:
-                p0, pA = test_highdim_lasso(n=n, p=p, full=full)
+                p0, pA = test_highdim_lasso(n=n, p=p, target=target, sigma=sigma)
             else:
-                p0, pA = test_sqrt_highdim_lasso(n=n, p=p, full=full, compare_to_lasso=False)
-        except:
+                p0, pA = test_sqrt_highdim_lasso(n=n, p=p, full=(target == 'full'), compare_to_lasso=False)  # sqrt test takes `full`, not `target`
+        else: # except:
             p0, pA = [], []
         P0.extend(p0)
         PA.extend(pA)
@@ -203,7 +198,7 @@ def Rpval(X, Y, W, noise_scale=None):
         rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam, noise_scale=noise_scale, kkt_tol=1.e-8, parameter_tol=1.e-8)')
     else:
         rpy.r('soln = selectiveInference:::randomizedLasso(X, Y, lam)')
-    rpy.r('targets=selectiveInference:::set.targets(soln,type="full")')
+    rpy.r('targets=selectiveInference:::set.target(soln, type="full")')
     #rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="norejection", targets=targets, nsample=5000, burnin=1000)')
     rpy.r('rand_inf = selectiveInference:::randomizedLassoInf(soln, sampler="restrictedMVN", targets=targets, nsample=5000, burnin=2000)')
 
@@ -216,6 +211,7 @@ def Rpval(X, Y, W, noise_scale=None):
     soln = np.asarray(rpy.r('soln$soln'))
     ridge = rpy.r('soln$ridge_term')
 
+    numpy2ri.deactivate()
     return pval, vars, rand, active, soln, ridge, cond_cov, cond_mean
 
 

From d47d1e12f801884951a6467465d91f3fecec0480 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 15 Mar 2018 13:08:31 -0700
Subject: [PATCH 510/617] BF: debiasing_matrix was returning none

---
 selection/algorithms/debiased_lasso.py            | 12 +++++-------
 selection/algorithms/tests/test_debiased_lasso.py | 14 ++++++++++++--
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 72a3798ed..58e5cd92d 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -93,8 +93,6 @@ def debiasing_matrix(X,
             # Logic for whether we should continue the line search
 
             if not linesearch: break
-#                M[idx] = result['soln'].copy()
-#                break
 
             if counter_idx == 1:
                 if niter == (max_iter+1):
@@ -112,6 +110,10 @@ def debiasing_matrix(X,
 
             bound = bound / scaling_factor
 
+            counter_idx += 1
+            last_output = {'soln':result['soln'],
+                           'kkt_check':result['kkt_check']}
+
             # If the active set has grown to a certain size
             # then we stop, presuming problem has become
             # infeasible.
@@ -122,10 +124,6 @@ def debiasing_matrix(X,
                 result = last_output
                 break
 
-            counter_idx += 1
-            last_output = {'soln':result['soln'],
-                           'kkt_check':result['kkt_check']}
-
             # Check feasibility
 
             if warn_kkt and not result['kkt_check']:
@@ -225,7 +223,7 @@ def debiased_lasso_inference(lasso_obj, variables, delta):
     """
 
     if not lasso_obj.ignore_inactive_constraints:
-        raise ValueError('debiased lasso should be fit ignoring active constraints as implied covariance between active and inactive score is 0')
+        raise ValueError('debiased lasso should be fit ignoring inactive constraints as implied covariance between active and inactive score is 0')
 
     # should we check that loglike is gaussian
 
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index fc19283fa..30ce91a41 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -40,7 +40,6 @@ def test_gaussian(n=100, p=20):
 
 def test_approx_inverse(n=50, p=100):
 
-    n, p = 50, 100
     X = np.random.standard_normal((n, p))
     j = 5
     delta = 0.30
@@ -71,9 +70,20 @@ def test_approx_inverse(n=50, p=100):
     yield nt.assert_raises, ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta
     yield np.testing.assert_allclose, soln, soln_C, 1.e-3
 
+def test_approx_inverse_nondegen(n=100, p=20):
+    """Regression check: debiasing_matrix should return a result (not None) for an n > p design."""
+    X = np.random.standard_normal((n, p))
+    j = 5
+
+    X[:,3] = X[:,3] + X[:,j]
+    X[:,10] = X[:,10] + X[:,j]
+
+    M = debiasing_matrix(X, np.arange(p))
+    assert M is not None
+
+
 def test_compareR(n=50, p=100):
 
-    n, p = 50, 100
     X = np.random.standard_normal((n, p))
     j = 5
     delta = 0.30

From a62bbc78b22c5f067ff43100281f0fa376c76980 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 17 Mar 2018 13:22:31 -0700
Subject: [PATCH 511/617] commit changes

---
 selection/algorithms/debiased_lasso.py                | 1 +
 selection/randomized/tests/test_selective_MLE_high.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index b7976c1d5..613c6c7eb 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -45,6 +45,7 @@ def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1
 
     return soln
 
+
 def _find_row_approx_inverse_X(X, j, delta, 
                                maxiter=50,
                                kkt_tol=1.e-4,
diff --git a/selection/randomized/tests/test_selective_MLE_high.py b/selection/randomized/tests/test_selective_MLE_high.py
index 28990ad4a..d912675de 100644
--- a/selection/randomized/tests/test_selective_MLE_high.py
+++ b/selection/randomized/tests/test_selective_MLE_high.py
@@ -19,7 +19,7 @@ def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, rand
     X, Y, beta = inst(n=n,
                       p=p, 
                       signal=signal, 
-                      s=s, 
+                      s=s,
                       equicorrelated=False, 
                       rho=rho, 
                       sigma=sigma, 
@@ -115,3 +115,6 @@ def main(nsim=500, full=True, full_dispersion=False):
             plt.plot([0, 1], [0, 1], 'k--')
             plt.savefig("plot.pdf")
     plt.show()
+
+if __name__ == "__main__":
+    main()

From 8d4906ff7de81a72c2e3a6ee23b813be3a3853e6 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 19 Mar 2018 10:40:02 -0700
Subject: [PATCH 512/617] clean tests

---
 selection/adjusted_MLE/tests/approx_MLE.py    | 105 -----
 .../tests/compare_lasso_simple.py             | 143 ------
 selection/adjusted_MLE/tests/compare_risks.py | 221 ---------
 selection/adjusted_MLE/tests/exact_MLE.py     |  47 --
 .../tests/high_dim_boot_coverage.py           | 155 -------
 selection/adjusted_MLE/tests/mle_LASSO.py     |  61 ---
 selection/adjusted_MLE/tests/relaxed_lasso.py | 421 ------------------
 selection/adjusted_MLE/tests/test_BH.py       | 169 -------
 selection/adjusted_MLE/tests/test_MLE.py      | 253 -----------
 selection/adjusted_MLE/tests/test_MLE_boot.py | 217 ---------
 .../adjusted_MLE/tests/test_MLE_univariate.py | 123 -----
 .../adjusted_MLE/tests/test_boot_selective.py | 125 ------
 .../adjusted_MLE/tests/test_simple_problem.py | 223 ----------
 13 files changed, 2263 deletions(-)
 delete mode 100644 selection/adjusted_MLE/tests/approx_MLE.py
 delete mode 100644 selection/adjusted_MLE/tests/compare_lasso_simple.py
 delete mode 100644 selection/adjusted_MLE/tests/compare_risks.py
 delete mode 100644 selection/adjusted_MLE/tests/exact_MLE.py
 delete mode 100644 selection/adjusted_MLE/tests/high_dim_boot_coverage.py
 delete mode 100644 selection/adjusted_MLE/tests/mle_LASSO.py
 delete mode 100644 selection/adjusted_MLE/tests/relaxed_lasso.py
 delete mode 100644 selection/adjusted_MLE/tests/test_BH.py
 delete mode 100644 selection/adjusted_MLE/tests/test_MLE.py
 delete mode 100644 selection/adjusted_MLE/tests/test_MLE_boot.py
 delete mode 100644 selection/adjusted_MLE/tests/test_MLE_univariate.py
 delete mode 100644 selection/adjusted_MLE/tests/test_boot_selective.py
 delete mode 100644 selection/adjusted_MLE/tests/test_simple_problem.py

diff --git a/selection/adjusted_MLE/tests/approx_MLE.py b/selection/adjusted_MLE/tests/approx_MLE.py
deleted file mode 100644
index fc86317f9..000000000
--- a/selection/adjusted_MLE/tests/approx_MLE.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import numpy as np
-from scipy.stats import norm as ndist
-from scipy.optimize import minimize
-
-def log_barrier(u, barrier_scale, threshold = 2.):
-
-    BIG = 10 ** 10
-    violation = u-threshold<0.
-    return np.log(1 + (np.sqrt(barrier_scale)/ (u-threshold))) + violation* BIG
-
-def grad_log_barrier(u, barrier_scale, threshold = 2.):
-    return 1./(u-threshold + np.sqrt(barrier_scale)) - 1./(u-threshold)
-
-def grad_log_hessian(u, barrier_scale, threshold = 2.):
-    return -1. / ((u - threshold + np.sqrt(barrier_scale))**2.) + 1. / ((u - threshold)** 2.)
-
-def approx_grad_cgf(mu, randomization_scale = 0.5, threshold = 2, nstep= 50, tol=1.e-10):
-
-    variance = 1 + randomization_scale ** 2.
-    objective = lambda u: -u*(mu/variance) + (u ** 2.)/(2.* variance)+ log_barrier(u, variance)
-    gradient = lambda u: -(mu/variance) + u/variance + grad_log_barrier(u, variance)
-    hessian = lambda u: 1/variance + grad_log_hessian(u, variance)
-
-    current_value = np.inf
-    initial = threshold +1.
-    current = initial
-    step = 1
-
-    for itercount in range(nstep):
-        newton_step = (gradient(current)/(hessian(current)))
-
-        # make sure proposal is feasible
-        count = 0
-        while True:
-            count += 1
-            proposal = current - step * newton_step
-            failing = (proposal < threshold)
-            if not failing.sum():
-                break
-            step *= 0.5 ** failing
-
-            if count >= 40:
-                raise ValueError('not finding a feasible point')
-
-        # make sure proposal is a descent
-
-        while True:
-            proposal = current - step * newton_step
-            proposed_value = objective(proposal)
-            if proposed_value <= current_value:
-                break
-            step *= 0.5
-
-        # stop if relative decrease is small
-
-        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
-            current = proposal
-            current_value = proposed_value
-            break
-
-        current = proposal
-        current_value = proposed_value
-
-        if itercount % 4 == 0:
-            step *= 2
-
-    value = objective(current)
-    return current/variance + ((randomization_scale** 2.)/(1+randomization_scale**2.))*mu, value, current
-
-def approx_fisher_info(mu, randomization_scale=0.5, threshold=2):
-
-    variance = 1 + randomization_scale ** 2.
-    minimizer = approx_grad_cgf(mu)[2]
-    return (1./ variance**2.)* (1./((1./variance) + grad_log_hessian(minimizer, randomization_scale**2.)))+ ((randomization_scale ** 2.)/variance)
-
-def simulate_truncated(mu, randomization_scale = 0.5, threshold = 2):
-    while True:
-        Z = np.random.normal(mu, 1, 1)
-        W = np.random.normal(0, randomization_scale, 1)
-        if (Z + W > threshold):
-            return Z
-
-def test_pivot(mu, randomization_scale=0.5, threshold=2):
-    Z = np.array([simulate_truncated(mu, randomization_scale=randomization_scale, threshold=threshold) for _ in
-                  range(25000)])
-
-    mu_seq = np.linspace(-7., 6, num=2600)
-    grad_partition = np.zeros(mu_seq.shape[0])
-    for i in range(mu_seq.shape[0]):
-        grad_partition[i] = approx_grad_cgf(mu_seq[i])[0]
-
-    pivot = []
-    approx_MLE = []
-    sd_MLE = 1 / np.sqrt(approx_fisher_info(mu))
-    for k in range(Z.shape[0]):
-        MLE = mu_seq[np.argmin(np.abs(grad_partition - Z[k]))]
-        approx_MLE.append(MLE)
-        pivot.append((MLE - mu) / sd_MLE)
-
-    return np.asarray(pivot), np.asarray(approx_MLE)
-
-print(test_pivot(1))
-
-    #print("grad cgf check", approx_grad_cgf(-1)[0])
-#print("fisher info check", approx_fisher_info(-2))
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/compare_lasso_simple.py b/selection/adjusted_MLE/tests/compare_lasso_simple.py
deleted file mode 100644
index d5b7619cc..000000000
--- a/selection/adjusted_MLE/tests/compare_lasso_simple.py
+++ /dev/null
@@ -1,143 +0,0 @@
-from __future__ import print_function
-import numpy as np, sys
-
-import regreg.api as rr
-from selection.tests.instance import gaussian_instance
-from scipy.stats import norm as ndist
-from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from statsmodels.distributions.empirical_distribution import ECDF
-
-def test_lasso_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization_scale=1.):
-
-    lam = 2.
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        nactive = np.sum(active)
-        if nactive > 0:
-            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-            print("true target", true_target)
-            approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                         M_est.opt_transform,
-                                                         M_est.target_observed,
-                                                         M_est.feasible_point,
-                                                         M_est.target_cov,
-                                                         M_est.randomizer_precision)
-
-            print("approx_MLE", approx_MLE)
-            #print("check maps", M_est.opt_transform, M_est.target_transform, M_est.feasible_point, M_est.target_cov,
-            #      M_est.randomizer_precision, M_est.target_observed)
-
-            _ , opt_offset = M_est.opt_transform
-            target_observed = np.atleast_1d(M_est.target_observed)
-            target_transform = (-np.identity(1), np.zeros(1))
-            s = np.asscalar(np.sign(opt_offset))
-            opt_transform = (s * (np.identity(1)+epsilon), np.ones(1) * (s * 2.))
-            feasible_point = np.ones(1)
-            randomizer_precision = np.identity(1) / randomization_scale ** 2
-            target_cov = np.identity(1)
-            approx_MLE_0, value_0, var_0, mle_map_0= solve_UMVU(target_transform,
-                                                                opt_transform,
-                                                                target_observed,
-                                                                feasible_point,
-                                                                target_cov,
-                                                                randomizer_precision)
-            break
-
-    return np.squeeze((approx_MLE - true_target)/float(np.sqrt(var))), (approx_MLE - true_target), \
-           np.squeeze((approx_MLE_0 - true_target)/float(np.sqrt(var_0))), (approx_MLE_0 - true_target)
-
-
-def test_approx_var(n=100, p=1, s=0, signal=0., lam_frac=1., randomization_scale=1.):
-
-    lam = 2.
-    while True:
-        X = np.ones((n, p)) / float(np.sqrt(n))
-        n, p = X.shape
-        beta = signal
-        y = np.random.standard_normal(n)
-        y += (beta / np.sqrt(n))
-        omega = np.random.standard_normal(1)
-
-        true_target = beta * np.sqrt(n)
-        target_observed = y.sum()/float(np.sqrt(n))
-        if np.abs(target_observed + omega) > lam :
-
-            target_transform = (-np.identity(1), np.zeros(1))
-            s = np.asscalar(np.sign(target_observed + omega))
-            opt_transform = (s * np.identity(1), np.ones(1) * (s * 2.))
-            feasible_point = np.ones(1)
-            randomizer_precision = np.identity(1) / randomization_scale ** 2
-            target_cov = np.identity(1)
-            approx_MLE_0, value_0, var_0, mle_map_0= solve_UMVU(target_transform,
-                                                                opt_transform,
-                                                                target_observed,
-                                                                feasible_point,
-                                                                target_cov,
-                                                                randomizer_precision)
-            break
-
-    return np.squeeze((approx_MLE_0 - true_target)/float(np.sqrt(var_0))), (approx_MLE_0 - true_target)
-
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 400
-    pivot_lasso = []
-    pivot_simple = []
-    diff = 0.
-    bias = 0.
-    for i in range(ndraw):
-        approx = test_lasso_approx_var(n=300, p=1, s=1, signal=-1.)
-        if approx is not None:
-            pivot_lasso.append(approx[0])
-            pivot_simple.append(approx[2])
-            bias += approx[1]
-            #diff += approx[0]-approx[2]
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("bias" + str(bias/float(i)) + "\n")
-    #sys.stderr.write("diff" + str(diff) + "\n")
-
-    #if i % 10 == 0:
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(pivot_lasso)))
-    ecdf_0 = ECDF(ndist.cdf(np.asarray(pivot_simple)))
-    grid = np.linspace(0, 1, 101)
-    #print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot(grid, ecdf_0(grid), '-b')
-    plt.plot(grid, grid, 'k--')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_lasso_selective_MLE_lasso_p1_amp5.png")
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 200
-#     pivot_simple = []
-#     diff = 0.
-#     for i in range(ndraw):
-#         approx = test_approx_var(n=300, p=1, s=0, signal=0.)
-#         print("here")
-#         pivot_simple.append(approx[0])
-#         sys.stderr.write("iteration completed" + str(i) + "\n")
-#
-#     #if i % 10 == 0:
-#     plt.clf()
-#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_simple)))
-#     grid = np.linspace(0, 1, 101)
-#     plt.plot(grid, ecdf(grid), c='red', marker='^')
-#     plt.plot(grid, grid, 'k--')
-#     plt.show()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/compare_risks.py b/selection/adjusted_MLE/tests/compare_risks.py
deleted file mode 100644
index 3c089bfea..000000000
--- a/selection/adjusted_MLE/tests/compare_risks.py
+++ /dev/null
@@ -1,221 +0,0 @@
-from __future__ import print_function
-import numpy as np, sys
-
-import regreg.api as rr
-from selection.tests.instance import gaussian_instance
-from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from selection.randomized.M_estimator import M_estimator
-import statsmodels.api as sm
-from rpy2.robjects.packages import importr
-from rpy2 import robjects
-glmnet = importr('glmnet')
-import rpy2.robjects.numpy2ri
-
-rpy2.robjects.numpy2ri.activate()
-
-def glmnet_sigma(X, y):
-    robjects.r('''
-                glmnet_cv = function(X,y){
-                y = as.matrix(y)
-                X = as.matrix(X)
-                n = nrow(X)
-                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                lam_1se = out$lambda.1se
-                lam_min = out$lambda.min
-                return(list(lam_min = n * as.numeric(lam_min), lam_1se = n* as.numeric(lam_1se)))
-                }''')
-
-    lambda_cv_R = robjects.globalenv['glmnet_cv']
-    n, p = X.shape
-    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-
-    lam = lambda_cv_R(r_X, r_y)
-    lam_min = np.array(lam.rx2('lam_min'))
-    lam_1se = np.array(lam.rx2('lam_1se'))
-    return lam_min, lam_1se
-
-def relative_risk(est, truth, Sigma):
-
-    return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
-
-def AR1(rho, p):
-    idx = np.arange(p)
-    cov = rho ** np.abs(np.subtract.outer(idx, idx))
-    return cov, np.linalg.cholesky(cov)
-
-def risk_selective_mle(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=np.sqrt(0.1)):
-
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
-                                                       random_signs=True, equicorrelated=False)
-        n, p = X.shape
-
-        if p>n:
-            sigma_est = np.std(y)/2.
-            print("sigma est", sigma_est)
-        else:
-            ols_fit = sm.OLS(y, X).fit()
-            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-            print("sigma est", sigma_est)
-
-        #sigma_est = 1.
-        snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
-        print("snr", snr)
-
-        #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        lam_min, lam_1se = glmnet_sigma(X, y)
-        print(" here lambda")
-        lam = lam_1se[0]
-        print(" here lambda", lam)
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma = sigma_est)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-        nactive = np.sum(active)
-        print("number of variables selected by randomized LASSO", nactive)
-
-        if nactive > 0:
-            approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
-                                                                       M_est.opt_transform,
-                                                                       M_est.target_observed,
-                                                                       M_est.feasible_point,
-                                                                       M_est.target_cov,
-                                                                       M_est.randomizer_precision)
-
-            mle_target_lin, mle_soln_lin, mle_offset = mle_transform
-            break
-
-    est_Sigma = X[:, active].T.dot(X[:, active])
-    ind_est = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
-    target_par = beta[active]
-    Lasso_est = M_est.observed_opt_state[:nactive]
-
-    return (approx_MLE - target_par).sum()/float(nactive), \
-           relative_risk(approx_MLE, target_par, est_Sigma),\
-           relative_risk(M_est.target_observed, target_par, est_Sigma),\
-           relative_risk(ind_est, target_par, est_Sigma),\
-           relative_risk(Lasso_est, target_par, est_Sigma)
-
-def risk_selective_mle_full(n=500, p=100, s=5, signal=5., lam_frac=1., randomization_scale=0.7):
-
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=1.,
-                                                       random_signs=True, equicorrelated=False)
-        n, p = X.shape
-
-        if p>n:
-            sigma_est = np.std(y)/2.
-            #sigma_est = 1.
-            print("sigma est", sigma_est)
-        else:
-            ols_fit = sm.OLS(y, X).fit()
-            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-            print("sigma est", sigma_est)
-
-        snr = (beta.T).dot(X.T.dot(X)).dot(beta)/n
-        print("snr", snr)
-
-        #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        lam_min, lam_1se = glmnet_sigma(X, y)
-        lam = lam_1se[0]
-        print("lambda from glmnet", lam)
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1. /np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma = sigma_est)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        #true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-        nactive = np.sum(active)
-        print("number of variables selected by randomized LASSO", nactive)
-
-        if nactive > 0:
-            approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
-                                                                       M_est.opt_transform,
-                                                                       M_est.target_observed,
-                                                                       M_est.feasible_point,
-                                                                       M_est.target_cov,
-                                                                       M_est.randomizer_precision)
-
-            mle_target_lin, mle_soln_lin, mle_offset = mle_transform
-            break
-
-    Sigma, _ = AR1(rho=0.35, p=p)
-    ind_est = np.zeros(p)
-    ind_est[active] = mle_target_lin.dot(M_est.target_observed) + mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset
-    target_par = beta
-
-    Lasso_est = np.zeros(p)
-    Lasso_est[active] = M_est.observed_opt_state[:nactive]
-    selective_MLE = np.zeros(p)
-    selective_MLE[active] = approx_MLE
-
-    relaxed_Lasso = np.zeros(p)
-    relaxed_Lasso[active] = M_est.target_observed
-
-    M_est_nonrand = M_estimator(loss, epsilon, penalty, randomization.isotropic_gaussian((p,), scale=0.005))
-    M_est_nonrand.solve()
-    rel_Lasso_nonrand = np.zeros(p)
-    rel_Lasso_nonrand[M_est_nonrand._overall] = M_est_nonrand.observed_internal_state[M_est_nonrand._overall.sum()]
-    Lasso_nonrand = np.zeros(p)
-    Lasso_nonrand[M_est_nonrand._overall] = M_est_nonrand.observed_opt_state[:M_est_nonrand._overall.sum()]
-
-    print("number of variables selected by non-randomized LASSO", M_est_nonrand._overall.sum())
-
-    return (selective_MLE - target_par).sum()/float(nactive), \
-           relative_risk(selective_MLE, target_par, Sigma), \
-           relative_risk(relaxed_Lasso, target_par, Sigma), \
-           relative_risk(ind_est, target_par, Sigma), \
-           relative_risk(Lasso_est, target_par, Sigma), \
-           relative_risk(rel_Lasso_nonrand, target_par, Sigma),\
-           relative_risk(Lasso_nonrand, target_par, Sigma)
-
-if __name__ == "__main__":
-
-    ndraw = 100
-    bias = 0.
-    risk_selMLE = 0.
-    risk_relLASSO = 0.
-    risk_indest = 0.
-    risk_LASSO = 0.
-    risk_relLASSO_nonrand = 0.
-    risk_LASSO_nonrand = 0.
-    for i in range(ndraw):
-        approx = risk_selective_mle_full(n=500, p=100, s=5, signal=5.)
-        if approx is not None:
-            bias += approx[0]
-            risk_selMLE += approx[1]
-            risk_relLASSO += approx[2]
-            risk_indest += approx[3]
-            risk_LASSO += approx[4]
-            risk_relLASSO_nonrand += approx[5]
-            risk_LASSO_nonrand += approx[6]
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
-        sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
-        sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n")
-
diff --git a/selection/adjusted_MLE/tests/exact_MLE.py b/selection/adjusted_MLE/tests/exact_MLE.py
deleted file mode 100644
index b7561637e..000000000
--- a/selection/adjusted_MLE/tests/exact_MLE.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import numpy as np
-from scipy.stats import norm as ndist
-
-def grad_CGF(mu, randomization_scale = 0.5, threshold = 2):
-    grad = mu + (1. / np.sqrt(1. + randomization_scale ** 2.)) * (ndist.pdf((threshold -mu)
-                                                                          / (np.sqrt(1.+randomization_scale ** 2.)))
-                                                                / (1.-ndist.cdf(( threshold -mu) /(np.sqrt(1.+randomization_scale ** 2.)))))
-    return grad
-
-def fisher_info(mu, randomization_scale = 0.5, threshold = 2):
-    variance = 1.+randomization_scale**2.
-    hessian = 1.- (1./variance)*((((mu-threshold)/(np.sqrt(variance)))*ndist.pdf((threshold-mu)/(np.sqrt(variance))))/(1.-ndist.cdf((threshold-mu)/(np.sqrt(variance)))))\
-              - (1./(variance))*((ndist.pdf((threshold-mu)/(np.sqrt(variance)))
-                                                     / (1.-ndist.cdf((threshold-mu)/(np.sqrt(variance)))))**2)
-
-    return hessian
-
-
-def simulate_truncated(mu, randomization_scale = 0.5, threshold = 2):
-    while True:
-        Z = np.random.normal(mu, 1, 1)
-        W = np.random.normal(0, randomization_scale, 1)
-        if (Z + W > threshold):
-            return Z
-
-
-def test_pivot(mu, randomization_scale = 0.5, threshold = 2):
-    Z = np.array([simulate_truncated(mu, randomization_scale = randomization_scale, threshold=threshold) for _ in range(25000)])
-
-    mu_seq = np.linspace(-7., 6, num = 2600)
-    grad_partition = np.zeros(mu_seq.shape[0])
-    for i in range(mu_seq.shape[0]):
-        grad_partition[i] = grad_CGF(mu_seq[i])
-
-    pivot = []
-    exact_MLE = []
-    sd_MLE = 1/ np.sqrt(fisher_info(mu))
-    for k in range(Z.shape[0]):
-        MLE = mu_seq[np.argmin(np.abs(grad_partition - Z[k]))]
-        exact_MLE.append(MLE)
-        pivot.append((MLE-mu)/sd_MLE)
-
-    return np.asarray(pivot), np.asarray(exact_MLE)
-
-#print("grad cgf check", grad_CGF(2))
-#print("hessian cgf check", fisher_info(0))
-#print(test_pivot(1))
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py b/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
deleted file mode 100644
index fb2e1b121..000000000
--- a/selection/adjusted_MLE/tests/high_dim_boot_coverage.py
+++ /dev/null
@@ -1,155 +0,0 @@
-from __future__ import print_function
-from rpy2 import robjects
-
-import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
-
-import statsmodels.api as sm
-import numpy as np, sys
-import regreg.api as rr
-from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-import scipy.stats as stats
-
-def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
-    robjects.r('''
-    library(bestsubset)
-    sim_xy = bestsubset::sim.xy
-    ''')
-
-    r_simulate = robjects.globalenv['sim_xy']
-    sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
-    X = np.array(sim.rx2('x'))
-    y = np.array(sim.rx2('y'))
-    X_val = np.array(sim.rx2('xval'))
-    y_val = np.array(sim.rx2('yval'))
-    Sigma = np.array(sim.rx2('Sigma'))
-    beta = np.array(sim.rx2('beta'))
-    sigma = np.array(sim.rx2('sigma'))
-
-    return X, y, X_val, y_val, Sigma, beta, sigma
-
-def inference_approx(n=100, p=1000, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                     randomization_scale=np.sqrt(0.25), target="partial"):
-    while True:
-        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
-        true_mean = X.dot(beta)
-
-        X -= X.mean(0)[None, :]
-        X /= (X.std(0)[None, :] * np.sqrt(n))
-
-        X_val -= X_val.mean(0)[None, :]
-        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
-
-        if p > n:
-            sigma_est = np.std(y)
-            print("sigma and sigma_est", sigma, sigma_est)
-        else:
-            ols_fit = sm.OLS(y, X).fit()
-            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-            print("sigma and sigma_est", sigma, sigma_est)
-
-        y = y - y.mean()
-        y /= sigma_est
-        y_val = y_val - y_val.mean()
-        y_val /= sigma_est
-        true_mean /= sigma_est
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1. / np.sqrt(n)
-        lam_seq = np.linspace(0.75, 2.75, num=100) * np.mean(
-            np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        err = np.zeros(100)
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M = np.identity(p)
-        for k in range(100):
-            lam = lam_seq[k]
-            W = np.ones(p) * lam
-            penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target,
-                                    randomization_scale=randomization_scale, sigma=1.)
-
-            active = M_est._overall
-            nactive = active.sum()
-            approx_MLE_est = np.zeros(p)
-            if nactive > 0:
-                M_est.solve_map()
-                approx_MLE = solve_UMVU(M_est.target_transform,
-                                        M_est.opt_transform,
-                                        M_est.target_observed,
-                                        M_est.feasible_point,
-                                        M_est.target_cov,
-                                        M_est.randomizer_precision)[0]
-                approx_MLE_est[active] = approx_MLE
-
-            err[k] = np.mean((y_val - X_val.dot(approx_MLE_est)) ** 2.)
-
-        lam = lam_seq[np.argmin(err)]
-        print('lambda', lam)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target,
-                                randomization_scale=randomization_scale, sigma=1.)
-        active = M_est._overall
-        nactive = np.sum(active)
-
-        print("number of variables selected by randomized LASSO", nactive)
-
-        if nactive > 0:
-            M_est.solve_map()
-            approx_MLE, var, mle_map, _, _, mle_transform = solve_UMVU(M_est.target_transform,
-                                                                       M_est.opt_transform,
-                                                                       M_est.target_observed,
-                                                                       M_est.feasible_point,
-                                                                       M_est.target_cov,
-                                                                       M_est.randomizer_precision)
-
-            approx_sd = np.sqrt(np.diag(var))
-
-            if nactive == 1:
-                approx_MLE = np.array([approx_MLE])
-                approx_sd = np.array([approx_sd])
-
-            coverage_sel = 0.
-            if target == "full":
-                true_target = np.linalg.pinv(X)[active].dot(true_mean)
-            if target == "partial":
-                true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
-            print("true target", true_target)
-
-            for j in range(nactive):
-                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
-                    coverage_sel += 1
-
-                print("selective intervals", sigma_est*(approx_MLE[j] - (1.65 * approx_sd[j])),
-                      sigma_est *(approx_MLE[j] + (1.65 * approx_sd[j])))
-
-            break
-
-    if True:
-        return coverage_sel/float(nactive), np.true_divide(approx_MLE- true_target, approx_sd)
-
-if __name__ == "__main__":
-
-    import matplotlib.pyplot as plt
-    ndraw = 100
-    coverage_sel = 0.
-    pivot_obs_info = []
-    for i in range(ndraw):
-        approx = inference_approx(n=500, p=2500, nval=500, rho=0.35, s=20, beta_type=1, snr=0.20, target="full")
-        if approx is not None:
-            coverage_sel += approx[0]
-            pivot = approx[1]
-            for j in range(pivot.shape[0]):
-                pivot_obs_info.append(pivot[j])
-
-        sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        #sys.stderr.write("pivot" + str(pivot_obs_info) + "\n")
-
-    stats.probplot(np.asarray(pivot_obs_info), dist="norm", plot=plt)
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/high_10_0.20_.png")
-
-
-
diff --git a/selection/adjusted_MLE/tests/mle_LASSO.py b/selection/adjusted_MLE/tests/mle_LASSO.py
deleted file mode 100644
index 65ceabf60..000000000
--- a/selection/adjusted_MLE/tests/mle_LASSO.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from __future__ import print_function
-import sys
-
-import numpy as np
-import regreg.api as rr
-from selection.tests.instance import gaussian_instance
-from selection.approx_ci.ci_approx_density import approximate_conditional_density
-from selection.approx_ci.selection_map import M_estimator_map
-
-def test_approximate_MLE(X,
-                         y,
-                         true_mean,
-                         sigma,
-                         seed_n = 0,
-                         lam_frac = 1.,
-                         loss='gaussian',
-                         randomization_scale = 1.):
-    from selection.api import randomization
-
-    n, p = X.shape
-    np.random.seed(seed_n)
-    if loss == "gaussian":
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-        loss = rr.glm.gaussian(X, y)
-
-    epsilon = 1. / np.sqrt(n)
-
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    randomization = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-    M_est = M_estimator_map(loss, epsilon, penalty, randomization, randomization_scale=randomization_scale)
-
-    M_est.map_solve()
-    active = M_est._overall
-    active_set = np.asarray([i for i in range(p) if active[i]])
-    nactive = np.sum(active)
-    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
-    sys.stderr.write("Active set selected by lasso" + str(active_set) + "\n")
-    sys.stderr.write("Observed target" + str(M_est.target_observed) + "\n")
-
-    ci = approximate_conditional_density(M_est)
-    ci.solve_approx()
-    sel_MLE = np.zeros(nactive)
-
-    for j in range(nactive):
-        sel_MLE[j] = ci.approx_MLE_solver(j, step=1, nstep=150)[0]
-
-    return sel_MLE
-
-X, y, beta, nonzero, sigma = gaussian_instance(n=100, p=100, s=2, rho=0., signal=3., sigma=1.)
-true_mean = X.dot(beta)
-test = test_approximate_MLE(X,
-                            y,
-                            true_mean,
-                            sigma,
-                            seed_n = 0,
-                            lam_frac = 1.,
-                            loss='gaussian')
-print(test)
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/relaxed_lasso.py b/selection/adjusted_MLE/tests/relaxed_lasso.py
deleted file mode 100644
index 26c0feb40..000000000
--- a/selection/adjusted_MLE/tests/relaxed_lasso.py
+++ /dev/null
@@ -1,421 +0,0 @@
-from __future__ import print_function, division
-from scipy.stats import norm as ndist
-import numpy as np, sys
-
-import regreg.api as rr
-import statsmodels.api as sm
-
-# rpy2 imports
-
-from rpy2.robjects.packages import importr
-from rpy2 import robjects
-import rpy2.robjects.numpy2ri
-rpy2.robjects.numpy2ri.activate()
-
-from selection.randomized.api import randomization
-from selection.randomized.selective_MLE import selective_MLE as solve_selective_MLE
-from selection.adjusted_MLE.selective_MLE import M_estimator_map
-
-def glmnet_sigma(X, y):
-    robjects.r('''
-                glmnet_cv = function(X,y){
-                y = as.matrix(y)
-                X = as.matrix(X)
-                n = nrow(X)
-                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                lam_1se = out$lambda.1se
-                lam_min = out$lambda.min
-                return(list(lam_min = n * as.numeric(lam_min), lam_1se = n* as.numeric(lam_1se)))
-                }''')
-
-    lambda_cv_R = robjects.globalenv['glmnet_cv']
-    n, p = X.shape
-    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-
-    lam = lambda_cv_R(r_X, r_y)
-    lam_min = np.array(lam.rx2('lam_min'))
-    lam_1se = np.array(lam.rx2('lam_1se'))
-    return lam_min, lam_1se
-
-
-def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
-    robjects.r('''
-    library(bestsubset) #source('~/best-subset/bestsubset/R/sim.R')
-
-    sim_xy = bestsubset::sim.xy
-    ''')
-
-    r_simulate = robjects.globalenv['sim_xy']
-    sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
-    X = np.array(sim.rx2('x'))
-    y = np.array(sim.rx2('y'))
-    X_val = np.array(sim.rx2('xval'))
-    y_val = np.array(sim.rx2('yval'))
-    Sigma = np.array(sim.rx2('Sigma'))
-    beta = np.array(sim.rx2('beta'))
-    sigma = np.array(sim.rx2('sigma'))
-
-    return X, y, X_val, y_val, Sigma, beta, sigma
-
-def tuned_lasso(X, y, X_val,y_val):
-    robjects.r('''
-        #source('~/best-subset/bestsubset/R/lasso.R')
-        tuned_lasso_estimator = function(X,Y,X.val,Y.val){
-        Y = as.matrix(Y)
-        X = as.matrix(X)
-        Y.val = as.vector(Y.val)
-        X.val = as.matrix(X.val)
-        rel.LASSO = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
-        LASSO = lasso(X,Y,intercept=FALSE,nlam=50)
-        beta.hat.rellasso = as.matrix(coef(rel.LASSO))
-        beta.hat.lasso = as.matrix(coef(LASSO))
-        min.lam = min(rel.LASSO$lambda)
-        max.lam = max(rel.LASSO$lambda)
-        lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
-        muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
-        muhat.val.lasso = as.matrix(predict(LASSO, X.val))
-        err.val.rellasso = colMeans((muhat.val.rellasso - Y.val)^2)
-        err.val.lasso = colMeans((muhat.val.lasso - Y.val)^2)
-        opt_lam = ceiling(which.min(err.val.rellasso)/10)
-        lambda.tuned = lam.seq[opt_lam]
-        return(list(beta.hat.rellasso = beta.hat.rellasso[,which.min(err.val.rellasso)],
-        beta.hat.lasso = beta.hat.lasso[,which.min(err.val.lasso)],
-        lambda.tuned = lambda.tuned, lambda.seq = lam.seq))
-        }''')
-
-    r_lasso = robjects.globalenv['tuned_lasso_estimator']
-
-    n, p = X.shape
-    nval, _ = X_val.shape
-    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-    r_X_val = robjects.r.matrix(X_val, nrow=nval, ncol=p)
-    r_y_val = robjects.r.matrix(y_val, nrow=nval, ncol=1)
-
-    tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
-    estimator_rellasso = np.array(tuned_est.rx2('beta.hat.rellasso'))
-    estimator_lasso = np.array(tuned_est.rx2('beta.hat.lasso'))
-    lam_tuned = np.array(tuned_est.rx2('lambda.tuned'))
-    lam_seq = np.array(tuned_est.rx2('lambda.seq'))
-    return estimator_rellasso, estimator_lasso, lam_tuned, lam_seq
-
-def relative_risk(est, truth, Sigma):
-
-    return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
-
-def inference_approx(n=500, p=100, nval=100, rho=0.35, s=5, beta_type=2, snr=0.2,
-                         randomization_scale=np.sqrt(0.25), target="partial"):
-
-    while True:
-        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr)
-        true_mean = X.dot(beta)
-        rel_LASSO, est_LASSO, lam_tuned, lam_seq = tuned_lasso(X, y, X_val, y_val)
-        active_nonrand = (rel_LASSO != 0)
-        nactive_nonrand = active_nonrand.sum()
-
-        X -= X.mean(0)[None, :]
-        X /= (X.std(0)[None, :] * np.sqrt(n))
-
-        X_val -= X_val.mean(0)[None, :]
-        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
-
-        if p > n:
-            sigma_est = np.std(y)
-            print("sigma and sigma_est", sigma, sigma_est)
-        else:
-            ols_fit = sm.OLS(y, X).fit()
-            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-            print("sigma and sigma_est", sigma, sigma_est)
-
-        if target == "debiased":
-            M = np.linalg.inv(Sigma)
-        else:
-            M = np.identity(p)
-
-        y = y - y.mean()
-        y /= sigma_est
-        y_val = y_val - y_val.mean()
-        y_val /= sigma_est
-        true_mean /= sigma_est
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1. / np.sqrt(n)
-        lam_seq = np.linspace(0.75, 2.75, num=100) * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-
-        err = np.zeros(100)
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        for k in range(100):
-            lam = lam_seq[k]
-            W = np.ones(p) * lam
-            penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-            M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target, randomization_scale=randomization_scale, sigma=1.)
-
-            active = M_est._overall
-            nactive = active.sum()
-            approx_MLE_est = np.zeros(p)
-            if nactive>0:
-                M_est.solve_map()
-                approx_MLE = solve_selective_MLE(M_est.target_observed,
-                                                 M_est.target_cov,
-                                                 M_est.target_transform,
-                                                 M_est.opt_transform,
-                                                 M_est.feasible_point,
-                                                 M_est.randomizer_precision)[0]
-                approx_MLE_est[active] = approx_MLE
-
-            err[k] = np.mean((y_val - X_val.dot(approx_MLE_est)) ** 2.)
-
-        lam = lam_seq[np.argmin(err)]
-        print('lambda', lam)
-
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p), weights=dict(zip(np.arange(p), W)), lagrange=1.)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M, target=target,
-                                randomization_scale=randomization_scale,sigma=1.)
-        active = M_est._overall
-        nactive = np.sum(active)
-
-        print("number of variables selected by randomized LASSO", nactive)
-        print("number of variables selected by tuned LASSO", (rel_LASSO != 0).sum())
-        true_signals = np.zeros(p, np.bool)
-        true_signals[beta != 0] = 1
-        screened_randomized = np.logical_and(active, true_signals).sum() / float(s)
-        screened_nonrandomized = np.logical_and(active_nonrand, true_signals).sum() / float(s)
-        false_positive_randomized = np.logical_and(active, ~true_signals).sum() / max(float(nactive), 1.)
-        false_positive_nonrandomized = np.logical_and(active_nonrand, ~true_signals).sum() / max(float(nactive_nonrand),
-                                                                                                 1.)
-
-        true_set = np.asarray([u for u in range(p) if true_signals[u]])
-        active_set = np.asarray([t for t in range(p) if active[t]])
-        active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
-        active_bool = np.zeros(nactive, np.bool)
-        for x in range(nactive):
-            active_bool[x] = (np.in1d(active_set[x], true_set).sum() > 0)
-        active_bool_nonrand = np.zeros(nactive_nonrand, np.bool)
-        for w in range(nactive_nonrand):
-            active_bool_nonrand[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
-
-        if target == "partial":
-            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(true_mean)
-            unad_sd =  np.sqrt(np.diag(np.linalg.inv(X[:, active].T.dot(X[:, active]))))
-            true_target_nonrand = np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])). \
-                dot(X[:, active_nonrand].T).dot(true_mean)
-            unad_sd_nonrand = np.sqrt(np.diag(np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand]))))
-        elif target == "full":
-            X_full_inv = np.linalg.pinv(X)
-            true_target = X_full_inv[active].dot(true_mean)
-            unad_sd = np.sqrt(np.diag(X_full_inv[active].dot(X_full_inv[active].T)))
-            true_target_nonrand = X_full_inv[active_nonrand].dot(true_mean)
-            unad_sd_nonrand = np.sqrt(np.diag(X_full_inv[active_nonrand].dot(X_full_inv[active_nonrand].T)))
-        elif target == "debiased":
-            X_full_inv = M.dot(X.T)
-            true_target = X_full_inv[active].dot(true_mean)
-            unad_sd = np.sqrt(np.diag(X_full_inv[active].dot(X_full_inv[active].T)))
-            true_target_nonrand = X_full_inv[active_nonrand].dot(true_mean)
-            unad_sd_nonrand = np.sqrt(np.diag(X_full_inv[active_nonrand].dot(X_full_inv[active_nonrand].T)))
-
-        coverage_sel = 0.
-        coverage_rand = 0.
-        coverage_nonrand = 0.
-
-        power_sel = 0.
-        power_rand = 0.
-        power_nonrand = 0.
-
-        for k in range(nactive_nonrand):
-            if ((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])) <= true_target_nonrand[k] \
-                    and ((np.sqrt(n)*rel_LASSO[k]/sigma_est) + (1.65 * unad_sd_nonrand[k])) >= true_target_nonrand[k]:
-                coverage_nonrand += 1
-            if active_bool_nonrand[k] == True and (((np.sqrt(n)*rel_LASSO[k]/sigma_est) - (1.65 * unad_sd_nonrand[k])) > 0.
-                                                   or ((np.sqrt(n)*rel_LASSO[k]/sigma_est) + (1.65 * unad_sd_nonrand[k])) < 0.):
-                power_nonrand += 1
-
-        if nactive > 0:
-            M_est.solve_map()
-            approx_MLE, var, mle_map, _, _, mle_transform = solve_selective_MLE(M_est.target_observed,
-                                                                                M_est.target_cov,
-                                                                                M_est.target_transform,
-                                                                                M_est.opt_transform,
-                                                                                M_est.feasible_point,
-                                                                                M_est.randomizer_precision)
-
-            mle_target_lin, mle_soln_lin, mle_offset = mle_transform
-            approx_sd = np.sqrt(np.diag(var))
-
-            if nactive == 1:
-                approx_MLE = np.array([approx_MLE])
-                approx_sd = np.array([approx_sd])
-
-            for j in range(nactive):
-                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and \
-                                (approx_MLE[j] + (1.65 * approx_sd[j])) >= true_target[j]:
-                    coverage_sel += 1
-                print("selective intervals",sigma_est* (approx_MLE[j] - (1.65 * approx_sd[j])),
-                      sigma_est* (approx_MLE[j] + (1.65 * approx_sd[j])))
-
-                if active_bool[j] == True and (
-                                (approx_MLE[j] - (1.65 * approx_sd[j])) > 0. or (
-                            approx_MLE[j] + (1.65 * approx_sd[j])) < 0.):
-                    power_sel += 1
-
-                if (M_est.target_observed[j] - (1.65 * unad_sd[j])) <= true_target[j] and (
-                            M_est.target_observed[j] + (1.65 * unad_sd[j])) >= true_target[j]:
-                    coverage_rand += 1
-                print("randomized intervals", sigma_est* (M_est.target_observed[j] - (1.65 * unad_sd[j])),
-                      sigma_est* (M_est.target_observed[j] + (1.65 * unad_sd[j])))
-
-                if active_bool[j] == True and ((M_est.target_observed[j] - (1.65 * unad_sd[j])) > 0. or (
-                            M_est.target_observed[j] + (1.65 * unad_sd[j])) < 0.):
-                    power_rand += 1
-
-            break
-
-    target_par = beta
-
-    ind_est = np.zeros(p)
-    ind_est[active] = (mle_target_lin.dot(M_est.target_observed) +
-                                         mle_soln_lin.dot(M_est.observed_opt_state[:nactive]) + mle_offset)
-    partial_ind_est = ind_est[active]
-    ind_est /= (np.sqrt(n)*(1./sigma_est))
-
-    relaxed_Lasso = np.zeros(p)
-    relaxed_Lasso[active] = M_est.target_observed / (np.sqrt(n)*(1./sigma_est))
-    partial_relaxed_Lasso = M_est.target_observed
-
-    Lasso_est = np.zeros(p)
-    Lasso_est[active] = M_est.observed_opt_state[:nactive] / (np.sqrt(n)*(1./sigma_est))
-    partial_Lasso_est = M_est.observed_opt_state[:nactive]
-
-    selective_MLE = np.zeros(p)
-
-    selective_MLE[active] = approx_MLE / (np.sqrt(n)*(1./sigma_est))
-    partial_selective_MLE = approx_MLE
-
-    partial_Sigma = (Sigma[:, active])[active,:]
-    partial_Sigma_nonrand = (Sigma[:, active_nonrand])[active_nonrand,:]
-
-    if True:
-        return (selective_MLE - target_par).sum() / float(nactive), \
-               relative_risk(selective_MLE, target_par, Sigma), \
-               relative_risk(relaxed_Lasso, target_par, Sigma), \
-               relative_risk(ind_est, target_par, Sigma), \
-               relative_risk(Lasso_est, target_par, Sigma), \
-               relative_risk(rel_LASSO, target_par, Sigma), \
-               relative_risk(est_LASSO, target_par, Sigma), \
-               screened_randomized, \
-               screened_nonrandomized, \
-               false_positive_randomized, \
-               false_positive_nonrandomized, \
-               coverage_sel / max(float(nactive), 1.), \
-               coverage_rand / max(float(nactive), 1.), \
-               coverage_nonrand / max(float(nactive_nonrand), 1.), \
-               power_sel / float(s), \
-               power_rand / float(s), \
-               power_nonrand / float(s), \
-               relative_risk(partial_selective_MLE, true_target, partial_Sigma), \
-               relative_risk(partial_relaxed_Lasso, true_target, partial_Sigma), \
-               relative_risk(partial_ind_est, true_target, partial_Sigma), \
-               relative_risk(partial_Lasso_est, true_target, partial_Sigma), \
-               relative_risk(np.sqrt(n) * rel_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand), \
-               relative_risk(np.sqrt(n) * est_LASSO[active_nonrand], true_target_nonrand, partial_Sigma_nonrand)
-
-
-if __name__ == "__main__":
-
-    ndraw = 150
-    bias = 0.
-    risk_selMLE = 0.
-    risk_relLASSO = 0.
-    risk_indest = 0.
-    risk_LASSO = 0.
-    risk_relLASSO_nonrand = 0.
-    risk_LASSO_nonrand = 0.
-    spower_rand = 0.
-    spower_nonrand = 0.
-    false_positive_randomized = 0.
-    false_positive_nonrandomized = 0.
-    coverage_sel = 0.
-    coverage_rand = 0.
-    coverage_nonrand = 0.
-    power_sel = 0.
-    power_rand = 0.
-    power_nonrand = 0.
-    partial_risk_selMLE = 0.
-    partial_risk_relLASSO = 0.
-    partial_risk_indest = 0.
-    partial_risk_LASSO = 0.
-    partial_risk_relLASSO_nonrand = 0.
-    partial_risk_LASSO_nonrand = 0.
-
-    count = 0
-    for i in range(ndraw):
-        approx = inference_approx(n=200, p=1000, nval=200, rho=0.70, s=10, beta_type=2, snr=0.20, target="full")
-
-        if approx is not None:
-            bias += approx[0]
-            risk_selMLE += approx[1]
-            risk_relLASSO += approx[2]
-            risk_indest += approx[3]
-            risk_LASSO += approx[4]
-            risk_relLASSO_nonrand += approx[5]
-            risk_LASSO_nonrand += approx[6]
-
-            spower_rand += approx[7]
-            spower_nonrand += approx[8]
-            false_positive_randomized += approx[9]
-            false_positive_nonrandomized += approx[10]
-
-            coverage_sel += approx[11]
-            coverage_rand += approx[12]
-            coverage_nonrand += approx[13]
-
-            power_sel += approx[14]
-            power_rand += approx[15]
-            power_nonrand += approx[16]
-
-            partial_risk_selMLE += approx[17]
-            partial_risk_relLASSO += approx[18]
-            partial_risk_indest += approx[19]
-            partial_risk_LASSO += approx[20]
-            partial_risk_relLASSO_nonrand += approx[21]
-            partial_risk_LASSO_nonrand += approx[22]
-
-        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
-        sys.stderr.write("overall_selrisk" + str(risk_selMLE / float(i + 1)) + "\n")
-        sys.stderr.write("overall_relLASSOrisk" + str(risk_relLASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_indepestrisk" + str(risk_indest / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSOrisk" + str(risk_LASSO / float(i + 1)) + "\n")
-        sys.stderr.write("overall_relLASSOrisk_norand" + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall_LASSOrisk_norand" + str(risk_LASSO_nonrand / float(i + 1)) + "\n"+"\n")
-
-        # sys.stderr.write("overall_LASSO_rand_spower" + str(spower_rand / float(i + 1)) + "\n")
-        # sys.stderr.write("overall_LASSO_norand_spower" + str(spower_nonrand / float(i + 1)) + "\n")
-        # sys.stderr.write("overall_LASSO_rand_falsepositives" + str(false_positive_randomized / float(i + 1)) + "\n")
-        # sys.stderr.write("overall_LASSO_norand_falsepositives" + str(false_positive_nonrandomized / float(i + 1)) + "\n"+"\n")
-
-        sys.stderr.write("selective coverage" + str(coverage_sel / float(i + 1)) + "\n")
-        sys.stderr.write("randomized coverage" + str(coverage_rand / float(i + 1)) + "\n")
-        sys.stderr.write("nonrandomized coverage" + str(coverage_nonrand / float(i + 1)) + "\n"+"\n")
-
-        sys.stderr.write("selective power" + str(power_sel / float(i + 1)) + "\n")
-        sys.stderr.write("randomized power" + str(power_rand / float(i + 1)) + "\n")
-        sys.stderr.write("nonrandomized power" + str(power_nonrand / float(i + 1)) + "\n"+"\n")
-
-        # sys.stderr.write("overall_partial_selrisk" + str(partial_risk_selMLE / float(i + 1)) + "\n")
-        # sys.stderr.write("overall_partial_relLASSOrisk" + str(partial_risk_relLASSO / float(i + 1)) + "\n")
-        # sys.stderr.write("overall_partial_indepestrisk" + str(partial_risk_indest / float(i + 1)) + "\n")
-        # sys.stderr.write("overall_partial_LASSOrisk" + str(partial_risk_LASSO / float(i + 1)) + "\n")
-        # sys.stderr.write("overall_partial_relLASSOrisk_norand" + str(partial_risk_relLASSO_nonrand / float(i + 1)) + "\n")
-        # sys.stderr.write("overall_partial_LASSOrisk_norand" + str(partial_risk_LASSO_nonrand / float(i + 1)) + "\n"+ "\n")
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-
-
-
-
-
-
-
-
-
diff --git a/selection/adjusted_MLE/tests/test_BH.py b/selection/adjusted_MLE/tests/test_BH.py
deleted file mode 100644
index 1fb86722b..000000000
--- a/selection/adjusted_MLE/tests/test_BH.py
+++ /dev/null
@@ -1,169 +0,0 @@
-from __future__ import print_function
-import numpy as np, sys
-
-import regreg.api as rr
-from scipy.stats import norm as ndist
-from selection.randomized.api import randomization
-from selection.tests.instance import gaussian_instance
-from selection.adjusted_MLE.selective_MLE import solve_UMVU
-from statsmodels.distributions.empirical_distribution import ECDF
-
-def BH_selection(p_values, level):
-
-    m = p_values.shape[0]
-    p_sorted = np.sort(p_values)
-    indices = np.arange(m)
-    indices_order = np.argsort(p_values)
-    order_sig = np.max(indices[p_sorted - np.true_divide(level * (np.arange(m) + 1.), m) <= 0])
-    E_sel = indices_order[:(order_sig+1)]
-
-    active = np.zeros(m, np.bool)
-    active[E_sel] = 1
-    return order_sig+1, active, p_values[indices_order[order_sig+1]]
-
-def orthogonal_BH_approx(n=100, s=3, signal=3, randomization_scale=1., sigma = 1., level=0.10):
-
-    while True:
-        beta = np.zeros(n)
-
-        signal = np.atleast_1d(signal)
-        if signal.shape == (1,):
-            beta[:s] = signal[0] * (1 + np.fabs(np.random.standard_normal(s)))
-        else:
-            beta[:s] = np.linspace(signal[0], signal[1], s)
-
-        y = sigma * (beta + np.random.standard_normal(n))
-        omega = randomization_scale * np.random.standard_normal(n)
-
-        p_values = 2.*(1. - ndist.cdf(np.abs(y+omega)/np.sqrt(1.+ randomization_scale**2.)))
-        K, active, p_threshold = BH_selection(p_values, level)
-
-        threshold = np.sqrt(1.+ randomization_scale**2.)*ndist.ppf(1.-np.max((K*level)/n, p_threshold))
-        target_observed = y[active]
-        target_transform = (-np.identity(K), np.zeros(K))
-        s = np.sign(target_observed + omega[active])
-        opt_transform = (np.identity(K)*s[None, :], threshold*s*np.ones(K))
-        nactive = np.sum(active)
-        feasible_point= np.ones(nactive)
-
-        if nactive >0:
-            true_target = beta[active]
-            print("true_target", true_target)
-            approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
-                                                         opt_transform,
-                                                         target_observed,
-                                                         feasible_point,
-                                                         sigma*np.identity(nactive),
-                                                         randomization_scale*np.identity(nactive))
-
-            print("approx sd", np.sqrt(np.diag(var)))
-            break
-
-    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
-
-
-def BH_approx(n=100, p=50, s=5, signal=5., randomization_scale=1., sigma=1., level=0.10):
-
-    while True:
-
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=sigma,
-                                                       random_signs=False, equicorrelated=False)
-
-        omega = randomization_scale * np.random.standard_normal(p)
-        p_values = 2.*(1. - ndist.cdf(np.abs(X.T.dot(y)+omega)/np.sqrt(1.+ randomization_scale**2.)))
-        K, active, p_threshold = BH_selection(p_values, level)
-        nactive = active.sum()
-
-        if nactive >0:
-
-            threshold = np.sqrt(1. + randomization_scale ** 2.) * ndist.ppf(1.-max((K*level)/n, p_threshold))
-
-            X_active_inv = np.linalg.inv(X[:, active].T.dot(X[:, active]))
-            projection_perp = np.identity(n) - X[:, active].dot(X_active_inv).dot(X[:, active].T)
-            observed_score_state = np.hstack(
-                [np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(y),
-                 X[:, ~active].T.dot(projection_perp).dot(y)])
-            target_observed = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(y)
-            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-            active_signs = np.sign(X[:, active].T.dot(y) + omega[active])
-
-            _opt_linear_term = np.vstack([np.diag(active_signs), np.zeros((p - nactive,nactive))])
-            _opt_affine_term = np.concatenate([threshold * active_signs, X[:, ~active].T.dot(y) + omega[~active]])
-            opt_transform = (_opt_linear_term, _opt_affine_term)
-
-            _score_linear_term = np.zeros((p, p))
-            _score_linear_term[:nactive, :nactive] = -X[:, active].T.dot(X[:, active])
-            _score_linear_term[nactive:, :nactive] = -X[:, ~active].T.dot(X[:, active])
-            _score_linear_term[nactive:, nactive:] = -np.identity(p - nactive)
-
-            score_cov = np.zeros((p, p))
-            score_cov[:nactive, :nactive] = X_active_inv
-            score_cov[nactive:, nactive:] = X[:, ~active].T.dot(projection_perp).dot(X[:, ~active])
-            score_target_cov = score_cov[:, :nactive]
-            target_cov = score_cov[:nactive, :nactive]
-
-            A = np.dot(_score_linear_term, score_target_cov).dot(np.linalg.inv(target_cov))
-            data_offset = _score_linear_term.dot(observed_score_state) - A.dot(target_observed)
-            target_transform = (A, data_offset)
-
-            feasible_point = np.ones(nactive)
-
-            approx_MLE, value, var, mle_map = solve_UMVU(target_transform,
-                                                         opt_transform,
-                                                         target_observed,
-                                                         feasible_point,
-                                                         sigma*np.identity(nactive),
-                                                         randomization_scale*np.identity(p))
-
-            #print("approx sd", np.sqrt(np.diag(var)))
-            break
-
-    return np.true_divide((approx_MLE - true_target),np.sqrt(np.diag(var))), (approx_MLE - true_target).sum() / float(nactive)
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 500
-#     bias = 0.
-#     pivot_obs_info= []
-#     for i in range(ndraw):
-#         approx = orthogonal_BH_approx(n=100, s=20, signal=2.5, randomization_scale=1., sigma = 1., level=0.10)
-#         if approx is not None:
-#             pivot = approx[0]
-#             bias += approx[1]
-#             print("bias in iteration", approx[1])
-#             pivot_obs_info.extend(pivot)
-#
-#         sys.stderr.write("iteration completed" + str(i) + "\n")
-#         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
-#
-#     plt.clf()
-#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-#     grid = np.linspace(0, 1, 101)
-#     plt.plot(grid, ecdf(grid), c='red', marker='^')
-#     plt.plot(grid, grid, 'k--')
-#     plt.show()
-
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 500
-    bias = 0.
-    pivot_obs_info= []
-    for i in range(ndraw):
-        approx = BH_approx(n=1000, p=2000, s=100, signal=3.5, randomization_scale=1., sigma=1., level=0.10)
-        if approx is not None:
-            pivot = approx[0]
-            bias += approx[1]
-            print("bias in iteration", approx[1])
-            pivot_obs_info.extend(pivot)
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
-
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    grid = np.linspace(0, 1, 101)
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot(grid, grid, 'k--')
-    plt.show()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_MLE.py b/selection/adjusted_MLE/tests/test_MLE.py
deleted file mode 100644
index c98f13a38..000000000
--- a/selection/adjusted_MLE/tests/test_MLE.py
+++ /dev/null
@@ -1,253 +0,0 @@
-from __future__ import print_function
-import numpy as np, sys
-
-import regreg.api as rr
-from selection.tests.instance import gaussian_instance
-from scipy.stats import norm as ndist
-from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from statsmodels.distributions.empirical_distribution import ECDF
-
-
-def test_lasso(n=100, p=50, s=5, signal=5., B= 500, seed_n = 0, lam_frac=1., randomization_scale=1.):
-
-    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
-    n, p = X.shape
-    if p>1:
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-    else:
-        lam = 2.
-
-    loss = rr.glm.gaussian(X, y)
-    epsilon = 1. / np.sqrt(n)
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
-
-    M_est.solve_map()
-    active = M_est._overall
-
-    true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-    # true_target = beta[active]
-    nactive = np.sum(active)
-    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
-    if nactive > 0:
-
-        approx_MLE, var, mle_map, _, _ = solve_UMVU(M_est.target_transform,
-                                                    M_est.opt_transform,
-                                                    M_est.target_observed,
-                                                    M_est.feasible_point,
-                                                    M_est.target_cov,
-                                                    M_est.randomizer_precision)
-
-        boot_sample = np.zeros((B, nactive))
-        beta_obs = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(y)
-        resid = y - X[:, active].dot(beta_obs)
-        for b in range(B):
-            boot_indices = np.random.choice(n, n, replace=True)
-            boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
-            target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + beta_obs
-            boot_sample[b, :] = mle_map(target_boot)[0]
-
-        print("estimated sd", boot_sample.std(0))
-        return np.true_divide((approx_MLE - true_target), boot_sample.std(0)),\
-               ((approx_MLE - true_target).sum()) / float(nactive)
-
-    else:
-        return None
-
-def test_lasso_approx_var(n=100, p=50, s=5, signal=5., lam_frac=1., randomization_scale=1.):
-
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.70, signal=signal, sigma=1.,
-                                                       random_signs=True, equicorrelated=False)
-        n, p = X.shape
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-        nactive = np.sum(active)
-        coverage = np.zeros(nactive)
-
-        if nactive > 0:
-
-            approx_MLE, var, mle_map, _, _, _ = solve_UMVU(M_est.target_transform,
-                                                           M_est.opt_transform,
-                                                           M_est.target_observed,
-                                                           M_est.feasible_point,
-                                                           M_est.target_cov,
-                                                           M_est.randomizer_precision)
-
-
-            print("approx sd", np.sqrt(np.diag(var)))
-            approx_sd = np.sqrt(np.diag(var))
-            print("approx sd", approx_sd)
-            for j in range(nactive):
-                if (approx_MLE[j] - (1.65 * approx_sd[j])) <= true_target[j] and true_target[j]<= (approx_MLE[j] + (1.65 * approx_sd[j])):
-                    coverage[j] += 1
-            break
-
-    return np.true_divide((approx_MLE - true_target),approx_sd), (approx_MLE - true_target).sum()/float(nactive), \
-           coverage.sum()/float(nactive)
-
-def orthogonal_lasso_approx(n=100, p=5, s=3, signal=3, lam_frac=1., randomization_scale=1., sigma = 1.):
-
-    while True:
-        beta = np.zeros(p)
-
-        signal = np.atleast_1d(signal)
-        if signal.shape == (1,):
-            beta[:s] = signal[0] * (1 + np.fabs(np.random.standard_normal(s)))
-        else:
-            beta[:s] = np.linspace(signal[0], signal[1], s)
-
-        X = np.linalg.svd(np.random.standard_normal((n,p)))[0][:,:p]
-
-        y = sigma * (X.dot(beta) + np.random.standard_normal(n))
-
-        lam = sigma * lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        loss = rr.glm.gaussian(X, y)
-        epsilon = sigma / np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        nactive = np.sum(active)
-        print('nactive', nactive)
-        coverage = np.zeros(nactive)
-        if nactive >0:
-            true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-            print("true_target", true_target)
-            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                  M_est.opt_transform,
-                                                  M_est.target_observed,
-                                                  M_est.feasible_point,
-                                                  M_est.target_cov,
-                                                  M_est.randomizer_precision)
-
-            approx_sd = np.sqrt(np.diag(var))
-            print("approx sd", approx_sd)
-            for j in range(nactive):
-                if (approx_MLE[j]-(1.65*approx_sd[j]))<= true_target[j] and (approx_MLE[j] + (1.65*approx_sd[j])) >= true_target[j]:
-                    coverage[j] += 1
-            break
-
-    return np.true_divide((approx_MLE - true_target),approx_sd), (approx_MLE - true_target).sum()/float(nactive), \
-           coverage.sum()/float(nactive)
-
-def test_bias_lasso(nsim=2000):
-    bias = 0
-    for _ in range(nsim):
-        bias += test_lasso(n=100, p=50, s=5, signal=2.5, seed_n=0, lam_frac=1., randomization_scale=1.)[0]
-
-    print(bias / nsim)
-
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 100
-#     boot_pivot = []
-#     bias = 0.
-#     for i in range(ndraw):
-#         boot = test_lasso(n=300, p=1, s=1, signal=5., B=1000, seed_n=i)
-#         if boot is not None:
-#             pivot = boot[0]
-#             bias += boot[1]
-#             for j in range(pivot.shape[0]):
-#                 boot_pivot.append(pivot[j])
-#
-#         sys.stderr.write("iteration completed" + str(i) + "\n")
-#         sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
-#         if i % 10 == 0:
-#             plt.clf()
-#             ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
-#             grid = np.linspace(0, 1, 101)
-#             print("ecdf", ecdf(grid))
-#             plt.plot(grid, ecdf(grid), c='red', marker='^')
-#             plt.plot(grid, grid, 'k--')
-
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 500
-    bias = 0.
-    pivot_obs_info= []
-    coverage = 0.
-    for i in range(ndraw):
-        approx = test_lasso_approx_var(n=500, p=100, s=5, signal=3.)
-        if approx is not None:
-            pivot = approx[0]
-            bias += approx[1]
-            coverage += approx[2]
-            #for j in range(pivot.shape[0]):
-            #    pivot_obs_info.append(pivot[j])
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        if i % 10 == 0:
-            plt.clf()
-            ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-            grid = np.linspace(0, 1, 101)
-            print("ecdf", ecdf(grid))
-            plt.plot(grid, ecdf(grid), c='red', marker='^')
-            plt.plot(grid, grid, 'k--')
-            plt.savefig("approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
-
-        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
-        sys.stderr.write("coverage so far" + str(coverage / float(i + 1)) + "\n")
-
-    # plt.clf()
-    # ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    # grid = np.linspace(0, 1, 101)
-    # print("ecdf", ecdf(grid))
-    # plt.plot(grid, ecdf(grid), c='red', marker='^')
-    # plt.plot(grid, grid, 'k--')
-    # #plt.show()
-    # plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p4000_n5000_amp_3.5.png")
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 500
-#     bias = 0.
-#     pivot_obs_info= []
-#     for i in range(ndraw):
-#         approx = orthogonal_lasso_approx(n=300, p=20, s=5, signal=2.8, lam_frac=0.8)
-#         if approx is not None:
-#             pivot = approx[0]
-#             bias += approx[1]
-#             print("bias in iteration", approx[1])
-#             pivot_obs_info.extend(pivot)
-#
-#         sys.stderr.write("iteration completed" + str(i) + "\n")
-#         sys.stderr.write("overall_bias" + str(bias / float(i)) + "\n")
-#
-#     plt.clf()
-#     ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-#     grid = np.linspace(0, 1, 101)
-#     print("ecdf", ecdf(grid))
-#     plt.plot(grid, ecdf(grid), c='red', marker='^')
-#     plt.plot(grid, grid, 'k--')
-#     plt.show()
-#     #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p5_amp5.png")
->>>>>>> 627a7179dff61c0037e2a1ccb248fd2f262393cc
diff --git a/selection/adjusted_MLE/tests/test_MLE_boot.py b/selection/adjusted_MLE/tests/test_MLE_boot.py
deleted file mode 100644
index ca0d4b825..000000000
--- a/selection/adjusted_MLE/tests/test_MLE_boot.py
+++ /dev/null
@@ -1,217 +0,0 @@
-from __future__ import print_function
-import numpy as np, sys
-
-import regreg.api as rr
-from selection.tests.instance import gaussian_instance
-from scipy.stats import norm as ndist
-from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from statsmodels.distributions.empirical_distribution import ECDF
-from rpy2.robjects.packages import importr
-from rpy2 import robjects
-from scipy.stats import t as tdist
-import statsmodels.api as sm
-
-glmnet = importr('glmnet')
-import rpy2.robjects.numpy2ri
-
-rpy2.robjects.numpy2ri.activate()
-
-def glmnet_sigma(X, y):
-    robjects.r('''
-                glmnet_cv = function(X,y){
-                y = as.matrix(y)
-                X = as.matrix(X)
-
-                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                lam_1se = out$lambda.1se
-                return(lam_1se)
-                }''')
-
-    try:
-        lambda_cv_R = robjects.globalenv['glmnet_cv']
-        n, p = X.shape
-        r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-        r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-
-        lam_1se = lambda_cv_R(r_X, r_y)
-        return lam_1se*n
-    except:
-        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-
-def boot_lasso_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=0.7, sigma= 1.):
-
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=sigma,
-                                                       random_signs=True, equicorrelated=False)
-        n, p = X.shape
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-        nactive = np.sum(active)
-
-        if nactive > 0:
-            approx_MLE, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                  M_est.opt_transform,
-                                                  M_est.target_observed,
-                                                  M_est.feasible_point,
-                                                  M_est.target_cov,
-                                                  M_est.randomizer_precision)
-
-            boot_sample = np.zeros((B, nactive))
-            resid = y - X[:, active].dot(M_est.target_observed)
-            for b in range(B):
-                boot_indices = np.random.choice(n, n, replace=True)
-                boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
-                target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
-                boot_sample[b, :] = mle_map(target_boot)[0]
-
-            print("estimated sd", boot_sample.std(0), np.sqrt(np.diag(var)))
-            return np.true_divide((approx_MLE - true_target), boot_sample.std(0)), \
-                   ((approx_MLE - true_target).sum()) / float(nactive), \
-                   np.true_divide((approx_MLE - true_target), np.sqrt(np.diag(var)))
-
-            break
-
-def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=np.sqrt(0.25),
-                          sigma= 1.):
-
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.35, signal=signal, sigma=sigma,
-                                                       random_signs=True, equicorrelated=False)
-        n, p = X.shape
-
-        if p>n:
-            sigma_est = np.std(y)/2.
-            print("sigma est", sigma_est)
-        else:
-            ols_fit = sm.OLS(y, X).fit()
-            sigma_est = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1.)
-            print("sigma est", sigma_est)
-
-        #lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        lam = glmnet_sigma(X, y)
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, M= np.identity(p), target="partial", randomization_scale=randomization_scale, sigma=1.)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-        nactive = np.sum(active)
-        print("number of variables selected by randomized LASSO", nactive)
-
-        coverage = np.zeros(nactive)
-
-        if nactive > 0:
-            approx_MLE, var, mle_map, _, _, _ = solve_UMVU(M_est.target_transform,
-                                                           M_est.opt_transform,
-                                                           M_est.target_observed,
-                                                           M_est.feasible_point,
-                                                           M_est.target_cov,
-                                                           M_est.randomizer_precision)
-
-            boot_pivot = np.zeros((B, nactive))
-            resid = y - X[:, active].dot(M_est.target_observed)
-            for b in range(B):
-                boot_indices = np.random.choice(n, n, replace=True)
-                boot_vector = (X[boot_indices, :][:, active]).T.dot(resid[boot_indices])
-                target_boot = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(boot_vector) + M_est.target_observed
-                boot_mle = mle_map(target_boot)
-                #print("boot mle", boot_mle[0], approx_MLE)
-                boot_pivot[b, :] = np.true_divide(boot_mle[0]- approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-                #sys.stderr.write("bootstrap sample" + str(b) + "\n")
-
-            boot_std = boot_pivot.std(0)
-            for j in range(nactive):
-                if (approx_MLE[j] - (1.65 * boot_std[j])) <= true_target[j] and true_target[j] <= (approx_MLE[j] + (1.65 * boot_std[j])):
-                    coverage[j] += 1
-                print("intervals", (approx_MLE[j] - (1.65 * boot_std[j])), (approx_MLE[j] + (1.65 * boot_std[j])))
-            break
-
-    return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0), \
-           np.true_divide(approx_MLE - true_target, boot_pivot.std(0)), (approx_MLE - true_target).sum() / float(nactive),\
-           coverage.sum() / float(nactive)
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 100
-#     bias = 0.
-#     pivot_obs_info= []
-#     pivot_bootstrap = []
-#     for i in range(ndraw):
-#         approx = boot_lasso_approx_var(n=300, p=50, s=5, signal=3.5)
-#         if approx is not None:
-#             pivot_boot = approx[0]
-#             pivot_approx_info = approx[2]
-#             bias += approx[1]
-#             for j in range(pivot_boot.shape[0]):
-#                 pivot_obs_info.append(pivot_approx_info[j])
-#                 pivot_bootstrap.append(pivot_boot[j])
-#
-#         sys.stderr.write("iteration completed" + str(i) + "\n")
-#         sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
-#         #print("pivots", pivot_approx_info, pivot_boot)
-#
-#     #if i % 10 == 0:
-#     plt.clf()
-#     ecdf_approx = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-#     ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_bootstrap)))
-#     grid = np.linspace(0, 1, 101)
-#     print("ecdf", ecdf_boot(grid))
-#     plt.plot(grid, ecdf_approx(grid), c='red', marker='^')
-#     plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
-#     plt.plot(grid, grid, 'k--')
-#     plt.show()
-#     #plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n2000_p2000_amp3.5_sigma1.png")
-
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 100
-    bias = 0.
-    pivot_obs_info = []
-    coverage = 0.
-
-    for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=100, p=1000, s=5, signal=1.42, B=500)
-        if approx is not None:
-            pivot_boot = approx[3]
-            bias += approx[4]
-            coverage += approx[5]
-
-            for j in range(pivot_boot.shape[0]):
-                pivot_obs_info.append(pivot_boot[j])
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
-        sys.stderr.write("overall coverage" + str(coverage / float(i + 1)) + "\n")
-
-    # plt.clf()
-    # ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    # grid = np.linspace(0, 1, 101)
-    # print("ecdf", ecdf_boot(grid))
-    # plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
-    # plt.plot(grid, grid, 'k--')
-    # #plt.show()
-    # plt.savefig("/Users/snigdhapanigrahi/Desktop/Boot_pivot_n2000_p4000_amp3.5_rho_0.2_sigma1.png")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_MLE_univariate.py b/selection/adjusted_MLE/tests/test_MLE_univariate.py
deleted file mode 100644
index 8b05c28a7..000000000
--- a/selection/adjusted_MLE/tests/test_MLE_univariate.py
+++ /dev/null
@@ -1,123 +0,0 @@
-from __future__ import print_function
-import numpy as np, sys
-
-import regreg.api as rr
-from selection.tests.instance import gaussian_instance
-from scipy.stats import norm as ndist
-from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from statsmodels.distributions.empirical_distribution import ECDF
-
-def boot_lasso(n=100, p=50, s=5, signal=5., B=1000, seed_n = 0, lam_frac=1., randomization_scale=1.):
-
-    X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.)
-    n, p = X.shape
-
-    lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-    loss = rr.glm.gaussian(X, y)
-    epsilon = 1. / np.sqrt(n)
-    W = np.ones(p) * lam
-    penalty = rr.group_lasso(np.arange(p),
-                             weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-    randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-    M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
-    active = M_est._overall
-    nactive = np.sum(active)
-    sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
-
-    true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-
-    if nactive > 0:
-        boot_sample = np.zeros((B, nactive))
-        for k in range(nactive):
-            M_est.solve_map_univariate_target(k)
-            approx_MLE, value, mle_map = solve_UMVU(M_est.target_transform,
-                                                    M_est.opt_transform,
-                                                    np.array([M_est.target_observed]),
-                                                    M_est.feasible_point,
-                                                    M_est.target_cov[k,k],
-                                                    M_est.randomizer_precision)
-
-            for b in range(B):
-                boot_indices = np.random.choice(n, n, replace=True)
-                boot_vector = (X[boot_indices, :]).T.dot(y[boot_indices])
-                target_boot = ((np.linalg.inv(X[:, active].T.dot(X[:, active]))).dot(boot_vector[active]))[j]
-                boot_sample[b,k] = (mle_map(target_boot))[0]
-
-            sys.stderr.write("iteration completed" + str(k) + "\n")
-
-        centered_boot_sample = boot_sample - boot_sample.mean(0)[None, :]
-        std_boot_sample = centered_boot_sample / (boot_sample.std(0)[None, :])
-
-        return std_boot_sample.reshape((B * nactive,))
-    else:
-        return None
-
-def approx_lasso(n=100, p=50, s=5, signal=5., seed_n = 0, lam_frac=1., randomization_scale=1.):
-
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0., signal=signal, sigma=1.,
-                                                       random_signs=False, equicorrelated=False)
-        n, p = X.shape
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale)
-
-        active = M_est._overall
-        nactive = np.sum(active)
-        sys.stderr.write("number of active selected by lasso" + str(nactive) + "\n")
-
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-        selective_MLE = np.zeros(nactive)
-        var_MLE = np.zeros(nactive)
-        if nactive > 0:
-            for k in range(nactive):
-                M_est.solve_map_univariate_target(k)
-                approx_MLE, value, var, mle_map = solve_UMVU(M_est.target_transform,
-                                                             M_est.opt_transform,
-                                                             M_est.target_observed[k]*np.identity(1).reshape((1,)),
-                                                             M_est.feasible_point,
-                                                             M_est.target_cov[k, k]*np.identity(1),
-                                                             M_est.randomizer_precision)
-
-                selective_MLE[k] = approx_MLE
-                var_MLE[k] = var
-            break
-
-    print("selective_MLE, approx_sd", selective_MLE, np.sqrt(var_MLE))
-    return np.true_divide((selective_MLE - true_target), np.sqrt(var_MLE)), (selective_MLE - true_target).sum()/float(nactive)
-
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 500
-    bias = 0.
-    pivot_obs_info= []
-    for i in range(ndraw):
-        approx = approx_lasso(n=300, p=200, s=10, signal=3.5)
-        if approx is not None:
-            pivot = approx[0]
-            bias += approx[1]
-            for j in range(pivot.shape[0]):
-                pivot_obs_info.append(pivot[j])
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i+1)) + "\n")
-
-    #if i % 10 == 0:
-    plt.clf()
-    ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    grid = np.linspace(0, 1, 101)
-    print("ecdf", ecdf(grid))
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot(grid, grid, 'k--')
-    plt.show()
-    #plt.savefig("/Users/snigdhapanigrahi/Desktop/approx_info_selective_MLE_lasso_p1000_n3000_amp_0_AR1_0.2.png")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_boot_selective.py b/selection/adjusted_MLE/tests/test_boot_selective.py
deleted file mode 100644
index 0659fbc82..000000000
--- a/selection/adjusted_MLE/tests/test_boot_selective.py
+++ /dev/null
@@ -1,125 +0,0 @@
-from __future__ import print_function
-import numpy as np, sys
-
-import regreg.api as rr
-from selection.tests.instance import gaussian_instance
-from scipy.stats import norm as ndist
-from selection.randomized.api import randomization
-from selection.adjusted_MLE.selective_MLE import M_estimator_map, solve_UMVU
-from statsmodels.distributions.empirical_distribution import ECDF
-import selection.constraints.affine as AC
-
-from rpy2.robjects.packages import importr
-from rpy2 import robjects
-from scipy.stats import t as tdist
-
-glmnet = importr('glmnet')
-import rpy2.robjects.numpy2ri
-
-rpy2.robjects.numpy2ri.activate()
-
-def glmnet_sigma(X, y):
-    robjects.r('''
-                glmnet_cv = function(X,y){
-                y = as.matrix(y)
-                X = as.matrix(X)
-
-                out = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE)
-                lam_minCV = out$lambda.min
-                return(lam_minCV)
-                }''')
-
-    try:
-        lambda_cv_R = robjects.globalenv['glmnet_cv']
-        n, p = X.shape
-        r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-        r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-
-        lam_minCV = lambda_cv_R(r_X, r_y)
-        return lam_minCV
-    except:
-        return 0.75 * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-
-def boot_pivot_approx_var(n=100, p=50, s=5, signal=5., B=1000, lam_frac=1., randomization_scale=1., sigma= 1.):
-
-    while True:
-        X, y, beta, nonzero, sigma = gaussian_instance(n=n, p=p, s=s, rho=0.2, signal=signal, sigma=sigma,
-                                                       random_signs=True, equicorrelated=False)
-        n, p = X.shape
-        sigma_est = np.std(y) / np.sqrt(2.)
-        lam = lam_frac * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) * sigma_est
-        #lam = glmnet_sigma(X, y)
-
-        loss = rr.glm.gaussian(X, y)
-        epsilon = 1./np.sqrt(n)
-        W = np.ones(p) * lam
-        penalty = rr.group_lasso(np.arange(p),
-                                 weights=dict(zip(np.arange(p), W)), lagrange=1.)
-
-        randomizer = randomization.isotropic_gaussian((p,), scale=randomization_scale)
-        M_est = M_estimator_map(loss, epsilon, penalty, randomizer, randomization_scale=randomization_scale, sigma=sigma_est)
-
-        M_est.solve_map()
-        active = M_est._overall
-
-        true_target = np.linalg.inv(X[:, active].T.dot(X[:, active])).dot(X[:, active].T).dot(X.dot(beta))
-        nactive = np.sum(active)
-        print("number of variables selected by LASSO", nactive)
-
-        if nactive > 0:
-            approx_MLE, var, mle_map, implied_cov, implied_mean, _ = solve_UMVU(M_est.target_transform,
-                                                                                M_est.opt_transform,
-                                                                                M_est.target_observed,
-                                                                                M_est.feasible_point,
-                                                                                M_est.target_cov,
-                                                                                M_est.randomizer_precision)
-
-            A = np.hstack([np.zeros((nactive, nactive)), -np.identity(nactive)])
-            b = np.zeros(nactive)
-            con = AC.constraints(A, b, covariance=implied_cov, mean= implied_mean)
-            sample = AC.sample_from_constraints(con, np.ones(2*nactive), ndraw=B, burnin=300)
-            boot_pivot = np.zeros((B, nactive))
-            boot_mle_vec = np.zeros((B, nactive))
-            for b in range(B):
-                boot_mle = mle_map((sample[b,:])[:nactive])
-                boot_pivot[b, :] = np.true_divide(boot_mle[0] - approx_MLE, np.sqrt(np.diag(boot_mle[1])))
-                boot_mle_vec[b, :] = boot_mle[0]
-            break
-
-    return boot_pivot.reshape((B*nactive,)), boot_pivot.mean(0).sum()/nactive, boot_pivot.std(0), \
-           np.true_divide(approx_MLE - true_target, boot_pivot.std(0)), np.true_divide(approx_MLE - true_target, boot_mle_vec.std(0)),\
-           (approx_MLE - true_target).sum() / float(nactive)
-
-
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 50
-    bias = 0.
-    pivot_obs_info = []
-    pivot_mle = []
-
-    for i in range(ndraw):
-        approx = boot_pivot_approx_var(n=2000, p=4000, s=20, signal=3.5, B=2000)
-        if approx is not None:
-            pivot_boot = approx[3]
-            mle_boot = approx[4]
-            bias += approx[5]
-
-            for j in range(pivot_boot.shape[0]):
-                pivot_obs_info.append(pivot_boot[j])
-                pivot_mle.append(mle_boot[j])
-
-        sys.stderr.write("iteration completed" + str(i) + "\n")
-        sys.stderr.write("overall_bias" + str(bias / float(i + 1)) + "\n")
-
-    plt.clf()
-    ecdf_boot = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-    ecdf_mle = ECDF(ndist.cdf(np.asarray(pivot_mle)))
-    grid = np.linspace(0, 1, 101)
-    #print("ecdf", ecdf_boot(grid))
-    plt.plot(grid, ecdf_boot(grid), c='blue', marker='^')
-    plt.plot(grid, ecdf_mle(grid), c='red', marker='^')
-    plt.plot(grid, grid, 'k--')
-    #plt.show()
-    plt.savefig("/Users/snigdhapanigrahi/Desktop/selective_Boot_pivot_n2000_p4000_amp3.5_rho_0.2_sigma1.png")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_simple_problem.py b/selection/adjusted_MLE/tests/test_simple_problem.py
deleted file mode 100644
index df48acae9..000000000
--- a/selection/adjusted_MLE/tests/test_simple_problem.py
+++ /dev/null
@@ -1,223 +0,0 @@
-from __future__ import print_function
-import numpy as np, sys
-
-from scipy.stats import norm as ndist
-from selection.adjusted_MLE.selective_MLE import solve_UMVU
-from selection.adjusted_MLE.tests.exact_MLE import grad_CGF, fisher_info
-from statsmodels.distributions.empirical_distribution import ECDF
-from selection.adjusted_MLE.tests.approx_MLE import approx_fisher_info
-
-def simple_problem(target_observed=2, n=1, threshold=2, randomization_scale=1., epsilon = 0.05):
-    """
-    Simple problem: randomizaiton of sd 1 and thresholded at 2 (default args)
-    """
-    target_observed = np.atleast_1d(target_observed)
-    target_transform = (-np.identity(n), np.zeros(n))
-    opt_transform = (np.identity(n)+ epsilon, np.ones(n) * threshold)
-    feasible_point = np.ones(n)
-    randomizer_precision = np.identity(n) / randomization_scale ** 2
-    target_cov = np.identity(n)
-
-    return solve_UMVU(target_transform,
-                      opt_transform,
-                      target_observed,
-                      feasible_point,
-                      target_cov,
-                      randomizer_precision)
-
-
-def sim_simple_problem(true_mean, threshold=2, randomization_scale=1., epsilon = 0.05):
-    while True:
-        Z, W = np.random.standard_normal(2)
-        Z += true_mean
-        W *= randomization_scale
-        if ((Z + W) - threshold)/(1.+epsilon)>0.:
-            return Z
-
-
-def check_unbiased(true_mean, threshold=2, randomization_scale=1., nsim=5000, epsilon = 0.05):
-    bias = 0
-    for _ in range(nsim):
-        Z = sim_simple_problem(true_mean, threshold, randomization_scale)
-        est = simple_problem(Z, threshold=threshold, randomization_scale=randomization_scale)[0]
-        bias += est - true_mean
-
-    return bias / nsim
-
-#print(check_unbiased(-1., threshold=2, randomization_scale=1., nsim=5000, epsilon = 0.05))
-
-def test_orthogonal_lasso(n=5):
-    Zval = np.random.normal(0, 1, n)
-    print("observed Z" + str(Zval) + "\n")
-    approx_MLE = simple_problem(Zval, threshold=2, randomization_scale=1.)[0]
-
-    approx_MLE2 = [simple_problem(z, threshold=2, randomization_scale=1.)[0] for z in Zval]
-    mu_seq = np.linspace(-6, 6, 2500)
-    grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
-
-    exact_MLE = []
-    for k in range(Zval.shape[0]):
-        mle = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
-        exact_MLE.append(mle)
-
-    return approx_MLE, np.asarray(exact_MLE), np.asarray(approx_MLE2)
-
-
-def bootstrap_simple(n= 100, B=100, true_mean=0., threshold=2.):
-
-    resid_matrix = np.identity(n) - np.ones((n,n)) / n
-    U, D, V = np.linalg.svd(resid_matrix)
-    U = U[:,:-1]
-
-    while True:
-        target_Z, omega = np.random.standard_normal(2)
-        target_Z += true_mean * np.sqrt(n)
-        if target_Z + omega > threshold:
-            Zval = U.dot(np.random.standard_normal(n-1))
-            Zval += target_Z * np.ones(n) / np.sqrt(n)
-            break
-
-    approx_MLE, value, mle_map = simple_problem(target_Z, n=1, threshold=2, randomization_scale=1.)
-
-    boot_sample = []
-    for b in range(B):
-        Zval_boot = np.sum(Zval[np.random.choice(n, n, replace=True)]) / np.sqrt(n)
-        boot_sample.append(mle_map(Zval_boot)[0])
-
-    print("approx_MLE", approx_MLE, np.std(boot_sample), true_mean)
-    return boot_sample, np.mean(boot_sample), np.std(boot_sample), \
-           np.squeeze((boot_sample - np.mean(boot_sample)) / np.std(boot_sample)), \
-           np.true_divide(approx_MLE - np.sqrt(n)*true_mean, np.std(boot_sample))
-
-def check_approx_fisher_simple(true_mean, threshold=2, randomization_scale=1., nsim=200):
-    diff = 0.
-    for _ in range(nsim):
-        Z = sim_simple_problem(true_mean, threshold, randomization_scale)
-        approx = simple_problem(Z, threshold=threshold, randomization_scale=randomization_scale)
-        approx_std = np.sqrt(np.diag(approx[2]))
-
-        exact_std = 1./np.sqrt(fisher_info(approx[0], randomization_scale = 1., threshold = 2))
-        diff += np.abs(exact_std-approx_std)
-        print("difference", np.abs(exact_std-approx_std))
-
-    print(diff/float(nsim))
-
-def pivot_approx_fisher_simple(n=100, true_mean = 0., threshold=2, epsilon = 0.2):
-
-    while True:
-        target_Z, omega = np.random.standard_normal(2)
-        target_Z += true_mean * np.sqrt(n)
-        if ((target_Z + omega) - threshold)/(1.+epsilon)>0.:
-            break
-
-    n1 =1
-    target_observed = np.atleast_1d(target_Z)
-    target_transform = (-np.identity(n1), np.zeros(n1))
-    #s = np.asscalar(np.sign(target_Z + omega))
-    opt_transform = ((np.identity(n1)+epsilon), np.ones(n1) * (threshold))
-    print("shapes", (np.ones(n1) * (threshold)).shape, (np.identity(n1)+epsilon).shape, np.identity(n1).shape,
-          np.zeros(n1).shape, target_observed.shape)
-    feasible_point = np.ones(n1)
-    randomization_scale = 1.
-    randomizer_precision = np.identity(n1) / randomization_scale ** 2
-    target_cov = np.identity(n1)
-    simple_var = 1./approx_fisher_info(target_observed, randomization_scale=1., threshold=2)
-
-    approx_MLE, var, mle_map, _, _ = solve_UMVU(target_transform,
-                                                opt_transform,
-                                                target_observed,
-                                                feasible_point,
-                                                target_cov,
-                                                randomizer_precision)
-
-    print("approx MLE", approx_MLE, np.sqrt(n)*true_mean, var)
-    print("diff", simple_var- var)
-    return np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(var)), approx_MLE - np.sqrt(n)*true_mean, \
-           np.squeeze((approx_MLE - np.sqrt(n)*true_mean)/np.sqrt(simple_var)), simple_var- var
-
-
-#test_matrices_simple(true_mean=2., threshold=2, epsilon=0.2)
-
-# if __name__ == "__main__":
-#     n = 1000
-#     Zval = np.random.normal(0, 1, n)
-#     sys.stderr.write("observed Z" + str(Zval) + "\n")
-#     MLE = simple_problem(Zval, n=n, threshold=2, randomization_scale=1.)[0]
-#     #print(MLE)
-#
-#     mu_seq = np.linspace(-6, 6, 200)
-#     grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
-#
-#     exact_MLE = []
-#     for k in range(Zval.shape[0]):
-#         mle = mu_seq[np.argmin(np.abs(grad_partition - Zval[k]))]
-#         exact_MLE.append(mle)
-#
-#     np.testing.assert_allclose(MLE, exact_MLE, rtol=2.0)
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     plt.clf()
-#     Zval = np.linspace(-5, 5, 51)
-#     MLE = np.array([simple_problem(z)[0] for z in Zval])
-#
-#     mu_seq = np.linspace(-6, 6, 200)
-#     grad_partition = np.array([grad_CGF(mu, randomization_scale=1., threshold=2) for mu in mu_seq])
-#
-#     plt.plot(Zval, MLE, label='+2')
-#     plt.plot(grad_partition, mu_seq, 'r--', label='MLE')
-#     plt.legend()
-#     plt.show()
-
-# if __name__ == "__main__":
-#     import matplotlib.pyplot as plt
-#
-#     ndraw = 200
-#     boot_pivot=[]
-#     for i in range(ndraw):
-#         boot_result = bootstrap_simple(n=300, B=5000, true_mean=0., threshold=2.)
-#         boot_pivot.append(boot_result[4])
-#
-#         print("boot sample", np.asarray(boot_pivot).shape, boot_pivot)
-#         ecdf = ECDF(ndist.cdf(np.asarray(boot_pivot)))
-#         grid = np.linspace(0, 1, 101)
-#
-#         if i % 10 == 0:
-#             plt.clf()
-#             print("ecdf", ecdf(grid))
-#             plt.plot(grid, ecdf(grid), c='red', marker='^')
-#             plt.plot([0,1],[0,1], 'k--')
-#             plt.savefig('bootstrap_simple.png')
-
-if __name__ == "__main__":
-    import matplotlib.pyplot as plt
-
-    ndraw = 500
-    pivot_obs_info=[]
-    bias = 0.
-    diff = 0.
-    for i in range(ndraw):
-        result = pivot_approx_fisher_simple(n=300, true_mean = -0.1, threshold=2)
-        pivot_obs_info.append(result[0])
-        diff += result[3]
-        bias += result[1]
-        sys.stderr.write("bias" + str(bias / float(i)) + "\n")
-
-        if i % 10 == 0:
-            plt.clf()
-            ecdf = ECDF(ndist.cdf(np.asarray(pivot_obs_info)))
-            grid = np.linspace(0, 1, 101)
-            print("ecdf", ecdf(grid))
-            plt.plot(grid, ecdf(grid), c='red', marker='^')
-            plt.plot([0,1],[0,1], 'k--')
-            plt.savefig('bootstrap_simple.png')
-
-    sys.stderr.write("overall_bias" + str(bias / float(ndraw)) + "\n")
-    sys.stderr.write("difference between variances" + str(diff / float(ndraw)) + "\n")
-
-    plt.clf()
-    plt.plot(grid, ecdf(grid), c='red', marker='^')
-    plt.plot([0,1],[0,1], 'k--')
-
-

From c85a736a7b96866182f0cf4cb0f6ed20a4efe3ed Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 19 Mar 2018 12:37:00 -0700
Subject: [PATCH 513/617] adding new test

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 118 ++++++++++++++++++
 .../tests/test_selective_MLE_high.py          |   6 +-
 2 files changed, 122 insertions(+), 2 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/test_risk_coverage.py

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
new file mode 100644
index 000000000..44dac8cce
--- /dev/null
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -0,0 +1,118 @@
+import numpy as np, sys
+from rpy2 import robjects
+import rpy2.robjects.numpy2ri
+rpy2.robjects.numpy2ri.activate()
+
+import selection.randomized.lasso as L; reload(L)
+from selection.randomized.lasso import highdim
+
+def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
+    robjects.r('''
+    library(bestsubset)
+    sim_xy = bestsubset::sim.xy
+    ''')
+
+    r_simulate = robjects.globalenv['sim_xy']
+    sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
+    X = np.array(sim.rx2('x'))
+    y = np.array(sim.rx2('y'))
+    X_val = np.array(sim.rx2('xval'))
+    y_val = np.array(sim.rx2('yval'))
+    Sigma = np.array(sim.rx2('Sigma'))
+    beta = np.array(sim.rx2('beta'))
+    sigma = np.array(sim.rx2('sigma'))
+
+    return X, y, X_val, y_val, Sigma, beta, sigma
+
+def tuned_lasso(X, y, X_val,y_val):
+    robjects.r('''
+        tuned_lasso_estimator = function(X,Y,X.val,Y.val){
+        Y = as.matrix(Y)
+        X = as.matrix(X)
+        Y.val = as.vector(Y.val)
+        X.val = as.matrix(X.val)
+        rel.LASSO = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
+        LASSO = lasso(X,Y,intercept=FALSE,nlam=50)
+        beta.hat.rellasso = as.matrix(coef(rel.LASSO))
+        beta.hat.lasso = as.matrix(coef(LASSO))
+        min.lam = min(rel.LASSO$lambda)
+        max.lam = max(rel.LASSO$lambda)
+        lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
+        muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
+        muhat.val.lasso = as.matrix(predict(LASSO, X.val))
+        err.val.rellasso = colMeans((muhat.val.rellasso - Y.val)^2)
+        err.val.lasso = colMeans((muhat.val.lasso - Y.val)^2)
+        #print(err.val.rellasso)
+        opt_lam = ceiling(which.min(err.val.rellasso)/10)
+        lambda.tuned = lam.seq[opt_lam]
+        return(list(beta.hat.rellasso = beta.hat.rellasso[,which.min(err.val.rellasso)],
+        beta.hat.lasso = beta.hat.lasso[,which.min(err.val.lasso)],
+        lambda.tuned = lambda.tuned, lambda.seq = lam.seq))
+        }''')
+
+    r_lasso = robjects.globalenv['tuned_lasso_estimator']
+
+    n, p = X.shape
+    nval, _ = X_val.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+    r_X_val = robjects.r.matrix(X_val, nrow=nval, ncol=p)
+    r_y_val = robjects.r.matrix(y_val, nrow=nval, ncol=1)
+
+    tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
+    estimator_rellasso = np.array(tuned_est.rx2('beta.hat.rellasso'))
+    estimator_lasso = np.array(tuned_est.rx2('beta.hat.lasso'))
+    lam_tuned = np.array(tuned_est.rx2('lambda.tuned'))
+    lam_seq = np.array(tuned_est.rx2('lambda.seq'))
+    return estimator_rellasso, estimator_lasso, lam_tuned, lam_seq
+
+def relative_risk(est, truth, Sigma):
+
+    return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
+
+def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+                              randomizer_scale=np.sqrt(0.25), target = "selected",
+                              full_dispersion = True):
+
+    X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
+                                                    s=s, beta_type=beta_type, snr=snr)
+    rel_LASSO, est_LASSO, lam_tuned, lam_seq = tuned_lasso(X, y, X_val, y_val)
+    active_nonrand = (rel_LASSO != 0)
+    nactive_nonrand = active_nonrand.sum()
+    true_mean = X.dot(beta)
+
+    dispersion = None
+    if full_dispersion:
+        dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y)))**2 / (n - p)
+
+    sigma_ = np.std(y)
+
+    _y = y
+    y = y - y.mean()
+    y_val = y_val - y_val.mean()
+
+    const = highdim.gaussian
+    lam_seq = sigma_* np.linspace(0.75, 2.75, num=100) * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+    err = np.zeros(100)
+    for k in range(100):
+        W = lam_seq[k]
+        conv = const(X,
+                     y,
+                     W,
+                     randomizer_scale=randomizer_scale * sigma_)
+        signs = conv.fit()
+        nonzero = signs != 0
+        estimate, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+
+        full_estimate = np.zeros(p)
+        full_estimate[nonzero] = estimate
+        err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+
+    lam = lam_seq[np.argmin(err)]
+    #sys.stderr.write("lambda from tuned relaxed LASSO" + str(lam_tuned) + "\n")
+    sys.stderr.write("lambda from randomized LASSO" + str(lam) + "\n")
+
+
+comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+                          randomizer_scale=np.sqrt(0.25), target = "selected", full_dispersion = True)
+
diff --git a/selection/randomized/tests/test_selective_MLE_high.py b/selection/randomized/tests/test_selective_MLE_high.py
index d912675de..233875ec8 100644
--- a/selection/randomized/tests/test_selective_MLE_high.py
+++ b/selection/randomized/tests/test_selective_MLE_high.py
@@ -2,7 +2,7 @@
 import nose.tools as nt
 import rpy2.robjects as rpy
 from rpy2.robjects import numpy2ri
-rpy.r('library(selectiveInference)')
+#rpy.r('library(selectiveInference)')
 
 import selection.randomized.lasso as L; reload(L)
 from selection.randomized.lasso import highdim 
@@ -87,6 +87,8 @@ def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4,
     coverage = (beta_target > intervals[:,0]) * (beta_target < intervals[:,1])
     return pval[beta_target == 0], pval[beta_target != 0], coverage
 
+print(test_selected_targets())
+
 def main(nsim=500, full=True, full_dispersion=False):
 
     P0, PA, cover = [], [], []
@@ -116,5 +118,5 @@ def main(nsim=500, full=True, full_dispersion=False):
             plt.savefig("plot.pdf")
     plt.show()
 
-main()
+#main()
 

From eacf8c83b49b24001b609ed609c9e49b10b6aa4e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 20 Mar 2018 12:38:33 -0700
Subject: [PATCH 514/617] updated test

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 30 ++++++++++++++++---
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 44dac8cce..a130de660 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -81,16 +81,22 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     nactive_nonrand = active_nonrand.sum()
     true_mean = X.dot(beta)
 
-    dispersion = None
-    if full_dispersion:
-        dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y)))**2 / (n - p)
+    X -= X.mean(0)[None, :]
+    X /= (X.std(0)[None, :] * np.sqrt(n))
+    X_val -= X_val.mean(0)[None, :]
+    X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
     sigma_ = np.std(y)
+    print("naive estimate of sigma_", sigma_)
 
     _y = y
     y = y - y.mean()
     y_val = y_val - y_val.mean()
 
+    dispersion = None
+    if full_dispersion:
+        dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+
     const = highdim.gaussian
     lam_seq = sigma_* np.linspace(0.75, 2.75, num=100) * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
     err = np.zeros(100)
@@ -109,9 +115,25 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
     lam = lam_seq[np.argmin(err)]
-    #sys.stderr.write("lambda from tuned relaxed LASSO" + str(lam_tuned) + "\n")
+    sys.stderr.write("lambda from tuned relaxed LASSO" + str(sigma_*lam_tuned) + "\n")
     sys.stderr.write("lambda from randomized LASSO" + str(lam) + "\n")
 
+    randomized_lasso = const(X,
+                             y,
+                             lam,
+                             randomizer_scale=randomizer_scale * sigma_)
+
+    signs = randomized_lasso.fit()
+    nonzero = signs != 0
+
+    print("nonzero", nonzero.sum())
+    sel_MLE = np.zeros(p)
+    estimate, _, _, pval, intervals = randomized_lasso.selective_MLE(target=target, dispersion=dispersion)
+    sel_MLE[nonzero] = estimate / np.sqrt(n)
+
+    sys.stderr.write("overall_selrisk" + str(relative_risk(rel_LASSO, beta, Sigma)) + "\n")
+    sys.stderr.write("overall_relLASSOrisk" + str(relative_risk(sel_MLE, beta, Sigma)) + "\n")
+
 
 comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
                           randomizer_scale=np.sqrt(0.25), target = "selected", full_dispersion = True)

From 7b4dedcff1efe3488262c11e46c8cbf7179d0d37 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 20 Mar 2018 13:14:50 -0700
Subject: [PATCH 515/617] added a return of indep est to the func.
 selective_MLE

---
 .../adjusted_MLE/tests/test_risk_coverage.py   | 18 +++++++++++-------
 selection/randomized/query.py                  |  7 ++++---
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index a130de660..bcb591eaa 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -86,9 +86,6 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     X_val -= X_val.mean(0)[None, :]
     X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
-    sigma_ = np.std(y)
-    print("naive estimate of sigma_", sigma_)
-
     _y = y
     y = y - y.mean()
     y_val = y_val - y_val.mean()
@@ -97,6 +94,9 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     if full_dispersion:
         dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
 
+    sigma_ = np.std(y)
+    print("naive estimate of sigma_", sigma_)
+
     const = highdim.gaussian
     lam_seq = sigma_* np.linspace(0.75, 2.75, num=100) * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
     err = np.zeros(100)
@@ -108,7 +108,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
                      randomizer_scale=randomizer_scale * sigma_)
         signs = conv.fit()
         nonzero = signs != 0
-        estimate, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+        estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
 
         full_estimate = np.zeros(p)
         full_estimate[nonzero] = estimate
@@ -128,11 +128,15 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
     print("nonzero", nonzero.sum())
     sel_MLE = np.zeros(p)
-    estimate, _, _, pval, intervals = randomized_lasso.selective_MLE(target=target, dispersion=dispersion)
+    estimate, _, _, pval, intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target, dispersion=dispersion)
     sel_MLE[nonzero] = estimate / np.sqrt(n)
+    ind_estimator = np.zeros(p)
+    ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
 
-    sys.stderr.write("overall_selrisk" + str(relative_risk(rel_LASSO, beta, Sigma)) + "\n")
-    sys.stderr.write("overall_relLASSOrisk" + str(relative_risk(sel_MLE, beta, Sigma)) + "\n")
+    sys.stderr.write("selMLE risk" + str(relative_risk(sel_MLE, beta, Sigma)) + "\n")
+    sys.stderr.write("indep est risk" + str(relative_risk(ind_estimator, beta, Sigma)) + "\n")
+    sys.stderr.write("relLASSO risk" + str(relative_risk(rel_LASSO, beta, Sigma)) + "\n")
+    sys.stderr.write("LASSO risk" + str(relative_risk(est_LASSO, beta, Sigma)) + "\n")
 
 
 comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 92801be46..393229964 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -494,14 +494,15 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
 
         conjugate_arg = prec_opt.dot(self.affine_con.mean)
 
-        feasible_point = np.ones(prec_opt.shape[0])
+        #feasible_point = np.ones(prec_opt.shape[0])
         val, soln, hess = solve_barrier_nonneg(conjugate_arg,
                                                prec_opt,
                                                feasible_point,
                                                **solve_args)
 
         final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(self.affine_con.mean - soln)))
-
+        ind_unbiased_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(self.affine_con.mean
+                                                                                                - feasible_point)))
         L = target_lin.T.dot(prec_opt)
         observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T))
         observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target))
@@ -513,7 +514,7 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
         quantile = ndist.ppf(1 - alpha / 2.)
         intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)),
                                final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T
-        return final_estimator, observed_info_mean, Z_scores, pvalues, intervals
+        return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator
 
 class optimization_intervals(object):
 

From 5f23a90afbc0482b34f1dfa672c31fce4506be03 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 20 Mar 2018 13:29:27 -0700
Subject: [PATCH 516/617] finished adding risks

---
 selection/adjusted_MLE/tests/test_risk_coverage.py | 3 ++-
 selection/randomized/query.py                      | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index bcb591eaa..88f759e6b 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -95,7 +95,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
 
     sigma_ = np.std(y)
-    print("naive estimate of sigma_", sigma_)
+    print("naive estimate of sigma", sigma_)
 
     const = highdim.gaussian
     lam_seq = sigma_* np.linspace(0.75, 2.75, num=100) * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
@@ -135,6 +135,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
     sys.stderr.write("selMLE risk" + str(relative_risk(sel_MLE, beta, Sigma)) + "\n")
     sys.stderr.write("indep est risk" + str(relative_risk(ind_estimator, beta, Sigma)) + "\n")
+    sys.stderr.write("randomized LASSO est risk" + str(relative_risk(randomized_lasso.initial_soln/np.sqrt(n), beta, Sigma)) + "\n")
     sys.stderr.write("relLASSO risk" + str(relative_risk(rel_LASSO, beta, Sigma)) + "\n")
     sys.stderr.write("LASSO risk" + str(relative_risk(est_LASSO, beta, Sigma)) + "\n")
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 393229964..b2e65aa42 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -494,10 +494,10 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
 
         conjugate_arg = prec_opt.dot(self.affine_con.mean)
 
-        #feasible_point = np.ones(prec_opt.shape[0])
+        init_soln = np.ones(prec_opt.shape[0])
         val, soln, hess = solve_barrier_nonneg(conjugate_arg,
                                                prec_opt,
-                                               feasible_point,
+                                               init_soln,
                                                **solve_args)
 
         final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(self.affine_con.mean - soln)))

From 04b51c54a676f3d60ad8eb7a70c590eafd5efe5c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 21 Mar 2018 22:35:17 -0700
Subject: [PATCH 517/617] tuned lasso and lasso in Python do not give the
 same solutions

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 97 ++++++++++++++-----
 1 file changed, 74 insertions(+), 23 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 88f759e6b..1ac10ce42 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -4,7 +4,27 @@
 rpy2.robjects.numpy2ri.activate()
 
 import selection.randomized.lasso as L; reload(L)
-from selection.randomized.lasso import highdim
+from selection.randomized.lasso import lasso, highdim
+
+def glmnet_lasso(X, y, lambda_val):
+    robjects.r('''
+                glmnet_LASSO = function(X,y,lambda){
+                y = as.matrix(y)
+                X = as.matrix(X)
+                lam = as.matrix(lambda)[1,1]
+                n = nrow(X)
+                fit = glmnet(X, y, standardize=TRUE, intercept=FALSE)
+                estimate = coef(fit, s=lam)[-1]
+                return(list(estimate = estimate))
+                }''')
+
+    lambda_R = robjects.globalenv['glmnet_LASSO']
+    n, p = X.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+    r_lam = robjects.r.matrix(lambda_val, nrow=1, ncol=1)
+    estimate = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate'))
+    return estimate
 
 def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
     robjects.r('''
@@ -31,23 +51,34 @@ def tuned_lasso(X, y, X_val,y_val):
         X = as.matrix(X)
         Y.val = as.vector(Y.val)
         X.val = as.matrix(X.val)
-        rel.LASSO = lasso(X,Y,intercept=FALSE, nrelax=10, nlam=50)
-        LASSO = lasso(X,Y,intercept=FALSE,nlam=50)
+        rel.LASSO = lasso(X,Y,intercept=TRUE, nrelax=10, nlam=50, standardize=TRUE)
+        LASSO = lasso(X,Y,intercept=TRUE,nlam=50, standardize=TRUE)
         beta.hat.rellasso = as.matrix(coef(rel.LASSO))
         beta.hat.lasso = as.matrix(coef(LASSO))
         min.lam = min(rel.LASSO$lambda)
         max.lam = max(rel.LASSO$lambda)
+        print(paste("max and min values of lambda", max.lam, min.lam))
+
         lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
         muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
         muhat.val.lasso = as.matrix(predict(LASSO, X.val))
         err.val.rellasso = colMeans((muhat.val.rellasso - Y.val)^2)
         err.val.lasso = colMeans((muhat.val.lasso - Y.val)^2)
-        #print(err.val.rellasso)
+
         opt_lam = ceiling(which.min(err.val.rellasso)/10)
-        lambda.tuned = lam.seq[opt_lam]
-        return(list(beta.hat.rellasso = beta.hat.rellasso[,which.min(err.val.rellasso)],
-        beta.hat.lasso = beta.hat.lasso[,which.min(err.val.lasso)],
-        lambda.tuned = lambda.tuned, lambda.seq = lam.seq))
+        lambda.tuned.rellasso = lam.seq[opt_lam]
+        lambda.tuned.lasso = lam.seq[which.min(err.val.lasso)]
+
+        fit = glmnet(X, Y, standardize=TRUE, intercept=TRUE)
+        estimate.tuned = coef(fit, s=lambda.tuned.lasso)[-1]
+
+        print(paste("compare estimates", max(abs(estimate.tuned-(beta.hat.lasso[,which.min(err.val.lasso)])[-1])),
+        length(which(estimate.tuned!=0)), length(which((beta.hat.lasso[,which.min(err.val.lasso)])[-1]!=0))))
+
+        return(list(beta.hat.rellasso = (beta.hat.rellasso[,which.min(err.val.rellasso)])[-1],
+        beta.hat.lasso = (beta.hat.lasso[,which.min(err.val.lasso)])[-1],
+        lambda.tuned.rellasso = lambda.tuned.rellasso, lambda.tuned.lasso= lambda.tuned.lasso,
+        lambda.seq = lam.seq))
         }''')
 
     r_lasso = robjects.globalenv['tuned_lasso_estimator']
@@ -62,9 +93,10 @@ def tuned_lasso(X, y, X_val,y_val):
     tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
     estimator_rellasso = np.array(tuned_est.rx2('beta.hat.rellasso'))
     estimator_lasso = np.array(tuned_est.rx2('beta.hat.lasso'))
-    lam_tuned = np.array(tuned_est.rx2('lambda.tuned'))
+    lam_tuned_rellasso = np.array(tuned_est.rx2('lambda.tuned.rellasso'))
+    lam_tuned_lasso = np.array(tuned_est.rx2('lambda.tuned.lasso'))
     lam_seq = np.array(tuned_est.rx2('lambda.seq'))
-    return estimator_rellasso, estimator_lasso, lam_tuned, lam_seq
+    return estimator_rellasso, estimator_lasso, lam_tuned_rellasso, lam_tuned_lasso, lam_seq
 
 def relative_risk(est, truth, Sigma):
 
@@ -76,11 +108,12 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
     X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
                                                     s=s, beta_type=beta_type, snr=snr)
-    rel_LASSO, est_LASSO, lam_tuned, lam_seq = tuned_lasso(X, y, X_val, y_val)
-    active_nonrand = (rel_LASSO != 0)
+    rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
+    active_nonrand = (est_LASSO != 0)
     nactive_nonrand = active_nonrand.sum()
     true_mean = X.dot(beta)
 
+    _X = X
     X -= X.mean(0)[None, :]
     X /= (X.std(0)[None, :] * np.sqrt(n))
     X_val -= X_val.mean(0)[None, :]
@@ -95,10 +128,23 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
 
     sigma_ = np.std(y)
-    print("naive estimate of sigma", sigma_)
+    LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma**2.)*lam_tuned_lasso))
+    soln = LASSO_py.fit()
+    #print("compare solns", soln, est_LASSO)
+    active_LASSO = (soln != 0)
+    nactive_LASSO = active_LASSO.sum()
+
+    # LASSO_rand0 = highdim.gaussian(X,
+    #                                y,
+    #                                np.asscalar((sigma_**2)*lam_tuned_lasso),
+    #                                randomizer_scale=0.00000001)
+    # signs_rand0 = LASSO_rand0.fit()
+
+    #glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
 
     const = highdim.gaussian
-    lam_seq = sigma_* np.linspace(0.75, 2.75, num=100) * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+    lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
+              np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
     err = np.zeros(100)
     for k in range(100):
         W = lam_seq[k]
@@ -115,8 +161,8 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
     lam = lam_seq[np.argmin(err)]
-    sys.stderr.write("lambda from tuned relaxed LASSO" + str(sigma_*lam_tuned) + "\n")
-    sys.stderr.write("lambda from randomized LASSO" + str(lam) + "\n")
+    sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
+    sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
 
     randomized_lasso = const(X,
                              y,
@@ -125,21 +171,26 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
     signs = randomized_lasso.fit()
     nonzero = signs != 0
+    sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
+    sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO)+ "\n")
+    #sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO!=0).sum()) + "\n")
+    sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n")
 
-    print("nonzero", nonzero.sum())
     sel_MLE = np.zeros(p)
     estimate, _, _, pval, intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target, dispersion=dispersion)
     sel_MLE[nonzero] = estimate / np.sqrt(n)
     ind_estimator = np.zeros(p)
     ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
 
-    sys.stderr.write("selMLE risk" + str(relative_risk(sel_MLE, beta, Sigma)) + "\n")
-    sys.stderr.write("indep est risk" + str(relative_risk(ind_estimator, beta, Sigma)) + "\n")
-    sys.stderr.write("randomized LASSO est risk" + str(relative_risk(randomized_lasso.initial_soln/np.sqrt(n), beta, Sigma)) + "\n")
-    sys.stderr.write("relLASSO risk" + str(relative_risk(rel_LASSO, beta, Sigma)) + "\n")
-    sys.stderr.write("LASSO risk" + str(relative_risk(est_LASSO, beta, Sigma)) + "\n")
+    sys.stderr.write("selMLE risk " + str(relative_risk(sel_MLE, beta, Sigma)) + "\n")
+    sys.stderr.write("indep est risk " + str(relative_risk(ind_estimator, beta, Sigma)) + "\n")
+    sys.stderr.write("randomized LASSO est risk " + str(relative_risk(randomized_lasso.initial_soln/np.sqrt(n), beta, Sigma)) + "\n")
+    sys.stderr.write("relaxed rand LASSO est risk " + str(relative_risk(randomized_lasso._beta_full/np.sqrt(n), beta, Sigma))+ "\n"+"\n")
 
+    sys.stderr.write("relLASSO risk " + str(relative_risk(rel_LASSO, beta, Sigma)) + "\n")
+    sys.stderr.write("LASSO risk " + str(relative_risk(est_LASSO, beta, Sigma)) + "\n")
 
 comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
-                          randomizer_scale=np.sqrt(0.25), target = "selected", full_dispersion = True)
+                          randomizer_scale=np.sqrt(0.25), target = "selected",
+                          full_dispersion = True)
 

From 4ea51eb7cee3ff594de2b388317204324d350228 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 22 Mar 2018 00:20:49 -0700
Subject: [PATCH 518/617] comparison of risks

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 51 +++++++++++++++----
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 1ac10ce42..d96161a24 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -57,7 +57,7 @@ def tuned_lasso(X, y, X_val,y_val):
         beta.hat.lasso = as.matrix(coef(LASSO))
         min.lam = min(rel.LASSO$lambda)
         max.lam = max(rel.LASSO$lambda)
-        print(paste("max and min values of lambda", max.lam, min.lam))
+        #print(paste("max and min values of lambda", max.lam, min.lam))
 
         lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
         muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
@@ -72,7 +72,7 @@ def tuned_lasso(X, y, X_val,y_val):
         fit = glmnet(X, Y, standardize=TRUE, intercept=TRUE)
         estimate.tuned = coef(fit, s=lambda.tuned.lasso)[-1]
 
-        print(paste("compare estimates", max(abs(estimate.tuned-(beta.hat.lasso[,which.min(err.val.lasso)])[-1])),
+        #print(paste("compare estimates", max(abs(estimate.tuned-(beta.hat.lasso[,which.min(err.val.lasso)])[-1])),
         length(which(estimate.tuned!=0)), length(which((beta.hat.lasso[,which.min(err.val.lasso)])[-1]!=0))))
 
         return(list(beta.hat.rellasso = (beta.hat.rellasso[,which.min(err.val.rellasso)])[-1],
@@ -182,15 +182,44 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     ind_estimator = np.zeros(p)
     ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
 
-    sys.stderr.write("selMLE risk " + str(relative_risk(sel_MLE, beta, Sigma)) + "\n")
-    sys.stderr.write("indep est risk " + str(relative_risk(ind_estimator, beta, Sigma)) + "\n")
-    sys.stderr.write("randomized LASSO est risk " + str(relative_risk(randomized_lasso.initial_soln/np.sqrt(n), beta, Sigma)) + "\n")
-    sys.stderr.write("relaxed rand LASSO est risk " + str(relative_risk(randomized_lasso._beta_full/np.sqrt(n), beta, Sigma))+ "\n"+"\n")
+    return relative_risk(sel_MLE, beta, Sigma),\
+           relative_risk(ind_estimator, beta, Sigma),\
+           relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma),\
+           relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
+           relative_risk(rel_LASSO, beta, Sigma),\
+           relative_risk(est_LASSO, beta, Sigma)
 
-    sys.stderr.write("relLASSO risk " + str(relative_risk(rel_LASSO, beta, Sigma)) + "\n")
-    sys.stderr.write("LASSO risk " + str(relative_risk(est_LASSO, beta, Sigma)) + "\n")
+if __name__ == "__main__":
+
+    ndraw = 50
+    bias = 0.
+    risk_selMLE = 0.
+    risk_indest = 0.
+    risk_LASSO_rand = 0.
+    risk_relLASSO_rand = 0.
+
+    risk_relLASSO_nonrand = 0.
+    risk_LASSO_nonrand = 0.
+
+    for i in range(ndraw):
+        output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+                                           randomizer_scale=np.sqrt(0.25), target="selected", full_dispersion=True)
+
+        risk_selMLE += output[0]
+        risk_indest += output[1]
+        risk_LASSO_rand += output[2]
+        risk_relLASSO_rand += output[3]
+        risk_relLASSO_nonrand += output[4]
+        risk_LASSO_nonrand += output[5]
+
+        sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
+        sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
+        sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+        sys.stderr.write("overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n"+ "\n")
+
+        sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+        sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+
+        sys.stderr.write("iteration completed" + str(i+1) + "\n")
 
-comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
-                          randomizer_scale=np.sqrt(0.25), target = "selected",
-                          full_dispersion = True)
 

From 01bb21200d3be54451a78e1fb8bc0094622e3577 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 22 Mar 2018 00:39:24 -0700
Subject: [PATCH 519/617] removed redundant print

---
 selection/adjusted_MLE/tests/test_risk_coverage.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index d96161a24..189562a05 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -73,7 +73,7 @@ def tuned_lasso(X, y, X_val,y_val):
         estimate.tuned = coef(fit, s=lambda.tuned.lasso)[-1]
 
         #print(paste("compare estimates", max(abs(estimate.tuned-(beta.hat.lasso[,which.min(err.val.lasso)])[-1])),
-        length(which(estimate.tuned!=0)), length(which((beta.hat.lasso[,which.min(err.val.lasso)])[-1]!=0))))
+        #length(which(estimate.tuned!=0)), length(which((beta.hat.lasso[,which.min(err.val.lasso)])[-1]!=0))))
 
         return(list(beta.hat.rellasso = (beta.hat.rellasso[,which.min(err.val.rellasso)])[-1],
         beta.hat.lasso = (beta.hat.lasso[,which.min(err.val.lasso)])[-1],
@@ -202,7 +202,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     risk_LASSO_nonrand = 0.
 
     for i in range(ndraw):
-        output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+        output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=3, snr=0.2,
                                            randomizer_scale=np.sqrt(0.25), target="selected", full_dispersion=True)
 
         risk_selMLE += output[0]

From cb03e75855a0679129c9c684381cdc5211744c92 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 22 Mar 2018 15:18:33 -0700
Subject: [PATCH 520/617] test: add Lee and selective interval coverage

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 48 +++++++++++++++----
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 189562a05..e7db66fc7 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -4,7 +4,8 @@
 rpy2.robjects.numpy2ri.activate()
 
 import selection.randomized.lasso as L; reload(L)
-from selection.randomized.lasso import lasso, highdim
+from selection.randomized.lasso import highdim
+from selection.algorithms.lasso import lasso
 
 def glmnet_lasso(X, y, lambda_val):
     robjects.r('''
@@ -128,11 +129,15 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
 
     sigma_ = np.std(y)
-    LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma**2.)*lam_tuned_lasso))
+    LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.)*lam_tuned_lasso), np.asscalar(sigma_))
     soln = LASSO_py.fit()
     #print("compare solns", soln, est_LASSO)
     active_LASSO = (soln != 0)
     nactive_LASSO = active_LASSO.sum()
+    Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
+    Lee_intervals = np.zeros((nactive_LASSO,2))
+    Lee_intervals[:,0] = np.asarray(Lee['lower_confidence'])
+    Lee_intervals[:,1] = np.asarray(Lee['upper_confidence'])
 
     # LASSO_rand0 = highdim.gaussian(X,
     #                                y,
@@ -140,7 +145,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     #                                randomizer_scale=0.00000001)
     # signs_rand0 = LASSO_rand0.fit()
 
-    #glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
+    glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
 
     const = highdim.gaussian
     lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
@@ -161,7 +166,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
     lam = lam_seq[np.argmin(err)]
-    sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
+    sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma**2.)*lam_tuned_lasso) + "\n")
     sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
 
     randomized_lasso = const(X,
@@ -173,21 +178,37 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     nonzero = signs != 0
     sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
     sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO)+ "\n")
-    #sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO!=0).sum()) + "\n")
-    sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n")
+    sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO!=0).sum()) + "\n")
+    sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n"+"\n")
 
     sel_MLE = np.zeros(p)
-    estimate, _, _, pval, intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target, dispersion=dispersion)
+    estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
+                                                                                             dispersion=dispersion)
     sel_MLE[nonzero] = estimate / np.sqrt(n)
     ind_estimator = np.zeros(p)
     ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
 
+    if target == "selected":
+        beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
+        beta_target_nonrand = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
+
+    elif target == "full":
+        beta_target_rand = beta[nonzero]
+        beta_target_nonrand = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
+
+    coverage_selective = ((beta_target_rand > sel_intervals[:, 0])
+                          * (beta_target_rand < sel_intervals[:, 1])).sum()/float(nonzero.sum())
+    coverage_Lee = ((beta_target_nonrand > Lee_intervals[:, 0])
+                    *(beta_target_nonrand < Lee_intervals[:, 1])).sum()/float(nactive_LASSO)
+
     return relative_risk(sel_MLE, beta, Sigma),\
            relative_risk(ind_estimator, beta, Sigma),\
            relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma),\
            relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
            relative_risk(rel_LASSO, beta, Sigma),\
-           relative_risk(est_LASSO, beta, Sigma)
+           relative_risk(est_LASSO, beta, Sigma), \
+           coverage_selective, \
+           coverage_Lee
 
 if __name__ == "__main__":
 
@@ -201,8 +222,11 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     risk_relLASSO_nonrand = 0.
     risk_LASSO_nonrand = 0.
 
+    coverage_selMLE = 0.
+    coverage_Lee = 0.
+
     for i in range(ndraw):
-        output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=3, snr=0.2,
+        output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.25,
                                            randomizer_scale=np.sqrt(0.25), target="selected", full_dispersion=True)
 
         risk_selMLE += output[0]
@@ -212,6 +236,9 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         risk_relLASSO_nonrand += output[4]
         risk_LASSO_nonrand += output[5]
 
+        coverage_selMLE += output[6]
+        coverage_Lee += output[7]
+
         sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
         sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
@@ -220,6 +247,9 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
         sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
 
+        sys.stderr.write("overall selective coverage " + str(coverage_selMLE/ float(i + 1)) + "\n" )
+        sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n" + "\n")
+
         sys.stderr.write("iteration completed" + str(i+1) + "\n")
 
 

From 9f059251aa2ddac88b6b0218cbbef9a8f9e07f1c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 26 Mar 2018 18:28:55 -0700
Subject: [PATCH 521/617] reorganized test

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 235 ++++++++++--------
 1 file changed, 132 insertions(+), 103 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index e7db66fc7..147a56204 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -103,112 +103,126 @@ def relative_risk(est, truth, Sigma):
 
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
+def coverage(intervals, truth, npars):
+
+    return ((truth > intervals[:, 0])*(truth < intervals[:, 1])).sum() / float(npars)
+
 def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
                               randomizer_scale=np.sqrt(0.25), target = "selected",
                               full_dispersion = True):
 
-    X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
-                                                    s=s, beta_type=beta_type, snr=snr)
-    rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
-    active_nonrand = (est_LASSO != 0)
-    nactive_nonrand = active_nonrand.sum()
-    true_mean = X.dot(beta)
-
-    _X = X
-    X -= X.mean(0)[None, :]
-    X /= (X.std(0)[None, :] * np.sqrt(n))
-    X_val -= X_val.mean(0)[None, :]
-    X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
-
-    _y = y
-    y = y - y.mean()
-    y_val = y_val - y_val.mean()
-
-    dispersion = None
-    if full_dispersion:
-        dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
-
-    sigma_ = np.std(y)
-    LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.)*lam_tuned_lasso), np.asscalar(sigma_))
-    soln = LASSO_py.fit()
-    #print("compare solns", soln, est_LASSO)
-    active_LASSO = (soln != 0)
-    nactive_LASSO = active_LASSO.sum()
-    Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-    Lee_intervals = np.zeros((nactive_LASSO,2))
-    Lee_intervals[:,0] = np.asarray(Lee['lower_confidence'])
-    Lee_intervals[:,1] = np.asarray(Lee['upper_confidence'])
-
-    # LASSO_rand0 = highdim.gaussian(X,
-    #                                y,
-    #                                np.asscalar((sigma_**2)*lam_tuned_lasso),
-    #                                randomizer_scale=0.00000001)
-    # signs_rand0 = LASSO_rand0.fit()
-
-    glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
-
-    const = highdim.gaussian
-    lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
-              np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-    err = np.zeros(100)
-    for k in range(100):
-        W = lam_seq[k]
-        conv = const(X,
-                     y,
-                     W,
-                     randomizer_scale=randomizer_scale * sigma_)
-        signs = conv.fit()
+    while True:
+        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
+                                                        s=s, beta_type=beta_type, snr=snr)
+        rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
+        active_nonrand = (est_LASSO != 0)
+        nactive_nonrand = active_nonrand.sum()
+        true_mean = X.dot(beta)
+
+        _X = X
+        X -= X.mean(0)[None, :]
+        X /= (X.std(0)[None, :] * np.sqrt(n))
+        X_val -= X_val.mean(0)[None, :]
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
+
+        _y = y
+        y = y - y.mean()
+        y_val = y_val - y_val.mean()
+
+        dispersion = None
+        if full_dispersion:
+            dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+
+        sigma_ = np.std(y)
+        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_ ** 2.) * lam_tuned_lasso), np.asscalar(sigma_))
+        soln = LASSO_py.fit()
+        active_LASSO = (soln != 0)
+        nactive_LASSO = active_LASSO.sum()
+        glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
+
+        const = highdim.gaussian
+        lam_seq = (sigma_ **2.) * np.linspace(0.25, 2.75, num=100) * \
+                  np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+        err = np.zeros(100)
+        for k in range(100):
+            W = lam_seq[k]
+            conv = const(X,
+                         y,
+                         W,
+                         randomizer_scale=randomizer_scale * sigma_)
+            signs = conv.fit()
+            nonzero = signs != 0
+            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+
+            full_estimate = np.zeros(p)
+            full_estimate[nonzero] = estimate
+            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+
+        lam = lam_seq[np.argmin(err)]
+        sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma **2.)*lam_tuned_lasso) + "\n")
+        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
+
+        randomized_lasso = const(X,
+                                 y,
+                                 lam,
+                                 randomizer_scale=randomizer_scale * sigma_)
+
+        signs = randomized_lasso.fit()
         nonzero = signs != 0
-        estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
-
-        full_estimate = np.zeros(p)
-        full_estimate[nonzero] = estimate
-        err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
-
-    lam = lam_seq[np.argmin(err)]
-    sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma**2.)*lam_tuned_lasso) + "\n")
-    sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
-
-    randomized_lasso = const(X,
-                             y,
-                             lam,
-                             randomizer_scale=randomizer_scale * sigma_)
-
-    signs = randomized_lasso.fit()
-    nonzero = signs != 0
-    sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
-    sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO)+ "\n")
-    sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO!=0).sum()) + "\n")
-    sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n"+"\n")
-
-    sel_MLE = np.zeros(p)
-    estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
-                                                                                             dispersion=dispersion)
-    sel_MLE[nonzero] = estimate / np.sqrt(n)
-    ind_estimator = np.zeros(p)
-    ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
-
-    if target == "selected":
-        beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
-        beta_target_nonrand = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
-
-    elif target == "full":
-        beta_target_rand = beta[nonzero]
-        beta_target_nonrand = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
-
-    coverage_selective = ((beta_target_rand > sel_intervals[:, 0])
-                          * (beta_target_rand < sel_intervals[:, 1])).sum()/float(nonzero.sum())
-    coverage_Lee = ((beta_target_nonrand > Lee_intervals[:, 0])
-                    *(beta_target_nonrand < Lee_intervals[:, 1])).sum()/float(nactive_LASSO)
-
-    return relative_risk(sel_MLE, beta, Sigma),\
-           relative_risk(ind_estimator, beta, Sigma),\
-           relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma),\
-           relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
-           relative_risk(rel_LASSO, beta, Sigma),\
-           relative_risk(est_LASSO, beta, Sigma), \
-           coverage_selective, \
-           coverage_Lee
+        sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
+        sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
+        sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
+        sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
+
+        if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
+            Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
+            Lee_intervals = np.zeros((nactive_LASSO, 2))
+            Lee_intervals[:, 0] = np.asarray(Lee['lower_confidence'])
+            Lee_intervals[:, 1] = np.asarray(Lee['upper_confidence'])
+
+            sel_MLE = np.zeros(p)
+            estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
+                                                                                                         dispersion=dispersion)
+            sel_MLE[nonzero] = estimate / np.sqrt(n)
+            ind_estimator = np.zeros(p)
+            ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
+
+            if target == "selected":
+                beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
+                beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
+                beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
+
+                post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+                unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
+                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                            post_LASSO_OLS + 1.65 * unad_sd]).T
+
+            elif target == "full":
+                beta_target_rand = beta[nonzero]
+                beta_target_nonrand_py = beta[active_LASSO]
+                beta_target_nonrand = beta[active_nonrand]
+
+                post_LASSO_OLS = np.linalg.pinv(X)[active_nonrand].dot(y)
+                unad_sd = sigma_ * np.sqrt(
+                    np.diag((np.linalg.pinv(X)[active_nonrand].T.dot(np.linalg.pinv(X)[active_nonrand]))))
+                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                            post_LASSO_OLS + 1.65 * unad_sd]).T
+
+            break
+
+    if True:
+        return relative_risk(sel_MLE, beta, Sigma), \
+               relative_risk(ind_estimator, beta, Sigma), \
+               relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
+               relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
+               relative_risk(rel_LASSO, beta, Sigma), \
+               relative_risk(est_LASSO, beta, Sigma), \
+               coverage(sel_intervals, beta_target_rand, nonzero.sum()), \
+               coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO), \
+               coverage(unad_intervals, beta_target_nonrand, nactive_nonrand), \
+               (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
+               (Lee_intervals[:, 1] - Lee_intervals[:, 0]).sum() / float(nactive_LASSO), \
+               (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand)
 
 if __name__ == "__main__":
 
@@ -224,6 +238,11 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
     coverage_selMLE = 0.
     coverage_Lee = 0.
+    coverage_unad = 0.
+
+    length_sel = 0.
+    length_Lee = 0.
+    length_unad = 0.
 
     for i in range(ndraw):
         output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.25,
@@ -238,6 +257,11 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
         coverage_selMLE += output[6]
         coverage_Lee += output[7]
+        coverage_unad += output[8]
+
+        length_sel += output[9]
+        length_Lee += output[10]
+        length_unad += output[11]
 
         sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
@@ -248,8 +272,13 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
 
         sys.stderr.write("overall selective coverage " + str(coverage_selMLE/ float(i + 1)) + "\n" )
-        sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n" + "\n")
+        sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) +  "\n")
+        sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+
+        sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+        sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
+        sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
 
-        sys.stderr.write("iteration completed" + str(i+1) + "\n")
+        sys.stderr.write("iteration completed " + str(i+1) + "\n")
 
 

From c249aed7d42af33f6df67e24206d900a7e32a187 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 26 Mar 2018 22:33:55 -0700
Subject: [PATCH 522/617] scale lam_seq by sigma_ (not sigma_**2); log n*lam_tuned_lasso

---
 selection/adjusted_MLE/tests/test_risk_coverage.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 147a56204..9efda3184 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -141,7 +141,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
 
         const = highdim.gaussian
-        lam_seq = (sigma_ **2.) * np.linspace(0.25, 2.75, num=100) * \
+        lam_seq = sigma_ * np.linspace(0.25, 2.75, num=100) * \
                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
         err = np.zeros(100)
         for k in range(100):
@@ -159,7 +159,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
             err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
-        sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma **2.)*lam_tuned_lasso) + "\n")
+        sys.stderr.write("lambda from tuned relaxed LASSO " + str(n*lam_tuned_lasso) + "\n")
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
 
         randomized_lasso = const(X,

From 1d267563e7d213141622b46553065b71bfeb30a5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 26 Mar 2018 23:36:27 -0700
Subject: [PATCH 523/617] added inferential powers

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 54 +++++++++++++++----
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 9efda3184..b23f212a9 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -103,9 +103,10 @@ def relative_risk(est, truth, Sigma):
 
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
-def coverage(intervals, truth, npars):
+def coverage(intervals, truth, npars, active_bool):
 
-    return ((truth > intervals[:, 0])*(truth < intervals[:, 1])).sum() / float(npars)
+    return ((truth > intervals[:, 0])*(truth < intervals[:, 1])).sum() / float(npars),\
+           ((active_bool)*(np.logical_or((0. < intervals[:, 0]),(0. > intervals[:,1])))).sum()/ float(npars)
 
 def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
                               randomizer_scale=np.sqrt(0.25), target = "selected",
@@ -159,7 +160,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
             err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
-        sys.stderr.write("lambda from tuned relaxed LASSO " + str(n*lam_tuned_lasso) + "\n")
+        sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
 
         randomized_lasso = const(X,
@@ -208,6 +209,26 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
                 unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
                                             post_LASSO_OLS + 1.65 * unad_sd]).T
 
+            true_signals = np.zeros(p, np.bool)
+            true_signals[beta != 0] = 1
+            true_set = np.asarray([u for u in range(p) if true_signals[u]])
+            active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
+            active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
+            active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
+
+            active_rand_bool = np.zeros(nonzero.sum(), np.bool)
+            for x in range(nonzero.sum()):
+                active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
+            active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
+            for w in range(nactive_nonrand):
+                active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
+            active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
+            for z in range(nactive_LASSO):
+                active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
+
+            cov_sel, power_sel = coverage(sel_intervals, beta_target_rand, nonzero.sum(), active_rand_bool)
+            cov_Lee, power_Lee = coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO,  active_LASSO_bool)
+            cov_unad, power_unad = coverage(unad_intervals, beta_target_nonrand, nactive_nonrand, active_nonrand_bool)
             break
 
     if True:
@@ -217,16 +238,19 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
                relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
                relative_risk(rel_LASSO, beta, Sigma), \
                relative_risk(est_LASSO, beta, Sigma), \
-               coverage(sel_intervals, beta_target_rand, nonzero.sum()), \
-               coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO), \
-               coverage(unad_intervals, beta_target_nonrand, nactive_nonrand), \
+               cov_sel,\
+               cov_Lee,\
+               cov_unad,\
                (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
                (Lee_intervals[:, 1] - Lee_intervals[:, 0]).sum() / float(nactive_LASSO), \
-               (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand)
+               (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
+               power_sel,  \
+               power_Lee, \
+               power_unad
 
 if __name__ == "__main__":
 
-    ndraw = 50
+    ndraw = 10
     bias = 0.
     risk_selMLE = 0.
     risk_indest = 0.
@@ -244,8 +268,12 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     length_Lee = 0.
     length_unad = 0.
 
+    power_sel = 0.
+    power_Lee = 0.
+    power_unad = 0.
+
     for i in range(ndraw):
-        output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.25,
+        output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=.25,
                                            randomizer_scale=np.sqrt(0.25), target="selected", full_dispersion=True)
 
         risk_selMLE += output[0]
@@ -263,6 +291,10 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         length_Lee += output[10]
         length_unad += output[11]
 
+        power_sel += output[12]
+        power_Lee += output[13]
+        power_unad += output[14]
+
         sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
         sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
@@ -279,6 +311,10 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
         sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
 
+        sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+        sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
+        sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+
         sys.stderr.write("iteration completed " + str(i+1) + "\n")
 
 

From 3fc57f922ae0ef86ecaf6e8e163fe4c79e6bb04e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 27 Mar 2018 00:40:09 -0700
Subject: [PATCH 524/617] correction in power

---
 selection/adjusted_MLE/tests/test_risk_coverage.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index b23f212a9..67316fbbd 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -106,7 +106,7 @@ def relative_risk(est, truth, Sigma):
 def coverage(intervals, truth, npars, active_bool):
 
     return ((truth > intervals[:, 0])*(truth < intervals[:, 1])).sum() / float(npars),\
-           ((active_bool)*(np.logical_or((0. < intervals[:, 0]),(0. > intervals[:,1])))).sum()/ float(npars)
+           ((active_bool)*(np.logical_or((0. < intervals[:, 0]),(0. > intervals[:,1])))).sum()
 
 def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
                               randomizer_scale=np.sqrt(0.25), target = "selected",
@@ -244,13 +244,13 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
                (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
                (Lee_intervals[:, 1] - Lee_intervals[:, 0]).sum() / float(nactive_LASSO), \
                (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
-               power_sel,  \
-               power_Lee, \
-               power_unad
+               power_sel/float((beta != 0).sum()),  \
+               power_Lee/float((beta != 0).sum()), \
+               power_unad/float((beta != 0).sum())
 
 if __name__ == "__main__":
 
-    ndraw = 10
+    ndraw = 50
     bias = 0.
     risk_selMLE = 0.
     risk_indest = 0.

From 60e25d00f030b7b378a191a361abc74adc4857f3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 27 Mar 2018 14:36:40 -0700
Subject: [PATCH 525/617] adding Liu et al to library

---
 selection/algorithms/lasso.py            | 612 +++++++++++++++++++----
 selection/algorithms/tests/test_lasso.py |  39 ++
 2 files changed, 561 insertions(+), 90 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index a80ea0403..9f220345b 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -26,7 +26,9 @@
                         weighted_l1norm, 
                         simple_problem,
                         coxph as coxph_obj,
-                        smooth_sum)
+                        smooth_sum,
+                        squared_error,
+                        identity_quadratic)
 
 from .sqrt_lasso import solve_sqrt_lasso, estimate_sigma
 
@@ -37,6 +39,7 @@
                                  stack)
 
 from ..distributions.discrete_family import discrete_family
+from ..truncated.gaussian import truncated_gaussian_old as TG
 from ..randomized.glm import pairs_bootstrap_glm
 
 class lasso(object):
@@ -58,7 +61,8 @@ class lasso(object):
     alpha = 0.05
     UMAU = False
 
-    def __init__(self, loglike, 
+    def __init__(self, 
+                 loglike, 
                  feature_weights,
                  covariance_estimator=None,
                  ignore_inactive_constraints=False):
@@ -258,6 +262,91 @@ def fit(self, lasso_solution=None, solve_args={'tol':1.e-12, 'min_its':50}):
             self._inactive_constraints = None
         return self.lasso_solution
 
+    def summary(self, alternative='twosided', alpha=0.05,
+                compute_intervals=False):
+        """
+        Summary table for inference adjusted for selection.
+
+        Parameters
+        ----------
+
+        alternative : str
+            One of ["twosided","onesided"]
+
+        alpha : float
+            Form (1-alpha)*100% selective confidence intervals.
+
+        compute_intervals : bool
+            Should we compute confidence intervals?
+
+        Returns
+        -------
+
+        pval_summary : pd.DataFrame
+            Table with one row per active variable, indexed by `self.active`.
+            Columns are 'variable', 'pval', 'lasso', 'onestep', 'lower_confidence', 'upper_confidence', 'lower_trunc', 'upper_trunc', 'sd'.
+
+        """
+
+        if alternative not in ['twosided', 'onesided']:
+            raise ValueError("alternative must be one of ['twosided', 'onesided']")
+
+        result = []
+        C = self.constraints
+        if C is not None:
+            one_step = self.onestep_estimator
+            for i in range(one_step.shape[0]):
+                eta = np.zeros_like(one_step)
+                eta[i] = self.active_signs[i]
+                _alt = {"onesided":'greater',
+                        'twosided':"twosided"}[alternative]
+                if C.linear_part.shape[0] > 0: # there were some constraints
+                    _pval = C.pivot(eta, one_step, alternative=_alt)
+                else:
+                    obs = (eta * one_step).sum()
+                    sd = np.sqrt((eta * C.covariance.dot(eta)))
+                    Z = obs / sd
+                    _pval = 2 * ndist.sf(np.fabs(Z))
+
+                if compute_intervals:
+                    if C.linear_part.shape[0] > 0: # there were some constraints
+                        _interval = C.interval(eta, one_step,
+                                               alpha=alpha)
+                        _interval = sorted([_interval[0] * self.active_signs[i],
+                                            _interval[1] * self.active_signs[i]])
+                    else:
+                        _interval = (obs - ndist.ppf(1 - alpha / 2) * sd,
+                                     obs + ndist.ppf(1 - alpha / 2) * sd)
+                else:
+                    _interval = [np.nan, np.nan]
+                _bounds = np.array(C.bounds(eta, one_step))
+                sd = _bounds[-1]
+                lower_trunc, est, upper_trunc = sorted(_bounds[:3] * self.active_signs[i])
+
+                result.append((self.active[i],
+                               _pval,
+                               self.lasso_solution[self.active[i]],
+                               one_step[i],
+                               _interval[0],
+                               _interval[1],
+                               lower_trunc,
+                               upper_trunc,
+                               sd))
+                
+        df = pd.DataFrame(index=self.active,
+                          data=dict([(n, d) for n, d in zip(['variable',
+                                                             'pval', 
+                                                             'lasso', 
+                                                             'onestep', 
+                                                             'lower_confidence', 
+                                                             'upper_confidence',
+                                                             'lower_trunc',
+                                                             'upper_trunc',
+                                                             'sd'], 
+                                                            np.array(result).T)]))
+        df['variable'] = df['variable'].astype(int)
+        return df
+
     @property
     def soln(self):
         """
@@ -720,94 +809,6 @@ def sqrt_lasso(X,
 
         return L
 
-    def summary(self, alternative='twosided', alpha=0.05, UMAU=False,
-                compute_intervals=False):
-        """
-        Summary table for inference adjusted for selection.
-
-        Parameters
-        ----------
-
-        alternative : str
-            One of ["twosided","onesided"]
-
-        Returns
-        -------
-
-        pval_summary : np.recarray
-            Array with one entry per active variable.
-            Columns are 'variable', 'pval', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'.
-
-        alpha : float
-            Form (1-alpha)*100% selective confidence intervals.
-
-        UMAU : bool
-            If True, form the UMAU intervals (slow, perhaps less stable).
-
-        compute_intervals : bool
-            Should we compute confidence intervals?
-
-        """
-
-        if alternative not in ['twosided', 'onesided']:
-            raise ValueError("alternative must be one of ['twosided', 'onesided']")
-
-        result = []
-        C = self.constraints
-        if C is not None:
-            one_step = self.onestep_estimator
-            for i in range(one_step.shape[0]):
-                eta = np.zeros_like(one_step)
-                eta[i] = self.active_signs[i]
-                _alt = {"onesided":'greater',
-                        'twosided':"twosided"}[alternative]
-                if C.linear_part.shape[0] > 0: # there were some constraints
-                    _pval = C.pivot(eta, one_step, alternative=_alt)
-                else:
-                    obs = (eta * one_step).sum()
-                    sd = np.sqrt((eta * C.covariance.dot(eta)))
-                    Z = obs / sd
-                    _pval = 2 * ndist.sf(np.fabs(Z))
-
-                if compute_intervals:
-                    if C.linear_part.shape[0] > 0: # there were some constraints
-                        _interval = C.interval(eta, one_step,
-                                               alpha=alpha,
-                                               UMAU=UMAU)
-                        _interval = sorted([_interval[0] * self.active_signs[i],
-                                            _interval[1] * self.active_signs[i]])
-                    else:
-                        _interval = (obs - ndist.ppf(1 - alpha / 2) * sd,
-                                     obs + ndist.ppf(1 - alpha / 2) * sd)
-                else:
-                    _interval = [np.nan, np.nan]
-                _bounds = np.array(C.bounds(eta, one_step))
-                sd = _bounds[-1]
-                lower_trunc, est, upper_trunc = sorted(_bounds[:3] * self.active_signs[i])
-
-                result.append((self.active[i],
-                               _pval,
-                               self.lasso_solution[self.active[i]],
-                               one_step[i],
-                               _interval[0],
-                               _interval[1],
-                               lower_trunc,
-                               upper_trunc,
-                               sd))
-                
-        df = pd.DataFrame(index=self.active,
-                          data=dict([(n, d) for n, d in zip(['variable',
-                                                             'pval', 
-                                                             'lasso', 
-                                                             'onestep', 
-                                                             'lower_confidence', 
-                                                             'upper_confidence',
-                                                             'lower_trunc',
-                                                             'upper_trunc',
-                                                             'sd'], 
-                                                            np.array(result).T)]))
-        df['variable'] = df['variable'].astype(int)
-        return df
 
 
 def nominal_intervals(lasso_obj):
@@ -1837,3 +1838,434 @@ def additive_noise(X,
                pvalues,
                intervals), randomized_lasso
 
+## Liu, Markovic and Tibshirani method based on full model
+## conditioning only on the event j \in E for each active j
+
+# Liu, Markovic, Tibs selection
+# put this into library!
+
+def _solve_restricted_problem(Qbeta_bar, X, W, lagrange, initial=None):
+    p = Qbeta_bar.shape[0]
+
+    loss = squared_error(X * np.sqrt(W)[:, None], np.zeros(X.shape[0]))
+    loss.quadratic = identity_quadratic(0, 
+                                        0, 
+                                        -Qbeta_bar, 
+                                        0)
+    lagrange = np.asarray(lagrange)
+    if lagrange.shape in [(), (1,)]:
+        lagrange = np.ones(p) * lagrange
+    pen = weighted_l1norm(lagrange, lagrange=1.)
+    problem = simple_problem(loss, pen)
+    if initial is not None:
+        problem.coefs[:] = initial
+    soln = problem.solve(tol=1.e-12, min_its=30)
+    return soln
+
+def _truncation_interval(Qbeta_bar, X, W, Qi_jj, j, beta_barj, lagrange):
+    if lagrange[j] != 0:
+        lagrange_cp = lagrange.copy()
+    else:
+        return -np.inf, np.inf
+    lagrange_cp[j] = np.inf
+    restricted_soln = _solve_restricted_problem(Qbeta_bar, X, W, lagrange_cp)
+
+    p = Qbeta_bar.shape[0]
+    Ij = np.zeros(p)
+    Ij[j] = 1.
+    nuisance = Qbeta_bar - Ij / Qi_jj * beta_barj
+    
+    Qj = X.T.dot(X[:,j] * W)
+    center = nuisance[j] - Qj.dot(restricted_soln)
+    upper = (lagrange[j] - center) * Qi_jj
+    lower = (-lagrange[j] - center) * Qi_jj
+
+    if not (beta_barj < lower or beta_barj > upper):
+        warnings.warn("implied KKT constraint not satisfied")
+
+    return lower, upper
+
+class lasso_full(lasso):
+
+    r"""
+    A class for the LASSO for post-selection inference.
+    The problem solved is
+
+    .. math::
+
+        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
+            \lambda \|\beta\|_1
+
+    where $\lambda$ is `lam`.
+
+    """
+
+    # level for coverage is 1-alpha
+    alpha = 0.05
+
+    def __init__(self, 
+                 loglike, 
+                 feature_weights):
+        r"""
+
+        Create a new post-selection for the LASSO problem
+
+        Parameters
+        ----------
+
+        loglike : `regreg.smooth.glm.glm`
+            A (negative) log-likelihood as implemented in `regreg`.
+
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is broadcast to all features.
+
+        """
+
+        self.loglike = loglike
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(loglike.shape) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+
+    def fit(self, lasso_solution=None, solve_args={'tol':1.e-12, 'min_its':50}):
+        """
+        Fit the lasso using `regreg`.
+        This sets the attributes `soln`, `onestep` and
+        forms the constraints necessary for post-selection inference
+        by calling `form_constraints()`.
+
+        Parameters
+        ----------
+
+        lasso_solution : optional
+
+             If not None, this is taken to be the solution
+             of the optimization problem. No checks
+             are done, though the implied affine
+             constraints will generally not be satisfied.
+
+        solve_args : keyword args
+             Passed to `regreg.problems.simple_problem.solve`.
+
+        Returns
+        -------
+
+        soln : np.float
+             Solution to lasso.
+             
+        Notes
+        -----
+
+        If `self` already has an attribute `lasso_solution`
+        this will be taken to be the solution and 
+        no optimization problem will be solved. Supplying
+        the optional argument `lasso_solution` will
+        overwrite `self`'s `lasso_solution`.
+
+        """
+
+        self._penalty = weighted_l1norm(self.feature_weights, lagrange=1.)
+        if lasso_solution is None and not hasattr(self, "lasso_solution"):
+            problem = simple_problem(self.loglike, self._penalty)
+            self.lasso_solution = problem.solve(**solve_args)
+        elif lasso_solution is not None:
+            self.lasso_solution = lasso_solution
+
+        lasso_solution = self.lasso_solution # shorthand after setting it correctly above
+
+        if not np.all(lasso_solution == 0):
+
+            self.active = np.nonzero(lasso_solution != 0)[0]
+            self.inactive = lasso_solution == 0
+            self.active_signs = np.sign(lasso_solution[self.active])
+            self._active_soln = lasso_solution[self.active]
+
+            X, y = self.loglike.data # presuming GLM here
+            n, p = X.shape
+
+            W = self.loglike.saturated_loss.hessian(X.dot(lasso_solution))
+
+            # Needed for finding truncation intervals
+
+            self._Qbeta_bar = X.T.dot(W * X.dot(lasso_solution)) - self.loglike.smooth_objective(lasso_solution, 'grad')
+            self._W = W
+
+            if n > p:
+                Q = self.loglike.hessian(lasso_solution)
+                E = self.active
+                Qi = np.linalg.inv(Q)
+                self._QiE = Qi[E][:,E]
+                self._beta_barE = Qi[E].dot(self._Qbeta_bar)
+                one_step = self._beta_barE
+                self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X[:,self.active].dot(one_step)))**2 / self._W).sum() / (n - len(self.active)))
+            else:
+                raise NotImplementedError('debiased LASSO goes here')
+        else:
+            self.active = []
+            self.inactive = np.arange(lasso_solution.shape[0])
+        return self.lasso_solution
+
+    def summary(self, alpha=0.05,
+                compute_intervals=False):
+        """
+        Summary table for inference adjusted for selection.
+
+        Parameters
+        ----------
+
+        alpha : float
+            Form (1-alpha)*100% selective confidence intervals.
+
+        compute_intervals : bool
+            Should we compute confidence intervals?
+
+        Returns
+        -------
+
+        pval_summary : np.recarray
+            Array with one entry per active variable.
+            Columns are 'variable', 'pval', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'.
+
+        """
+
+        X, y = self.loglike.data
+        W, sigma = self._W, self._sigma
+        active_set, QiE, beta_barE, Qbeta_bar = self.active, self._QiE, self._beta_barE, self._Qbeta_bar
+
+        result = [] 
+
+        for j in range(len(active_set)):
+            idx = self.active[j]
+            lower, upper = _truncation_interval(Qbeta_bar, X, W, QiE[j,j], idx, beta_barE[j], self.feature_weights)
+
+            sd = sigma * np.sqrt(QiE[j,j])
+            tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
+            pvalue = tg.cdf(beta_barE[j])
+            pvalue = float(2 * min(pvalue, 1 - pvalue))
+
+            if compute_intervals:
+                l, u = tg.equal_tailed_interval(beta_barE[j], alpha=alpha)
+            else:
+                l, u = np.nan, np.nan
+
+            result.append((idx, pvalue, self.lasso_solution[idx], beta_barE[j], sd, l, u))
+
+        df = pd.DataFrame(index=self.active,
+                          data=dict([(n, d) for n, d in zip(['variable',
+                                                             'pval', 
+                                                             'lasso', 
+                                                             'onestep', 
+                                                             'lower_confidence', 
+                                                             'upper_confidence',
+                                                             'sd'], 
+                                                            np.array(result).T)]))
+        df['variable'] = df['variable'].astype(int)
+        return df
+
+    @property
+    def soln(self):
+        """
+        Solution to the lasso problem, set by `fit` method.
+        """
+        if not hasattr(self, "lasso_solution"):
+            self.fit()
+        return self.lasso_solution
+
+    @staticmethod
+    def gaussian(X, 
+                 Y, 
+                 feature_weights, 
+                 sigma=1., 
+                 covariance_estimator=None,
+                 quadratic=None):
+        r"""
+        Squared-error LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        Y : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        sigma : float (optional)
+            Noise standard deviation. Set to 1 if `covariance_estimator` is not None.
+            This scales the loglikelihood by `sigma**(-2)`.
+
+        covariance_estimator : callable (optional)
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        Returns
+        -------
+
+        L : `selection.algorithms.lasso.lasso`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of some of the
+        rows and columns of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        if covariance_estimator is not None:
+            sigma = 1.
+        loglike = glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
+        return lasso_full(loglike, np.asarray(feature_weights) / sigma**2)
+
+    @staticmethod
+    def logistic(X, 
+                 successes, 
+                 feature_weights, 
+                 trials=None, 
+                 covariance_estimator=None,
+                 quadratic=None):
+        r"""
+        Logistic LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell$ is the negative of the logistic 
+        log-likelihood (half the logistic deviance)
+        and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        successes : ndarray
+            Shape (n,) -- response vector. An integer number of successes.
+            For data that is proportions, multiply the proportions
+            by the number of trials first.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        trials : ndarray (optional)
+            Number of trials per response, defaults to
+            ones the same shape as Y. 
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        Returns
+        -------
+
+        L : `selection.algorithms.lasso.lasso`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        loglike = glm.logistic(X, successes, trials=trials, quadratic=quadratic)
+        return lasso_full(loglike, feature_weights)
+
+    @staticmethod
+    def poisson(X, 
+                counts, 
+                feature_weights, 
+                covariance_estimator=None,
+                quadratic=None):
+        r"""
+        Poisson log-linear LASSO with feature weights.
+
+        Objective function is 
+        $$
+        \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+
+        where $\ell^{\text{Poisson}}$ is the negative
+        of the log of the Poisson likelihood (half the deviance)
+        and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+
+        counts : ndarray
+            Shape (n,) -- the response.
+
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized 
+            features are handled by setting those entries of 
+            `feature_weights` to 0. If `feature_weights` is 
+            a float, then all parameters are penalized equally.
+
+        covariance_estimator : optional
+            If None, use the parametric
+            covariance estimate of the selected model.
+
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic 
+            coefficient to 0.
+
+        Returns
+        -------
+
+        L : `selection.algorithms.lasso.lasso`
+        
+        Notes
+        -----
+
+        If not None, `covariance_estimator` should 
+        take arguments (beta, active, inactive)
+        and return an estimate of the covariance of
+        $(\bar{\beta}_E, \nabla \ell(\bar{\beta}_E)_{-E})$,
+        the unpenalized estimator and the inactive
+        coordinates of the gradient of the likelihood at
+        the unpenalized estimator.
+
+        """
+        loglike = glm.poisson(X, counts, quadratic=quadratic)
+        return lasso_full(loglike, feature_weights)
diff --git a/selection/algorithms/tests/test_lasso.py b/selection/algorithms/tests/test_lasso.py
index e29a6cc23..17739a9eb 100644
--- a/selection/algorithms/tests/test_lasso.py
+++ b/selection/algorithms/tests/test_lasso.py
@@ -10,6 +10,7 @@
 import selection.tests.reports as reports
 
 from selection.algorithms.lasso import (lasso, 
+                                        lasso_full,
                                         data_carving, 
                                         data_splitting,
                                         split_model, 
@@ -754,6 +755,44 @@ def test_equivalence_sqrtlasso(n=200, p=400, s=10, sigma=3.):
     np.testing.assert_allclose(G1[3:], G2[3:])
     np.testing.assert_allclose(soln1, soln2)
     
+def test_gaussian_full(n=100, p=20):
+
+    y = np.random.standard_normal(n)
+    X = np.random.standard_normal((n,p))
+
+    lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0))
+    Q = rr.identity_quadratic(0.01, 0, np.ones(p), 0)
+
+    weights_with_zeros = 0.5*lam_theor * np.ones(p)
+    weights_with_zeros[:3] = 0.
+
+    L = lasso_full.gaussian(X, y, weights_with_zeros, 1., quadratic=Q)
+    L.fit()
+    print(L.summary(compute_intervals=True))
+
+def test_logistic_full():
+
+    for Y, T in [(np.random.binomial(1,0.5,size=(10,)),
+                  np.ones(10)),
+                 (np.random.binomial(1,0.5,size=(10,)),
+                  None),
+                 (np.random.binomial(3,0.5,size=(10,)),
+                  3*np.ones(10))]:
+        X = np.random.standard_normal((10,5))
+
+        L = lasso_full.logistic(X, Y, 0.1, trials=T)
+        L.fit()
+        L.summary(compute_intervals=True)
+
+def test_poisson_full():
+
+    X = np.random.standard_normal((10,5))
+    Y = np.random.poisson(10, size=(10,))
+
+    L = lasso_full.poisson(X, Y, 0.1)
+    L.fit()
+    L.summary(compute_intervals=True)
+
 def report(niter=50, **kwargs):
 
     # these are all our null tests

From dbc393d7a58155d5fbadbb8937de6a6b20efacb5 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 29 Mar 2018 12:37:09 -0700
Subject: [PATCH 526/617] WIP: fixing Liu

---
 selection/algorithms/lasso.py                 | 13 ++--
 selection/algorithms/tests/test_lasso_full.py | 76 +++++++++++++++++++
 2 files changed, 84 insertions(+), 5 deletions(-)
 create mode 100644 selection/algorithms/tests/test_lasso_full.py

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 9f220345b..46051dace 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -1844,7 +1844,8 @@ def additive_noise(X,
 # Liu, Markovic, Tibs selection
 # put this into library!
 
-def _solve_restricted_problem(Qbeta_bar, X, W, lagrange, initial=None):
+def _solve_restricted_problem(Qbeta_bar, X, W, lagrange, initial=None,
+                              min_its=30, tol=1.e-12):
     p = Qbeta_bar.shape[0]
 
     loss = squared_error(X * np.sqrt(W)[:, None], np.zeros(X.shape[0]))
@@ -1859,7 +1860,7 @@ def _solve_restricted_problem(Qbeta_bar, X, W, lagrange, initial=None):
     problem = simple_problem(loss, pen)
     if initial is not None:
         problem.coefs[:] = initial
-    soln = problem.solve(tol=1.e-12, min_its=30)
+    soln = problem.solve(tol=tol, min_its=min_its)
     return soln
 
 def _truncation_interval(Qbeta_bar, X, W, Qi_jj, j, beta_barj, lagrange):
@@ -2048,16 +2049,18 @@ def summary(self, alpha=0.05,
             else:
                 l, u = np.nan, np.nan
 
-            result.append((idx, pvalue, self.lasso_solution[idx], beta_barE[j], sd, l, u))
+            result.append((idx, pvalue, self.lasso_solution[idx], beta_barE[j], sd, l, u, lower, upper))
 
         df = pd.DataFrame(index=self.active,
                           data=dict([(n, d) for n, d in zip(['variable',
                                                              'pval', 
                                                              'lasso', 
-                                                             'onestep', 
+                                                             'onestep',
+                                                             'sd',
                                                              'lower_confidence', 
                                                              'upper_confidence',
-                                                             'sd'], 
+                                                             'lower_truncation', 
+                                                             'upper_truncation'], 
                                                             np.array(result).T)]))
         df['variable'] = df['variable'].astype(int)
         return df
diff --git a/selection/algorithms/tests/test_lasso_full.py b/selection/algorithms/tests/test_lasso_full.py
new file mode 100644
index 000000000..f6cbe76f1
--- /dev/null
+++ b/selection/algorithms/tests/test_lasso_full.py
@@ -0,0 +1,76 @@
+from __future__ import print_function
+
+import numpy as np, regreg.api as rr
+
+from ...tests.instance import gaussian_instance
+
+from ..lasso import (lasso_full,
+                     _truncation_interval,
+                     _solve_restricted_problem)
+
+# earlier implementation
+
+def solve_problem(Qbeta_bar, Q, lagrange, initial=None):
+    p = Qbeta_bar.shape[0]
+    loss = rr.quadratic_loss((p,), Q=Q, quadratic=rr.identity_quadratic(0, 
+                                                                        0, 
+                                                                        -Qbeta_bar, 
+                                                                        0))
+    lagrange = np.asarray(lagrange)
+    if lagrange.shape in [(), (1,)]:
+        lagrange = np.ones(p) * lagrange
+    pen = rr.weighted_l1norm(lagrange, lagrange=1.)
+    problem = rr.simple_problem(loss, pen)
+    if initial is not None:
+        problem.coefs[:] = initial
+    soln = problem.solve(tol=1.e12, min_its=100)
+    return soln
+
+def truncation_interval(Qbeta_bar, Q, Qi_jj, j, beta_barj, lagrange):
+    if lagrange[j] != 0:
+        lagrange_cp = lagrange.copy()
+    lagrange_cp[j] = np.inf
+    restricted_soln = solve_problem(Qbeta_bar, Q, lagrange_cp)
+
+    p = Qbeta_bar.shape[0]
+    I = np.identity(p)
+    nuisance = Qbeta_bar - I[:,j] / Qi_jj * beta_barj
+    
+    center = nuisance[j] - Q[j].dot(restricted_soln)
+    upper = (lagrange[j] + center) * Qi_jj
+    lower = (lagrange[j] - center) * Qi_jj
+
+    return lower, upper
+
+def test_agreement(n=200, p=100, s=4):
+
+    X, y, beta = gaussian_instance(n=n,
+                                   p=p,
+                                   s=s)[:3]
+
+    lagrange = 10. * np.ones(p)
+
+    LF = lasso_full.gaussian(X, y, lagrange)
+    LF.fit()
+
+    Q = X.T.dot(X)
+    Qbeta_bar = X.T.dot(y)
+    beta_hat = solve_problem(Qbeta_bar, Q, lagrange)
+    beta_hat2 = _solve_restricted_problem(Qbeta_bar, X, np.ones(X.shape[0]), 
+                                          lagrange, min_its=100)
+
+    Qi = np.linalg.inv(Q)
+    beta_bar = np.linalg.pinv(X).dot(y)
+    sigma = np.linalg.norm(y - X.dot(beta_bar)) / np.sqrt(n - p)
+
+    E = LF.active
+    QiE = Qi[E][:,E]
+    beta_barE = beta_bar[E]
+
+    S = LF.summary()
+
+    for i, j in enumerate(LF.active):
+        print(np.array(S['lower_truncation'])[i], np.array(S['upper_truncation'])[i]) 
+        lower, upper =  truncation_interval(Qbeta_bar, Q, QiE[i,i], j, beta_barE[i], lagrange)
+        print(lower, upper, 'old')
+    stop

From b6dbab5994fcb21bd43c2ab913b97168f51633bb Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 29 Mar 2018 12:59:14 -0700
Subject: [PATCH 527/617] BF: fixed Liu full lasso

---
 selection/algorithms/lasso.py                 |  9 +++++++--
 selection/algorithms/tests/test_lasso_full.py | 15 +++++++++------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 46051dace..e8ec0d8b0 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -1996,9 +1996,12 @@ def fit(self, lasso_solution=None, solve_args={'tol':1.e-12, 'min_its':50}):
                 E = self.active
                 Qi = np.linalg.inv(Q)
                 self._QiE = Qi[E][:,E]
-                self._beta_barE = Qi[E].dot(self._Qbeta_bar)
+                self._beta_bar = Qi.dot(self._Qbeta_bar)
+                self._beta_barE = self._beta_bar[E]
                 one_step = self._beta_barE
-                self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X[:,self.active].dot(one_step)))**2 / self._W).sum() / (n - len(self.active)))
+                self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X.dot(self._beta_bar)))**2 / self._W).sum() / (n - p))
+                
+                print(self._sigma, 'sigma')
             else:
                 raise NotImplementedError('debiased LASSO goes here')
         else:
@@ -2039,8 +2042,10 @@ def summary(self, alpha=0.05,
             idx = self.active[j]
             lower, upper = _truncation_interval(Qbeta_bar, X, W, QiE[j,j], idx, beta_barE[j], self.feature_weights)
 
+
             sd = sigma * np.sqrt(QiE[j,j])
             tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
+            print(sd, 'sd', j)
             pvalue = tg.cdf(beta_barE[j])
             pvalue = float(2 * min(pvalue, 1 - pvalue))
 
diff --git a/selection/algorithms/tests/test_lasso_full.py b/selection/algorithms/tests/test_lasso_full.py
index f6cbe76f1..2a14daf89 100644
--- a/selection/algorithms/tests/test_lasso_full.py
+++ b/selection/algorithms/tests/test_lasso_full.py
@@ -35,10 +35,10 @@ def truncation_interval(Qbeta_bar, Q, Qi_jj, j, beta_barj, lagrange):
     p = Qbeta_bar.shape[0]
     I = np.identity(p)
     nuisance = Qbeta_bar - I[:,j] / Qi_jj * beta_barj
-    
+
     center = nuisance[j] - Q[j].dot(restricted_soln)
-    upper = (lagrange[j] + center) * Qi_jj
-    lower = (lagrange[j] - center) * Qi_jj
+    upper = (lagrange[j] - center) * Qi_jj
+    lower = (-lagrange[j] - center) * Qi_jj
 
     return lower, upper
 
@@ -70,7 +70,10 @@ def test_agreement(n=200, p=100, s=4):
     S = LF.summary()
 
     for i, j in enumerate(LF.active):
-        print(np.array(S['lower_truncation'])[i], np.array(S['upper_truncation'])[i]) 
+        l, u = (np.array(S['lower_truncation'])[i], 
+                np.array(S['upper_truncation'])[i]) 
         lower, upper =  truncation_interval(Qbeta_bar, Q, QiE[i,i], j, beta_barE[i], lagrange)
-        print(lower, upper, 'old')
-    stop
+        np.testing.assert_allclose(l, lower)
+        np.testing.assert_allclose(u, upper)
+
+

From 504deb2212dd49655322f1ee5a35f5822b14f372 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 29 Mar 2018 13:02:27 -0700
Subject: [PATCH 528/617] removing print statements

---
 selection/algorithms/lasso.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index e8ec0d8b0..7e5560d46 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -2001,7 +2001,6 @@ def fit(self, lasso_solution=None, solve_args={'tol':1.e-12, 'min_its':50}):
                 one_step = self._beta_barE
                 self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X.dot(self._beta_bar)))**2 / self._W).sum() / (n - p))
                 
-                print(self._sigma, 'sigma')
             else:
                 raise NotImplementedError('debiased LASSO goes here')
         else:
@@ -2045,7 +2044,6 @@ def summary(self, alpha=0.05,
 
             sd = sigma * np.sqrt(QiE[j,j])
             tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
-            print(sd, 'sd', j)
             pvalue = tg.cdf(beta_barE[j])
             pvalue = float(2 * min(pvalue, 1 - pvalue))
 

From 13182c4f50ca0e79d4a94ad12cb9792013491ba7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 29 Mar 2018 13:23:08 -0700
Subject: [PATCH 529/617] WIP: debiased version of Liu (not tested fully)

---
 selection/algorithms/lasso.py | 30 ++++++++++++++++++++++++++----
 selection/randomized/lasso.py |  5 ++++-
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 7e5560d46..d3ae221f7 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -31,6 +31,7 @@
                         identity_quadratic)
 
 from .sqrt_lasso import solve_sqrt_lasso, estimate_sigma
+from .debiased_lasso import debiasing_matrix
 
 from ..constraints.affine import (constraints, selection_interval,
                                  interval_constraints,
@@ -1928,7 +1929,10 @@ def __init__(self,
             feature_weights = np.ones(loglike.shape) * feature_weights
         self.feature_weights = np.asarray(feature_weights)
 
-    def fit(self, lasso_solution=None, solve_args={'tol':1.e-12, 'min_its':50}):
+    def fit(self, 
+            lasso_solution=None, 
+            solve_args={'tol':1.e-12, 'min_its':50},
+            debiasing_args={}):
         """
         Fit the lasso using `regreg`.
         This sets the attributes `soln`, `onestep` and
@@ -1996,13 +2000,31 @@ def fit(self, lasso_solution=None, solve_args={'tol':1.e-12, 'min_its':50}):
                 E = self.active
                 Qi = np.linalg.inv(Q)
                 self._QiE = Qi[E][:,E]
-                self._beta_bar = Qi.dot(self._Qbeta_bar)
+                _beta_bar = Qi.dot(self._Qbeta_bar)
                 self._beta_barE = self._beta_bar[E]
                 one_step = self._beta_barE
-                self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X.dot(self._beta_bar)))**2 / self._W).sum() / (n - p))
+                self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X.dot(_beta_bar)))**2 / self._W).sum() / (n - p))
                 
             else:
-                raise NotImplementedError('debiased LASSO goes here')
+
+                X, y = self.loglike.data
+
+                # target is one-step estimator
+
+                G = self.loglike.smooth_objective(lasso_solution, 'grad')
+                Qinv_hat = np.atleast_2d(debiasing_matrix(
+                                             X * np.sqrt(self._W)[:, None], 
+                                             self.active,
+                                             **debiasing_args)) / n
+                observed_target = lasso_solution[self.active] - Qinv_hat.dot(G)
+                M1 = Qinv_hat.dot(X.T)
+                self._QiE = (M1 * self._W[None,:]).dot(M1.T)
+                Xfeat = X[:,self.active]
+                Qrelax = Xfeat.T.dot(self._W[:, None] * Xfeat)
+                relaxed_soln = lasso_solution[self.active] - np.linalg.inv(Qrelax).dot(G[self.active])
+                self._beta_barE = observed_target
+                self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / self._W).sum() / (n - len(self.active)))
+
         else:
             self.active = []
             self.inactive = np.arange(lasso_solution.shape[0])
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 8358f7b8b..8f0e7a3f8 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1786,7 +1786,10 @@ def full_targets(self, features=None, dispersion=None):
         alternatives = ['twosided'] * features.sum()
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
-    def debiased_targets(self, features=None, dispersion=None, **debiasing_args):
+    def debiased_targets(self, 
+                         features=None, 
+                         dispersion=None, 
+                         debiasing_args={}):
 
         if features is None:
             features = self._overall

From c621eb902720d73164c6893bd6efa739376ab48a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 29 Mar 2018 14:49:57 -0700
Subject: [PATCH 530/617] BF: missing ref

---
 selection/algorithms/lasso.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index d3ae221f7..344ce1385 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -2001,7 +2001,7 @@ def fit(self,
                 Qi = np.linalg.inv(Q)
                 self._QiE = Qi[E][:,E]
                 _beta_bar = Qi.dot(self._Qbeta_bar)
-                self._beta_barE = self._beta_bar[E]
+                self._beta_barE = _beta_bar[E]
                 one_step = self._beta_barE
                 self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X.dot(_beta_bar)))**2 / self._W).sum() / (n - p))
                 

From 44bd1174e46ac2f7812ccdf4c40d79cc3d3833ad Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 30 Mar 2018 12:04:28 -0700
Subject: [PATCH 531/617] WIP: some cleanup in randomized

---
 selection/randomized/base.py                  |  37 ----
 selection/randomized/estimation.py            | 198 ------------------
 .../{ => sandbox}/M_estimator_group_lasso.py  |   0
 .../{ => sandbox}/M_estimator_nonrandom.py    |   0
 selection/randomized/umvu.py                  |  94 ---------
 5 files changed, 329 deletions(-)
 delete mode 100644 selection/randomized/base.py
 delete mode 100644 selection/randomized/estimation.py
 rename selection/randomized/{ => sandbox}/M_estimator_group_lasso.py (100%)
 rename selection/randomized/{ => sandbox}/M_estimator_nonrandom.py (100%)
 delete mode 100644 selection/randomized/umvu.py

diff --git a/selection/randomized/base.py b/selection/randomized/base.py
deleted file mode 100644
index dc6db4230..000000000
--- a/selection/randomized/base.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import regreg.api as rr
-import regreg.affine as ra
-
-def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
-    """
-    Fit a restricted model using only columns `active`.
-
-    Parameters
-    ----------
-
-    Mest_loss : objective function
-        A GLM loss.
-
-    active : ndarray
-        Which columns to use.
-
-    solve_args : dict
-        Passed to `solve`.
-
-    Returns
-    -------
-
-    soln : ndarray
-        Solution to restricted problem.
-
-    """
-    X, Y = loss.data
-
-    if not loss._is_transform and hasattr(loss, 'saturated_loss'): # M_est is a glm
-        X_restricted = X[:,active]
-        loss_restricted = rr.affine_smooth(loss.saturated_loss, X_restricted)
-    else:
-        I_restricted = ra.selector(active, ra.astransform(X).input_shape[0], ra.identity((active.sum(),)))
-        loss_restricted = rr.affine_smooth(loss, I_restricted.T)
-    beta_E = loss_restricted.solve(**solve_args)
-    
-    return beta_E
diff --git a/selection/randomized/estimation.py b/selection/randomized/estimation.py
deleted file mode 100644
index bf61e147c..000000000
--- a/selection/randomized/estimation.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import numpy as np
-from scipy.optimize import minimize
-
-class estimation(object):
-
-    def __init__(self, X, y, active, betaE, cube, epsilon, lam, sigma, tau):
-
-        (self.X, self.y,
-         self.active,
-         self.betaE, self.cube,
-         self.epsilon,
-         self.lam,
-         self.sigma,
-         self.tau) = (X, y,
-                      active,
-                      betaE, cube,
-                      epsilon,
-                      lam,
-                      sigma,
-                      tau)
-
-        self.sigma_sq = self.sigma **2
-
-        self.signs = np.sign(self.betaE)
-        self.n, self.p = X.shape
-        self.nactive = np.sum(active)
-        self.ninactive = self.p-self.nactive
-        self.XE_pinv = np.linalg.pinv(self.X[:, self.active])
-
-        self.Sigma_inv = [np.array((self.p + 1, self.p + 1)) for _ in range(self.nactive)]
-        self.Sigma_full = [np.array((self.p + 1, self.p + 1)) for _ in range(self.nactive)]
-        self.Sigma_inv_mu = [np.zeros(self.p + 1) for _ in range(self.nactive)]
-
-        self.eta_norm_sq = np.zeros(self.nactive)
-        for j in range(self.nactive):
-            eta = self.XE_pinv[j, :]
-            self.eta_norm_sq[j] = np.linalg.norm(eta)**2
-
-        self.observed_vec = np.zeros(self.p+1)
-        self.observed_vec[1:] = np.concatenate((self.betaE, self.cube), axis=0)
-
-
-        self.mle = np.zeros(self.nactive)
-
-
-    def setup_joint_Gaussian_parameters(self, j):
-        """
-        Sigma_inv_mu computed for beta_{E,j}^*=0
-        """
-        eta = self.XE_pinv[j, :]
-
-        c = np.true_divide(eta, self.eta_norm_sq[j])
-        A = np.zeros((self.p, self.p + 1))
-        A[:, 0] = -np.dot(self.X.T, c)
-        A[:, 1:(self.nactive + 1)] = np.dot(self.X.T, self.X[:, self.active])
-        A[:self.nactive, 1:(self.nactive + 1)] += self.epsilon * np.identity(self.nactive)
-        A[self.nactive:, (self.nactive + 1):] = self.lam * np.identity(self.ninactive)
-        fixed_part = np.dot(np.identity(self.n) - np.outer(c, eta), self.y)
-        gamma = -np.dot(self.X.T, fixed_part)
-        gamma[:self.nactive] += self.lam * self.signs
-
-        v = np.zeros(self.p + 1)
-        v[0] = 1
-        self.Sigma_inv[j] = (np.true_divide(np.dot(A.T, A), self.tau ** 2) +
-                             np.true_divide(np.outer(v, v), \
-                                 self.eta_norm_sq[j] * (self.sigma ** 2)))
-        self.Sigma_full[j] = np.linalg.inv(self.Sigma_inv[j])
-        self.Sigma_inv_mu[j] = np.true_divide(np.dot(A.T, gamma), self.tau ** 2)
-
-        return self.Sigma_inv[j], self.Sigma_inv_mu[j]
-
-    def log_selection_probability(self, param, j, method="barrier"):
-
-        # print 'param value', param
-        Sigma_inv_mu_modified = self.Sigma_inv_mu[j].copy()
-        Sigma_inv_mu_modified[0] += param / (self.eta_norm_sq[j] * (self.sigma ** 2))
-
-        initial_guess = np.zeros(self.p + 1)
-        initial_guess[1:(self.nactive + 1)] = self.betaE
-        initial_guess[(self.nactive + 1):] = np.random.uniform(-1, 1, self.ninactive)
-
-        bounds = ((None, None),)
-        for i in range(self.nactive):
-            if self.signs[i] < 0:
-                bounds += ((None, 0),)
-            else:
-                bounds += ((0, None),)
-            bounds += ((-1, 1),) * self.ninactive
-
-
-        def chernoff(x):
-            return np.inner(x, self.Sigma_inv[j].dot(x)) / 2 - np.inner(Sigma_inv_mu_modified, x)
-
-        def barrier(x):
-            # Ax\leq b
-            A = np.zeros((self.p+self.ninactive, 1 + self.p))
-            A[:self.nactive, 1:(self.nactive + 1)] = -np.diag(self.signs)
-            A[self.nactive:self.p, (self.nactive + 1):] = np.identity(self.ninactive)
-            A[self.p:, (self.nactive + 1):] = -np.identity(self.ninactive)
-            b = np.zeros(self.p+self.ninactive)
-            b[self.nactive:] = 1
-
-            if all(b - np.dot(A, x) >= np.power(10, -9)):
-                return np.sum(np.log(1 + np.true_divide(1, b - np.dot(A, x))))
-
-            return b.shape[0] * np.log(1 + 10 ** 9)
-
-        def objective(x):
-            return chernoff(x) + barrier(x)
-
-        if method == "barrier":
-            res = minimize(objective, x0=initial_guess)
-        else:
-            if method == "chernoff":
-                res = minimize(chernoff, x0=initial_guess, bounds=bounds)
-            else:
-                raise ValueError('wrong method')
-
-        mu = np.dot(self.Sigma_full[j], Sigma_inv_mu_modified)
-        return - np.true_divide(np.inner(mu, Sigma_inv_mu_modified), 2) - res.fun
-
-
-    def compute_mle(self, j):
-
-        observed_vector = self.observed_vec.copy()
-        observed_vector[0] = np.inner(self.XE_pinv[j, :], self.y)
-
-        def objective_mle(param):
-            Sigma_inv_mu_modified = self.Sigma_inv_mu[j].copy()
-            Sigma_inv_mu_modified[0] += param / (self.eta_norm_sq[j] * (self.sigma ** 2))
-            mu = np.dot(self.Sigma_full[j], Sigma_inv_mu_modified)
-            return -np.inner(observed_vector, Sigma_inv_mu_modified) + \
-                np.true_divide(np.inner(mu, Sigma_inv_mu_modified), 2) + \
-                self.log_selection_probability(param, j)
-
-        initial_guess_mle = 0
-        res_mle = minimize(objective_mle, x0=initial_guess_mle)
-        self.mle[j] = res_mle.x
-        return self.mle[j]
-
-
-    def compute_mle_all(self):
-
-        for j in range(self.nactive):
-            self.setup_joint_Gaussian_parameters(j)
-            self.compute_mle(j)
-
-        return self.mle
-
-    def mse_mle(self, true_vec):
-        return (np.linalg.norm(self.mle-true_vec))**2
-
-
-class instance(object):
-
-    def __init__(self,
-                 n,
-                 p,
-                 s,
-                 snr=5,
-                 sigma=1.,
-                 rho=0,
-                 random_signs=True,
-                 scale=True,
-                 center=True):
-        (self.n,
-         self.p,
-         self.s,
-         self.snr,
-         self.sigma,
-         self.rho) = (n,
-                      p,
-                      s,
-                      snr,
-                      sigma,
-                      rho)
-
-        self.X = (np.sqrt(1 - self.rho) * np.random.standard_normal((self.n, self.p)) +
-                  np.sqrt(self.rho) * np.random.standard_normal(self.n)[:, None])
-        if center:
-            self.X -= self.X.mean(0)[None, :]
-        if scale:
-            self.X /= (self.X.std(0)[None, :] * np.sqrt(self.n))
-
-        self.beta = np.zeros(p)
-        self.beta[:self.s] = self.snr
-        if random_signs:
-            self.beta[:self.s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.)
-        self.active = np.zeros(p, np.bool)
-        self.active[:self.s] = True
-
-    def _noise(self):
-        return np.random.standard_normal(self.n)
-
-    def generate_response(self):
-
-        Y = (self.X.dot(self.beta) + self._noise()) * self.sigma
-        return self.X, Y, self.beta * self.sigma, np.nonzero(self.active)[0], self.sigma
diff --git a/selection/randomized/M_estimator_group_lasso.py b/selection/randomized/sandbox/M_estimator_group_lasso.py
similarity index 100%
rename from selection/randomized/M_estimator_group_lasso.py
rename to selection/randomized/sandbox/M_estimator_group_lasso.py
diff --git a/selection/randomized/M_estimator_nonrandom.py b/selection/randomized/sandbox/M_estimator_nonrandom.py
similarity index 100%
rename from selection/randomized/M_estimator_nonrandom.py
rename to selection/randomized/sandbox/M_estimator_nonrandom.py
diff --git a/selection/randomized/umvu.py b/selection/randomized/umvu.py
deleted file mode 100644
index 5137644b6..000000000
--- a/selection/randomized/umvu.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from __future__ import print_function
-import numpy as np
-from scipy.optimize import minimize
-
-from selection.randomized.estimation import estimation, instance
-
-class umvu(estimation):
-
-    def __init__(self, X, y, active, betaE, cube, epsilon, lam, sigma, tau):
-        estimation.__init__(self, X, y, active, betaE, cube, epsilon, lam, sigma, tau)
-        estimation.compute_mle_all(self)
-        self.unbiased = np.zeros(self.nactive)
-        self.umvu = np.zeros(self.nactive)
-
-    def log_selection_probability_umvu(self, mu, Sigma, method="barrier"):
-
-        Sigma_inv = np.linalg.inv(Sigma)
-        Sigma_inv_mu = np.dot(Sigma_inv, mu)
-
-        initial_guess = np.zeros(self.p)
-        initial_guess[:self.nactive] = self.betaE
-        initial_guess[self.nactive:] = np.random.uniform(-1, 1, self.ninactive)
-
-        bounds = ((None, None),)
-        for i in range(self.nactive):
-            if self.signs[i] < 0:
-                bounds += ((None, 0),)
-            else:
-                bounds += ((0, None),)
-            bounds += ((-1, 1),) * self.ninactive
-
-        def chernoff(x):
-            return np.inner(x, Sigma_inv.dot(x)) / 2 - np.inner(Sigma_inv_mu, x)
-
-        def barrier(x):
-            # Ax\leq b
-            A = np.zeros((self.p + self.ninactive, self.p))
-            A[:self.nactive,:self.nactive] = -np.diag(self.signs)
-            A[self.nactive:self.p, self.nactive:] = np.identity(self.ninactive)
-            A[self.p:, self.nactive:] = -np.identity(self.ninactive)
-            b = np.zeros(self.p + self.ninactive)
-            b[self.nactive:] = 1
-
-            if all(b - np.dot(A, x) >= np.power(10, -9)):
-                return np.sum(np.log(1 + np.true_divide(1, b - np.dot(A, x))))
-
-            return b.shape[0] * np.log(1 + 10 ** 9)
-
-        def objective(x):
-            return chernoff(x) + barrier(x)
-
-        if method == "barrier":
-            res = minimize(objective, x0=initial_guess)
-        else:
-            if method == "chernoff":
-                res = minimize(chernoff, x0=initial_guess, bounds=bounds)
-            else:
-                raise ValueError('wrong method')
-
-        return res.x
-
-
-    def compute_unbiased(self, j):
-
-        Sigma22_inv_Sigma21 = np.dot(np.linalg.inv(self.Sigma_full[j][1:, 1:]), self.Sigma_full[j][0, 1:])
-
-        schur = self.Sigma_full[j][0, 0] - np.inner(self.Sigma_full[j][0, 1:], Sigma22_inv_Sigma21)
-        c = np.true_divide(self.sigma_sq * self.eta_norm_sq[j], schur)
-        a = self.sigma_sq * self.eta_norm_sq[j] * self.Sigma_inv_mu[j][0]
-
-        observed_vector = self.observed_vec.copy()
-        observed_vector[0] = np.inner(self.XE_pinv[j, :], self.y)
-
-        self.unbiased[j] = c * (observed_vector[0] - np.inner(Sigma22_inv_Sigma21, observed_vector[1:])) - a
-
-        # starting umvu
-        Sigma_tilde = self.Sigma_full[j][1:, 1:]- np.true_divide(np.outer(self.Sigma_full[j][0, 1:], self.Sigma_full[j][0, 1:]), self.Sigma_full[j][0, 0])
-        mu_tilde = np.dot(Sigma_tilde.copy(), self.Sigma_inv_mu[j][1:])
-        mu_tilde += self.Sigma_full[j][0,1:]*observed_vector[0]/self.Sigma_full[j][0,0]
-        z_star = self.log_selection_probability_umvu(mu_tilde.copy(), Sigma_tilde.copy())
-
-        self.umvu[j] = c * (observed_vector[0] - np.inner(Sigma22_inv_Sigma21, z_star)) - a
-        return self.unbiased[j], self.umvu[j]
-
-
-    def compute_unbiased_all(self):
-        for j in range(self.nactive):
-            self.compute_unbiased(j)
-        return self.unbiased, self.umvu
-
-    def mse_unbiased(self, true_vec):
-        return (np.linalg.norm(self.unbiased-true_vec))**2, (np.linalg.norm(self.umvu-true_vec))**2
-
-

From ebc41b987cabf97af99e6be292a7b2aa5cfa322f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 30 Mar 2018 13:19:12 -0700
Subject: [PATCH 532/617] moving Mest test

---
 selection/randomized/tests/{ => sandbox}/test_Mest.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename selection/randomized/tests/{ => sandbox}/test_Mest.py (100%)

diff --git a/selection/randomized/tests/test_Mest.py b/selection/randomized/tests/sandbox/test_Mest.py
similarity index 100%
rename from selection/randomized/tests/test_Mest.py
rename to selection/randomized/tests/sandbox/test_Mest.py

From a4c97306302a4defeab53e83c0ad46cad48e8209 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 30 Mar 2018 13:19:45 -0700
Subject: [PATCH 533/617] WIP: cleaning up randomized

---
 selection/randomized/convenience.py | 8 +++-----
 selection/randomized/glm.py         | 3 +++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index bdb0897f7..2c5515f67 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -7,16 +7,14 @@
 import numpy as np
 import regreg.api as rr
 
-from .glm import (glm_group_lasso,
-                  glm_group_lasso_parametric,
-                  glm_greedy_step,
+from .glm import (glm_greedy_step,
                   glm_threshold_score,
-                  glm_nonparametric_bootstrap,
-                  glm_parametric_covariance,
                   pairs_bootstrap_glm)
 from .randomization import randomization
 from .query import multiple_queries
 
+from .lasso import highdim as lasso
+
 class step(lasso):
 
     r"""
diff --git a/selection/randomized/glm.py b/selection/randomized/glm.py
index 9a7cf95bc..e21bfdc91 100644
--- a/selection/randomized/glm.py
+++ b/selection/randomized/glm.py
@@ -9,6 +9,9 @@
 from .greedy_step import greedy_score_step
 from .threshold_score import threshold_score
 
+import regreg.api as rr
+import regreg.affine as ra
+
 def pairs_bootstrap_glm(glm_loss,
                         active, 
                         beta_full=None, 

From da24793451fdf1dd1f143c5ce8ea4888ddcb62b3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 30 Mar 2018 13:20:07 -0700
Subject: [PATCH 534/617] old base file

---
 selection/randomized/base.py | 37 ++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 selection/randomized/base.py

diff --git a/selection/randomized/base.py b/selection/randomized/base.py
new file mode 100644
index 000000000..dc6db4230
--- /dev/null
+++ b/selection/randomized/base.py
@@ -0,0 +1,37 @@
+import regreg.api as rr
+import regreg.affine as ra
+
+def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}):
+    """
+    Fit a restricted model using only columns `active`.
+
+    Parameters
+    ----------
+
+    Mest_loss : objective function
+        A GLM loss.
+
+    active : ndarray
+        Which columns to use.
+
+    solve_args : dict
+        Passed to `solve`.
+
+    Returns
+    -------
+
+    soln : ndarray
+        Solution to restricted problem.
+
+    """
+    X, Y = loss.data
+
+    if not loss._is_transform and hasattr(loss, 'saturated_loss'): # M_est is a glm
+        X_restricted = X[:,active]
+        loss_restricted = rr.affine_smooth(loss.saturated_loss, X_restricted)
+    else:
+        I_restricted = ra.selector(active, ra.astransform(X).input_shape[0], ra.identity((active.sum(),)))
+        loss_restricted = rr.affine_smooth(loss, I_restricted.T)
+    beta_E = loss_restricted.solve(**solve_args)
+    
+    return beta_E

From 8920d8d1fb282c80935f5aaca4d623436c6aafe6 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 30 Mar 2018 13:20:19 -0700
Subject: [PATCH 535/617] other test

---
 selection/algorithms/tests/test_lasso_full.py | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 selection/algorithms/tests/test_lasso_full.py

diff --git a/selection/algorithms/tests/test_lasso_full.py b/selection/algorithms/tests/test_lasso_full.py
new file mode 100644
index 000000000..8f01eec28
--- /dev/null
+++ b/selection/algorithms/tests/test_lasso_full.py
@@ -0,0 +1,35 @@
+import numpy as np
+
+from ..lasso import lasso_full
+
+def solve_problem(Qbeta_bar, Q, lagrange, initial=None):
+    p = Qbeta_bar.shape[0]
+    loss = rr.quadratic_loss((p,), Q=Q, quadratic=rr.identity_quadratic(0, 
+                                                                        0, 
+                                                                        Qbeta_bar, 
+                                                                        0))
+    lagrange = np.asarray(lagrange)
+    if lagrange.shape in [(), (1,)]:
+        lagrange = np.ones(p) * lagrange
+    pen = rr.weighted_l1norm(lagrange, lagrange=1.)
+    problem = rr.simple_problem(loss, pen)
+    if initial is not None:
+        problem.coefs[:] = initial
+    soln = problem.solve(tol=1.e12, min_its=10)
+    return soln
+
+def truncation_interval(Qbeta_bar, Q, Qi_jj, j, beta_barj, lagrange):
+    if lagrange[j] != 0:
+        lagrange_cp = lagrange.copy()
+    lagrange_cp[j] = np.inf
+    restricted_soln = solve_problem(Qbeta_bar, Q, lagrange_cp)
+
+    p = Qbeta_bar.shape[0]
+    I = np.identity(p)
+    nuisance = Qbeta_bar - I[:,j] / Qi_jj * beta_barj
+    
+    center = nuisance[j] - Q[j].dot(restricted_soln)
+    upper = (lagrange[j] - center) * Qi_jj
+    lower = (lagrange[j] - center) * Qi_jj
+
+    return lower, upper

From 1cf17f6ac6acd427cb6eb861bbeb055e9bd6bce3 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 31 Mar 2018 16:47:18 -0700
Subject: [PATCH 536/617] coverage for debiased target is short of target

---
 .../adjusted_MLE/tests/test_risk_coverage.py  |  11 +-
 selection/randomized/lasso.py                 | 780 +++++++-----------
 2 files changed, 291 insertions(+), 500 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 67316fbbd..9f952b542 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -133,9 +133,11 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         dispersion = None
         if full_dispersion:
             dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+        else:
+            dispersion = np.std(y)
 
         sigma_ = np.std(y)
-        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_ ** 2.) * lam_tuned_lasso), np.asscalar(sigma_))
+        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
         soln = LASSO_py.fit()
         active_LASSO = (soln != 0)
         nactive_LASSO = active_LASSO.sum()
@@ -205,7 +207,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
                 post_LASSO_OLS = np.linalg.pinv(X)[active_nonrand].dot(y)
                 unad_sd = sigma_ * np.sqrt(
-                    np.diag((np.linalg.pinv(X)[active_nonrand].T.dot(np.linalg.pinv(X)[active_nonrand]))))
+                    np.diag((np.linalg.pinv(X)[active_nonrand].dot(np.linalg.pinv(X)[active_nonrand].T))))
                 unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
                                             post_LASSO_OLS + 1.65 * unad_sd]).T
 
@@ -273,8 +275,9 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     power_unad = 0.
 
     for i in range(ndraw):
-        output = comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=.25,
-                                           randomizer_scale=np.sqrt(0.25), target="selected", full_dispersion=True)
+        output = comparison_risk_inference(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2, snr=.30,
+                                           randomizer_scale=np.sqrt(0.25), target="selected",
+                                           full_dispersion=False)
 
         risk_selMLE += output[0]
         risk_indest += output[1]
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 8358f7b8b..010e5c2a8 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -5,13 +5,19 @@
 import numpy as np
 from scipy.stats import norm as ndist
 
+import functools
+from copy import copy
+
+import numpy as np
+from scipy.stats import norm as ndist
+
 import regreg.api as rr
 import regreg.affine as ra
 
 from ..constraints.affine import constraints
 from ..algorithms.sqrt_lasso import solve_sqrt_lasso, choose_lambda
 
-from .query import (query, 
+from .query import (query,
                     multiple_queries,
                     langevin_sampler,
                     affine_gaussian_sampler)
@@ -24,42 +30,33 @@
                   glm_parametric_covariance)
 from ..algorithms.debiased_lasso import debiasing_matrix
 
-class lasso_view(query):
 
-    def __init__(self, 
-                 loss, 
-                 epsilon, 
-                 penalty, 
-                 randomization, 
+class lasso_view(query):
+    def __init__(self,
+                 loss,
+                 epsilon,
+                 penalty,
+                 randomization,
                  perturb=None,
-                 solve_args={'min_its':50, 'tol':1.e-10}):
+                 solve_args={'min_its': 50, 'tol': 1.e-10}):
         """
         Fits the logistic regression to a candidate active set, without penalty.
         Calls the method bootstrap_covariance() to bootstrap the covariance matrix.
-
-        Computes $\bar{\beta}_E$ which is the restricted 
+        Computes $\bar{\beta}_E$ which is the restricted
         M-estimator (i.e. subject to the constraint $\beta_{-E}=0$).
-
         Parameters:
         -----------
-
         active: np.bool
             The active set from fitting the logistic lasso
-
         solve_args: dict
             Arguments to be passed to regreg solver.
-
         Returns:
         --------
-
         None
-
         Notes:
         ------
-
         Sets self._beta_unpenalized which will be used in the covariance matrix calculation.
         Also computes Hessian of loss at restricted M-estimator as well as the bootstrap covariance.
-
         """
 
         query.__init__(self, randomization)
@@ -71,11 +68,11 @@ def __init__(self,
                                 epsilon,
                                 penalty,
                                 randomization)
-         
+
     # Methods needed for subclassing a query
 
     def solve(self, nboot=2000,
-              solve_args={'min_its':20, 'tol':1.e-10}, 
+              solve_args={'min_its': 20, 'tol': 1.e-10},
               perturb=None):
 
         self.randomize(perturb=perturb)
@@ -85,7 +82,7 @@ def solve(self, nboot=2000,
          epsilon,
          penalty,
          randomization) = (self.loss,
-                           self.randomized_loss, 
+                           self.randomized_loss,
                            self.epsilon,
                            self.penalty,
                            self.randomization)
@@ -121,15 +118,15 @@ def solve(self, nboot=2000,
         self._unpenalized = unpenalized
 
         _active_signs = active_signs.copy()
-        _active_signs[unpenalized] = np.nan # don't release sign of unpenalized variables
-        self.selection_variable = {'sign':_active_signs,
-                                   'variables':self._overall}
+        _active_signs[unpenalized] = np.nan  # don't release sign of unpenalized variables
+        self.selection_variable = {'sign': _active_signs,
+                                   'variables': self._overall}
 
         # initial state for opt variables
 
-        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') + 
-                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad')) 
-                          # the quadratic of a smooth_atom is not included in computing the smooth_objective
+        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
+                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad'))
+        # the quadratic of a smooth_atom is not included in computing the smooth_objective
         self.initial_subgrad = initial_subgrad
 
         initial_scalings = np.fabs(self.initial_soln[active])
@@ -200,9 +197,9 @@ def solve(self, nboot=2000,
         null_idx = np.arange(overall.sum(), p)
         inactive_idx = np.nonzero(inactive)[0]
         for _i, _n in zip(inactive_idx, null_idx):
-            _score_linear_term[_i,_n] = -1
+            _score_linear_term[_i, _n] = -1
 
-        # c_E piece 
+        # c_E piece
 
         def signed_basis_vector(p, j, s):
             v = np.zeros(p)
@@ -224,14 +221,14 @@ def signed_basis_vector(p, j, s):
         unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
         if unpenalized.sum():
             _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
-                                                      + epsilon * unpenalized_directions) 
+                                                      + epsilon * unpenalized_directions)
 
-        # subgrad piece
+            # subgrad piece
 
         subgrad_idx = range(active.sum() + unpenalized.sum(), active.sum() + inactive.sum() + unpenalized.sum())
         subgrad_slice = slice(active.sum() + unpenalized.sum(), active.sum() + inactive.sum() + unpenalized.sum())
         for _i, _s in zip(inactive_idx, subgrad_idx):
-            _opt_linear_term[_i,_s] = 1
+            _opt_linear_term[_i, _s] = 1
 
         # form affine part
 
@@ -240,7 +237,7 @@ def signed_basis_vector(p, j, s):
         _opt_affine_term[active] = active_signs[active] * self._lagrange[active]
 
         # two transforms that encode score and optimization
-        # variable roles 
+        # variable roles
 
         self.opt_transform = (_opt_linear_term, _opt_affine_term)
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
@@ -273,18 +270,17 @@ def get_sampler(self):
             penalty, inactive = self.penalty, self._inactive
             inactive_lagrange = self.penalty.weights[inactive]
 
-            if not hasattr(self.randomization, "cov_prec"): # means randomization is not Gaussian
+            if not hasattr(self.randomization, "cov_prec"):  # means randomization is not Gaussian
 
                 dual = rr.weighted_supnorm(1. / inactive_lagrange, bound=1.)
 
                 def projection(dual, subgrad_slice, scaling_slice, opt_state):
                     """
                     Full projection for Langevin.
-
                     The state here will be only the state of the optimization variables.
                     """
 
-                    new_state = opt_state.copy() # not really necessary to copy
+                    new_state = opt_state.copy()  # not really necessary to copy
                     new_state[scaling_slice] = np.maximum(opt_state[scaling_slice], 0)
                     new_state[subgrad_slice] = dual.bound_prox(opt_state[subgrad_slice])
                     return new_state
@@ -347,6 +343,7 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                         mean_term = logdens_linear.dot(score.T + offset[:, None]).T
                     arg = opt + mean_term
                     return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+
                 log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
 
                 # now make the constraints
@@ -377,7 +374,7 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                                         self.observed_score_state,
                                                         log_density,
                                                         logdens_transform,
-                                                        selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
+                                                        selection_info=self.selection_variable)  # should be signs and the subgradients we've conditioned on
 
         return self._sampler
 
@@ -386,7 +383,6 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
     def decompose_subgradient(self, condition=None, marginalize=None):
         """
         ADD DOCSTRING
-
         condition and marginalize should be disjoint
         """
 
@@ -446,20 +442,20 @@ def decompose_subgradient(self, condition=None, marginalize=None):
         new_offset[condition_inactive] += self.initial_subgrad[condition_inactive]
         new_opt_transform = (new_linear, new_offset)
 
-        if not hasattr(self.randomization, "cov_prec") or marginalize.sum(): # use Langevin -- not gaussian
+        if not hasattr(self.randomization, "cov_prec") or marginalize.sum():  # use Langevin -- not gaussian
 
             def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive):
                 return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
                                   _cdf(full_state_plus) - _cdf(full_state_minus)))[margin_inactive]
 
-            def new_grad_log_density(query, 
+            def new_grad_log_density(query,
                                      limits_marginal,
                                      margin_inactive,
                                      _cdf,
                                      _pdf,
                                      new_opt_transform,
                                      deriv_log_dens,
-                                     score_state, 
+                                     score_state,
                                      opt_state):
 
                 full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)
@@ -467,7 +463,7 @@ def new_grad_log_density(query,
                 p = query.penalty.shape[0]
                 weights = np.zeros(p)
 
-                if margin_inactive.sum()>0:
+                if margin_inactive.sum() > 0:
                     full_state_plus = full_state + limits_marginal * margin_inactive
                     full_state_minus = full_state - limits_marginal * margin_inactive
                     weights[margin_inactive] = _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive)
@@ -483,7 +479,7 @@ def new_grad_log_density(query,
                                                      new_opt_transform,
                                                      self.randomization._derivative_log_density)
 
-            def new_log_density(query, 
+            def new_log_density(query,
                                 limits_marginal,
                                 margin_inactive,
                                 _cdf,
@@ -499,14 +495,15 @@ def new_log_density(query,
                 p = query.penalty.shape[0]
                 logdens = np.zeros(full_state.shape[0])
 
-                if margin_inactive.sum()>0:
+                if margin_inactive.sum() > 0:
                     full_state_plus = full_state + limits_marginal * margin_inactive
                     full_state_minus = full_state - limits_marginal * margin_inactive
-                    logdens += np.sum(np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:,margin_inactive], axis=1)
+                    logdens += np.sum(np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:, margin_inactive],
+                                      axis=1)
 
-                logdens += log_dens(full_state[:,~margin_inactive])
+                logdens += log_dens(full_state[:, ~margin_inactive])
 
-                return np.squeeze(logdens) # should this be negative to match the gradient log density?
+                return np.squeeze(logdens)  # should this be negative to match the gradient log density?
 
             new_log_density = functools.partial(new_log_density,
                                                 self,
@@ -602,13 +599,11 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                                     self.observed_score_state,
                                                     log_density,
                                                     logdens_transform,
-                                                    selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
+                                                    selection_info=self.selection_variable)  # should be signs and the subgradients we've conditioned on
 
 
 class glm_lasso(lasso_view):
-
-    def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
-
+    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}):
         bootstrap_score = pairs_bootstrap_glm(self.loss,
                                               self.selection_variable['variables'],
                                               beta_full=self._beta_full,
@@ -616,29 +611,25 @@ def setup_sampler(self, scaling=1., solve_args={'min_its':50, 'tol':1.e-10}):
 
         return bootstrap_score
 
-class glm_lasso_parametric(lasso_view):
 
+class glm_lasso_parametric(lasso_view):
     # this setup_sampler returns only the active set
 
     def setup_sampler(self):
-
         return self.selection_variable['variables']
 
 
 class fixedX_lasso(lasso_view):
-
-    def __init__(self, X, Y, epsilon, penalty, randomization, solve_args={'min_its':50, 'tol':1.e-10}):
-
+    def __init__(self, X, Y, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
         loss = glm.gaussian(X, Y)
         lasso_view.__init__(self,
-                            loss, 
-                            epsilon, 
-                            penalty, 
-                            randomization, 
+                            loss,
+                            epsilon,
+                            penalty,
+                            randomization,
                             solve_args=solve_args)
 
     def setup_sampler(self):
-
         X, Y = self.loss.data
 
         bootstrap_score = resid_bootstrap(self.loss,
@@ -646,26 +637,22 @@ def setup_sampler(self):
                                           ~self.selection_variable['variables'])[0]
         return bootstrap_score
 
+
 ##### The class for users
 
 class lasso(object):
-
     r"""
     A class for the LASSO for post-selection inference.
     The problem solved is
-
     .. math::
-
-        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
+        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 +
             \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
-
     where $\lambda$ is `lam`, $\omega$ is a randomization generated below
     and the last term is a small ridge penalty.
-
     """
 
-    def __init__(self, 
-                 loglike, 
+    def __init__(self,
+                 loglike,
                  feature_weights,
                  ridge_term,
                  randomizer_scale,
@@ -673,29 +660,20 @@ def __init__(self,
                  parametric_cov_estimator=False,
                  perturb=None):
         r"""
-
         Create a new post-selection object for the LASSO problem
-
         Parameters
         ----------
-
         loglike : `regreg.smooth.glm.glm`
             A (negative) log-likelihood as implemented in `regreg`.
-
         feature_weights : np.ndarray
             Feature weights for L-1 penalty. If a float,
             it is brodcast to all features.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomization.
-
         randomizer : str (optional)
             One of ['laplace', 'logistic', 'gaussian']
-
-
         """
 
         self.loglike = loglike
@@ -710,7 +688,7 @@ def __init__(self,
         if randomizer == 'laplace':
             self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
         elif randomizer == 'gaussian':
-            self.randomizer = randomization.isotropic_gaussian((p,),randomizer_scale)
+            self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
         elif randomizer == 'logistic':
             self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
 
@@ -720,32 +698,28 @@ def __init__(self,
 
         self._initial_omega = perturb
 
-    def fit(self, 
-            solve_args={'tol':1.e-12, 'min_its':50}, 
+    def fit(self,
+            solve_args={'tol': 1.e-12, 'min_its': 50},
             perturb=None,
             nboot=1000):
         """
         Fit the randomized lasso using `regreg`.
-
         Parameters
         ----------
-
         solve_args : keyword args
              Passed to `regreg.problems.simple_problem.solve`.
-
         Returns
         -------
-
         signs : np.float
              Support and non-zero signs of randomized lasso solution.
-             
+
         """
 
         if perturb is not None:
             self._initial_omega = perturb
 
         p = self.nfeature
-        if self.parametric_cov_estimator==True:
+        if self.parametric_cov_estimator == True:
             self._view = glm_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
         else:
             self._view = glm_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
@@ -759,36 +733,29 @@ def decompose_subgradient(self,
                               condition=None,
                               marginalize=None):
         """
-
         Marginalize over some if inactive part of subgradient
         if applicable.
-
         Parameters
         ----------
-
         condition : np.bool
              Which groups' subgradients should we condition on.
-
         marginalize : np.bool
              Which groups' subgradients should we marginalize over.
-
         Returns
         -------
-
         None
-
         """
 
         if not hasattr(self, "_view"):
             raise ValueError("fit method should be run first")
-        self._view.decompose_subgradient(condition=condition, 
+        self._view.decompose_subgradient(condition=condition,
                                          marginalize=marginalize)
 
     def summary(self,
                 selected_features,
                 parameter=None,
                 level=0.9,
-                ndraw=10000, 
+                ndraw=10000,
                 burnin=2000,
                 compute_intervals=False,
                 bootstrap_sampler=False,
@@ -796,29 +763,21 @@ def summary(self,
         """
         Produce p-values and confidence intervals for targets
         of model including selected features
-
         Parameters
         ----------
-
         selected_features : np.bool
             Binary encoding of which features to use in final
             model and targets.
-
         parameter : np.array
             Hypothesized value for parameter -- defaults to 0.
-
         level : float
             Confidence level.
-
         ndraw : int (optional)
             Defaults to 1000.
-
         burnin : int (optional)
             Defaults to 1000.
-
         bootstrap : bool
             Use wild bootstrap instead of Gaussian plugin.
-
         """
         if not hasattr(self, "_view"):
             raise ValueError('run `fit` method before producing summary.')
@@ -844,11 +803,11 @@ def summary(self,
         for q in [self._view]:
             cov_info = q.setup_sampler()
             if self.parametric_cov_estimator == False:
-                target_cov, score_cov = form_covariances(target_info,  
+                target_cov, score_cov = form_covariances(target_info,
                                                          cross_terms=[cov_info],
                                                          nsample=q.nboot)
             else:
-                target_cov, score_cov = form_covariances(target_info,  
+                target_cov, score_cov = form_covariances(target_info,
                                                          cross_terms=[cov_info])
             opt_samplers.append(q.sampler)
 
@@ -856,26 +815,29 @@ def summary(self,
                                           burnin) for opt_sampler in opt_samplers]
 
         if subset is not None:
-            target_cov = target_cov[subset][:,subset]
+            target_cov = target_cov[subset][:, subset]
             score_cov = score_cov[subset]
             unpenalized_mle = unpenalized_mle[subset]
 
-        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter, sample=opt_samples[0])
+        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter,
+                                                     sample=opt_samples[0])
         if not np.all(parameter == 0):
-            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=np.zeros_like(parameter), sample=opt_samples[0])
+            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov,
+                                                          parameter=np.zeros_like(parameter), sample=opt_samples[0])
         else:
             pvalues = pivots
 
         intervals = None
         if compute_intervals:
-            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov, sample=opt_samples[0])
+            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov,
+                                                             sample=opt_samples[0])
 
         return pivots, pvalues, intervals
 
     @staticmethod
-    def gaussian(X, 
-                 Y, 
-                 feature_weights, 
+    def gaussian(X,
+                 Y,
+                 feature_weights,
                  sigma=1.,
                  parametric_cov_estimator=False,
                  quadratic=None,
@@ -885,79 +847,65 @@ def gaussian(X,
                  perturb=None):
         r"""
         Squared-error LASSO with feature weights.
-
-        Objective function (before randomizer) is 
+        Objective function (before randomizer) is
         $$
         \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
         where $\lambda$ is `feature_weights`. The ridge term
         is determined by the Hessian and `np.std(Y)` (scaled by $\sqrt{n/(n-1)}$) by default,
         as is the randomizer scale.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         Y : ndarray
             Shape (n,) -- the response.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
         sigma : float (optional)
             Noise variance. Set to 1 if `covariance_estimator` is not None.
             This scales the loglikelihood by `sigma**(-2)`.
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
 
         """
 
-        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
+        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic)
         n, p = X.shape
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
         if ridge_term is None:
             ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        return lasso(loglike, 
-                     np.asarray(feature_weights) / sigma**2,
-                     ridge_term, 
-                     randomizer_scale, 
+        return lasso(loglike,
+                     np.asarray(feature_weights) / sigma ** 2,
+                     ridge_term,
+                     randomizer_scale,
                      randomizer=randomizer,
                      parametric_cov_estimator=parametric_cov_estimator,
                      perturb=perturb)
 
     @staticmethod
-    def logistic(X, 
-                 successes, 
-                 feature_weights, 
+    def logistic(X,
+                 successes,
+                 feature_weights,
                  trials=None,
                  parametric_cov_estimator=False,
                  quadratic=None,
@@ -967,81 +915,67 @@ def logistic(X,
                  perturb=None):
         r"""
         Logistic LASSO with feature weights.
-
-        Objective function is 
+        Objective function is
         $$
         \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
-        where $\ell$ is the negative of the logistic 
+        where $\ell$ is the negative of the logistic
         log-likelihood (half the logistic deviance)
         and $\lambda$ is `feature_weights`.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         successes : ndarray
             Shape (n,) -- response vector. An integer number of successes.
             For data that is proportions, multiply the proportions
             by the number of trials first.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
         trials : ndarray (optional)
             Number of trials per response, defaults to
-            ones the same shape as Y. 
-
+            ones the same shape as Y.
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
 
         """
         n, p = X.shape
 
         loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
 
         if ridge_term is None:
             ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 
+            randomizer_scale = np.sqrt(mean_diag) * 0.5
 
-        return lasso(loglike, feature_weights, 
-                     ridge_term, 
+        return lasso(loglike, feature_weights,
+                     ridge_term,
                      randomizer_scale,
                      parametric_cov_estimator=parametric_cov_estimator,
                      randomizer=randomizer,
                      perturb=perturb)
 
     @staticmethod
-    def coxph(X, 
-              times, 
-              status, 
+    def coxph(X,
+              times,
+              status,
               feature_weights,
               parametric_cov_estimator=False,
               quadratic=None,
@@ -1051,66 +985,50 @@ def coxph(X,
               perturb=None):
         r"""
         Cox proportional hazards LASSO with feature weights.
-
-        Objective function is 
+        Objective function is
         $$
         \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
-        where $\ell^{\text{Cox}}$ is the 
+        where $\ell^{\text{Cox}}$ is the
         negative of the log of the Cox partial
         likelihood and $\lambda$ is `feature_weights`.
-
         Uses Efron's tie breaking method.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         times : ndarray
             Shape (n,) -- the survival times.
-
         status : ndarray
             Shape (n,) -- the censoring status.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
         covariance_estimator : optional
             If None, use the parameteric
             covariance estimate of the selected model.
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
 
         """
         loglike = coxph_obj(X, times, status, quadratic=quadratic)
 
         # scale for randomization seems kind of meaningless here...
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
 
         if ridge_term is None:
             ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1)
@@ -1118,17 +1036,17 @@ def coxph(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        return lasso(loglike, 
-                     feature_weights, 
+        return lasso(loglike,
+                     feature_weights,
                      ridge_term,
-                     randomizer_scale, 
+                     randomizer_scale,
                      randomizer=randomizer,
                      parametric_cov_estimator=parametric_cov_estimator,
                      perturb=perturb)
 
     @staticmethod
-    def poisson(X, 
-                counts, 
+    def poisson(X,
+                counts,
                 feature_weights,
                 parametric_cov_estimator=False,
                 quadratic=None,
@@ -1138,51 +1056,37 @@ def poisson(X,
                 perturb=None):
         r"""
         Poisson log-linear LASSO with feature weights.
-
-        Objective function is 
+        Objective function is
         $$
         \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
         where $\ell^{\text{Poisson}}$ is the negative
         of the log of the Poisson likelihood (half the deviance)
         and $\lambda$ is `feature_weights`.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         counts : ndarray
             Shape (n,) -- the response.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
 
         """
         n, p = X.shape
@@ -1190,7 +1094,7 @@ def poisson(X,
 
         # scale for randomizer seems kind of meaningless here...
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
 
         if ridge_term is None:
             ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n - 1)
@@ -1198,90 +1102,73 @@ def poisson(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.))
 
-        return lasso(loglike, 
-                     feature_weights, 
+        return lasso(loglike,
+                     feature_weights,
                      ridge_term,
-                     randomizer_scale, 
+                     randomizer_scale,
                      randomizer=randomizer,
                      parametric_cov_estimator=parametric_cov_estimator,
                      perturb=perturb)
 
     @staticmethod
-    def sqrt_lasso(X, 
-                   Y, 
-                   feature_weights, 
+    def sqrt_lasso(X,
+                   Y,
+                   feature_weights,
                    quadratic=None,
                    parametric_cov_estimator=False,
                    sigma_estimate='truncated',
-                   solve_args={'min_its':200},
+                   solve_args={'min_its': 200},
                    randomizer_scale=None,
                    perturb=None):
         r"""
         Use sqrt-LASSO to choose variables.
-
-        Objective function is 
+        Objective function is
         $$
         \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
         where $\lambda$ is `feature_weights`. After solving the problem
-        treat as if `gaussian` with implied variance and choice of 
+        treat as if `gaussian` with implied variance and choice of
         multiplier. See arxiv.org/abs/1504.08031 for details.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         Y : ndarray
             Shape (n,) -- the response.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         covariance : str
             One of 'parametric' or 'sandwich'. Method
             used to estimate covariance for inference
             in second stage.
-
         sigma_estimate : str
             One of 'truncated' or 'OLS'. Method
             used to estimate $\sigma$ when using
             parametric covariance.
-
         solve_args : dict
             Arguments passed to solver.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
+
         Notes
         -----
-
         Unlike other variants of LASSO, this
         solves the problem on construction as the active
         set is needed to find equivalent gaussian LASSO.
-
         Assumes parametric model is correct for inference,
         i.e. does not accept a covariance estimator.
-
         """
 
         n, p = X.shape
@@ -1289,7 +1176,7 @@ def sqrt_lasso(X,
         if np.asarray(feature_weights).shape == ():
             feature_weights = np.ones(loglike.shape) * feature_weights
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
         if ridge_term is None:
             ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1)
 
@@ -1299,30 +1186,31 @@ def sqrt_lasso(X,
         if perturb is None:
             perturb = np.random.standard_normal(p) * randomizer_scale
 
-        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0) # a ridge + linear term
+        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0)  # a ridge + linear term
 
         if quadratic is not None:
             totalQ = randomQ + quadratic
         else:
             totalQ = randomQ
 
-        soln, sqrt_loss = solve_sqrt_lasso(X, 
-                                           Y, 
-                                           weights=feature_weights, 
-                                           quadratic=totalQ, 
+        soln, sqrt_loss = solve_sqrt_lasso(X,
+                                           Y,
+                                           weights=feature_weights,
+                                           quadratic=totalQ,
                                            solve_args=solve_args,
                                            force_fat=True)
 
         denom = np.linalg.norm(Y - X.dot(soln))
 
         loglike = rr.glm.gaussian(X, Y)
-        
-        raise NotImplementedError('lasso_view needs to be modified so that the initial randomization can be set at construction time')
 
-        return lasso(loglike, 
-                     np.asarray(feature_weights) * denom, 
-                     ridge_term * denom, 
-                     randomizer_scale * denom, 
+        raise NotImplementedError(
+            'lasso_view needs to be modified so that the initial randomization can be set at construction time')
+
+        return lasso(loglike,
+                     np.asarray(feature_weights) * denom,
+                     ridge_term * denom,
+                     randomizer_scale * denom,
                      randomizer='gaussian',
                      parametric_cov_estimator=parametric_cov_estimator,
                      perturb=perturb)
@@ -1333,51 +1221,38 @@ def sqrt_lasso(X,
 #### - Gaussian randomization
 
 class highdim(lasso):
-
     r"""
     A class for the LASSO for post-selection inference.
     The problem solved is
-
     .. math::
-
-        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
+        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 +
             \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
-
     where $\lambda$ is `lam`, $\omega$ is a randomization generated below
     and the last term is a small ridge penalty.
-
     """
 
-    def __init__(self, 
-                 loglike, 
+    def __init__(self,
+                 loglike,
                  feature_weights,
                  ridge_term,
                  randomizer_scale,
                  perturb=None):
         r"""
-
         Create a new post-selection object for the LASSO problem
-
         Parameters
         ----------
-
         loglike : `regreg.smooth.glm.glm`
             A (negative) log-likelihood as implemented in `regreg`.
-
         feature_weights : np.ndarray
             Feature weights for L-1 penalty. If a float,
             it is brodcast to all features.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomization.
-
         perturb : np.ndarray
             Random perturbation subtracted as a linear
             term in the objective function.
-
         """
 
         self.loglike = loglike
@@ -1390,26 +1265,22 @@ def __init__(self,
         self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
         self.ridge_term = ridge_term
         self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
-        self._initial_omega = perturb # random perturbation
+        self._initial_omega = perturb  # random perturbation
 
-    def fit(self, 
-            solve_args={'tol':1.e-12, 'min_its':50}, 
+    def fit(self,
+            solve_args={'tol': 1.e-12, 'min_its': 50},
             perturb=None):
         """
         Fit the randomized lasso using `regreg`.
-
         Parameters
         ----------
-
         solve_args : keyword args
              Passed to `regreg.problems.simple_problem.solve`.
-
         Returns
         -------
-
         signs : np.float
              Support and non-zero signs of randomized lasso solution.
-             
+
         """
 
         p = self.nfeature
@@ -1437,14 +1308,14 @@ def fit(self,
         self._unpenalized = unpenalized
 
         _active_signs = active_signs.copy()
-        _active_signs[unpenalized] = np.nan # don't release sign of unpenalized variables
-        self.selection_variable = {'sign':_active_signs,
-                                   'variables':self._overall}
+        _active_signs[unpenalized] = np.nan  # don't release sign of unpenalized variables
+        self.selection_variable = {'sign': _active_signs,
+                                   'variables': self._overall}
 
         # initial state for opt variables
 
-        initial_subgrad = -(self.loglike.smooth_objective(self.initial_soln, 'grad') + 
-                            quad.objective(self.initial_soln, 'grad')) 
+        initial_subgrad = -(self.loglike.smooth_objective(self.initial_soln, 'grad') +
+                            quad.objective(self.initial_soln, 'grad'))
         self.initial_subgrad = initial_subgrad
 
         initial_scalings = np.fabs(self.initial_soln[active])
@@ -1510,10 +1381,10 @@ def signed_basis_vector(p, j, s):
         unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
         if unpenalized.sum():
             _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
-                                                      + self.ridge_term * unpenalized_directions) 
+                                                      + self.ridge_term * unpenalized_directions)
 
         # two transforms that encode score and optimization
-        # variable roles 
+        # variable roles
 
         self.opt_transform = (_opt_linear_term, self.initial_subgrad)
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
@@ -1545,6 +1416,7 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                 mean_term = logdens_linear.dot(score.T + offset[:, None]).T
             arg = opt + mean_term
             return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+
         log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
 
         # now make the constraints
@@ -1564,8 +1436,8 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                                self.observed_score_state,
                                                log_density,
                                                logdens_transform,
-                                               selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
-        
+                                               selection_info=self.selection_variable)  # should be signs and the subgradients we've conditioned on
+
         return active_signs
 
     def summary(self,
@@ -1573,79 +1445,72 @@ def summary(self,
                 features=None,
                 parameter=None,
                 level=0.9,
-                ndraw=10000, 
+                ndraw=10000,
                 burnin=2000,
                 compute_intervals=False,
                 dispersion=None):
         """
         Produce p-values and confidence intervals for targets
         of model including selected features
-
         Parameters
         ----------
-
         target : one of ['selected', 'full']
-
         features : np.bool
             Binary encoding of which features to use in final
             model and targets.
-
         parameter : np.array
             Hypothesized value for parameter -- defaults to 0.
-
         level : float
             Confidence level.
-
         ndraw : int (optional)
             Defaults to 1000.
-
         burnin : int (optional)
             Defaults to 1000.
-
         compute_intervals : bool
             Compute confidence intervals?
-
         dispersion : float (optional)
             Use a known value for dispersion, or Pearson's X^2?
-
         """
 
         if parameter is None:
             parameter = np.zeros(self.loglike.shape[0])
 
         if target == 'selected':
-            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
+            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
+                                                                                                dispersion=dispersion)
         else:
             X, y = self.loglike.data
             n, p = X.shape
             if n > p and target == 'full':
-                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features, dispersion=dispersion)
+                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features,
+                                                                                                dispersion=dispersion)
             else:
-                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features, dispersion=dispersion)
+                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features,
+                                                                                                    dispersion=dispersion)
 
         if self._overall.sum() > 0:
-            opt_sample = self.sampler.sample(ndraw,  burnin)
+            opt_sample = self.sampler.sample(ndraw, burnin)
 
-            pivots = self.sampler.coefficient_pvalues(observed_target, 
-                                                      cov_target, 
-                                                      cov_target_score, 
-                                                      parameter=parameter, 
-                                                      sample=opt_sample, 
+            pivots = self.sampler.coefficient_pvalues(observed_target,
+                                                      cov_target,
+                                                      cov_target_score,
+                                                      parameter=parameter,
+                                                      sample=opt_sample,
                                                       alternatives=alternatives)
             if not np.all(parameter == 0):
-                pvalues = self.sampler.coefficient_pvalues(observed_target, 
-                                                           cov_target, 
-                                                           cov_target_score, 
-                                                           parameter=np.zeros_like(parameter), 
-                                                           sample=opt_sample, 
+                pvalues = self.sampler.coefficient_pvalues(observed_target,
+                                                           cov_target,
+                                                           cov_target_score,
+                                                           parameter=np.zeros_like(parameter),
+                                                           sample=opt_sample,
                                                            alternatives=alternatives)
             else:
                 pvalues = pivots
 
             intervals = None
             if compute_intervals:
-                intervals = self.sampler.confidence_intervals(observed_target, 
-                                                              cov_target, 
+                intervals = self.sampler.confidence_intervals(observed_target,
+                                                              cov_target,
                                                               cov_target_score,
                                                               sample=opt_sample)
 
@@ -1662,55 +1527,48 @@ def selective_MLE(self,
                       dispersion=None,
                       solve_args={}):
         """
-
         Parameters
         ----------
-
         target : one of ['selected', 'full']
-
         features : np.bool
             Binary encoding of which features to use in final
             model and targets.
-
         parameter : np.array
             Hypothesized value for parameter -- defaults to 0.
-
         level : float
             Confidence level.
-
         ndraw : int (optional)
             Defaults to 1000.
-
         burnin : int (optional)
             Defaults to 1000.
-
         compute_intervals : bool
             Compute confidence intervals?
-
         dispersion : float (optional)
             Use a known value for dispersion, or Pearson's X^2?
-
         """
 
         if parameter is None:
             parameter = np.zeros(self.loglike.shape[0])
 
         if target == 'selected':
-            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
+            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
+                                                                                                dispersion=dispersion)
         elif target == 'full':
             X, y = self.loglike.data
             n, p = X.shape
             if n > p:
-                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features, dispersion=dispersion)
+                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features,
+                                                                                                dispersion=dispersion)
             else:
-                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features, dispersion=dispersion)
+                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features,
+                                                                                                    dispersion=dispersion)
 
         # working out conditional law of opt variables given
         # target after decomposing score wrt target
 
-        return self.sampler.selective_MLE(observed_target, 
-                                          cov_target, 
-                                          cov_target_score, 
+        return self.sampler.selective_MLE(observed_target,
+                                          cov_target,
+                                          cov_target_score,
                                           self.observed_opt_state,
                                           solve_args=solve_args)
 
@@ -1733,8 +1591,9 @@ def selected_targets(self, features=None, dispersion=None):
             cov_target = np.linalg.inv(Q)
             observed_target = self._beta_full[overall]
             crosscov_target_score = score_linear.dot(cov_target)
-            Xfeat = X[:,overall]
-            alternatives = [{1:'greater', -1:'less'}[int(s)] for s in self.selection_variable['sign'][active]] + ['twosided'] * unpenalized.sum()
+            Xfeat = X[:, overall]
+            alternatives = [{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][active]] + [
+                                                                                                                       'twosided'] * unpenalized.sum()
 
         else:
 
@@ -1742,7 +1601,7 @@ def selected_targets(self, features=None, dispersion=None):
             features_b[features] = True
             features = features_b
 
-            Xfeat = X[:,features]
+            Xfeat = X[:, features]
             Qfeat = Xfeat.T.dot(self._W[:, None] * Xfeat)
             Gfeat = self.loglike.smooth_objective(self.initial_soln, 'grad')[features]
             Qfeat_inv = np.linalg.inv(Qfeat)
@@ -1753,8 +1612,9 @@ def selected_targets(self, features=None, dispersion=None):
             observed_target = one_step
             alternatives = ['twosided'] * features.sum()
 
-        if dispersion is None: # use Pearson's X^2
-            dispersion = ((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(observed_target)))**2 / self._W).sum() / (n - Xfeat.shape[1])
+        if dispersion is None:  # use Pearson's X^2
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(
+                Xfeat.dot(observed_target))) ** 2 / self._W).sum() / (n - Xfeat.shape[1])
 
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
@@ -1775,18 +1635,22 @@ def full_targets(self, features=None, dispersion=None):
         G = self.loglike.smooth_objective(self.initial_soln, 'grad')
         Qfull_inv = np.linalg.inv(Qfull)
         one_step = self.initial_soln - Qfull_inv.dot(G)
-        cov_target = Qfull_inv[features][:,features]
+        cov_target = Qfull_inv[features][:, features]
         observed_target = one_step[features]
         crosscov_target_score = np.zeros((p, cov_target.shape[0]))
         crosscov_target_score[features] = -np.identity(cov_target.shape[0])
 
-        if dispersion is None: # use Pearson's X^2
-            dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step)))**2 / self._W).sum() / (n - p)
+        if dispersion is None:  # use Pearson's X^2
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step))) ** 2 / self._W).sum() / (
+            n - p)
 
         alternatives = ['twosided'] * features.sum()
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
-    def debiased_targets(self, features=None, dispersion=None, **debiasing_args):
+    def debiased_targets(self,
+                         features=None,
+                         dispersion=None,
+                         debiasing_args={}):
 
         if features is None:
             features = self._overall
@@ -1800,315 +1664,258 @@ def debiased_targets(self, features=None, dispersion=None, **debiasing_args):
         # target is one-step estimator
 
         G = self.loglike.smooth_objective(self.initial_soln, 'grad')
-        Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(self._W)[:, None], 
+        Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(self._W)[:, None],
                                                   np.nonzero(features)[0],
                                                   **debiasing_args)) / n
         observed_target = self.initial_soln[features] - Qinv_hat.dot(G)
         if p > n:
             M1 = Qinv_hat.dot(X.T)
-            cov_target = (M1 * self._W[None,:]).dot(M1.T)
-            crosscov_target_score = -(M1 * self._W[None,:]).dot(X).T
+            cov_target = (M1 * self._W[None, :]).dot(M1.T)
+            crosscov_target_score = -(M1 * self._W[None, :]).dot(X).T
         else:
             Qfull = X.T.dot(self._W[:, None] * X)
             cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T))
             crosscov_target_score = -Qinv_hat.dot(Qfull).T
 
-        if dispersion is None: # use Pearson's X^2
-            Xfeat = X[:,features]
+        if dispersion is None:  # use Pearson's X^2
+            Xfeat = X[:, features]
             Qrelax = Xfeat.T.dot(self._W[:, None] * Xfeat)
             relaxed_soln = self.initial_soln[features] - np.linalg.inv(Qrelax).dot(G[features])
-            dispersion = ((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / self._W).sum() / (n - features.sum()) 
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(
+                Xfeat.dot(relaxed_soln))) ** 2 / self._W).sum() / (n - features.sum())
 
         alternatives = ['twosided'] * features.sum()
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     @staticmethod
-    def gaussian(X, 
-                 Y, 
-                 feature_weights, 
+    def gaussian(X,
+                 Y,
+                 feature_weights,
                  sigma=1.,
                  quadratic=None,
                  ridge_term=None,
                  randomizer_scale=None):
         r"""
         Squared-error LASSO with feature weights.
-
-        Objective function (before randomizer) is 
+        Objective function (before randomizer) is
         $$
         \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
         where $\lambda$ is `feature_weights`. The ridge term
         is determined by the Hessian and `np.std(Y)` by default,
         as is the randomizer scale.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         Y : ndarray
             Shape (n,) -- the response.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
         sigma : float (optional)
             Noise variance. Set to 1 if `covariance_estimator` is not None.
             This scales the loglikelihood by `sigma**(-2)`.
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
 
         """
 
-        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma**2, quadratic=quadratic)
+        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic)
         n, p = X.shape
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
         if ridge_term is None:
             ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        return highdim(loglike, np.asarray(feature_weights) / sigma**2,
+        return highdim(loglike, np.asarray(feature_weights) / sigma ** 2,
                        ridge_term, randomizer_scale)
 
     @staticmethod
-    def logistic(X, 
-                 successes, 
-                 feature_weights, 
+    def logistic(X,
+                 successes,
+                 feature_weights,
                  trials=None,
                  quadratic=None,
                  ridge_term=None,
                  randomizer_scale=None):
         r"""
         Logistic LASSO with feature weights.
-
-        Objective function is 
+        Objective function is
         $$
         \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
-        where $\ell$ is the negative of the logistic 
+        where $\ell$ is the negative of the logistic
         log-likelihood (half the logistic deviance)
         and $\lambda$ is `feature_weights`.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         successes : ndarray
             Shape (n,) -- response vector. An integer number of successes.
             For data that is proportions, multiply the proportions
             by the number of trials first.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
         trials : ndarray (optional)
             Number of trials per response, defaults to
-            ones the same shape as Y. 
-
+            ones the same shape as Y.
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
 
         """
         n, p = X.shape
 
         loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
 
         if ridge_term is None:
             ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 
+            randomizer_scale = np.sqrt(mean_diag) * 0.5
 
         return highdim(loglike, np.asarray(feature_weights),
                        ridge_term, randomizer_scale)
 
     @staticmethod
-    def coxph(X, 
-              times, 
-              status, 
+    def coxph(X,
+              times,
+              status,
               feature_weights,
               quadratic=None,
               ridge_term=None,
               randomizer_scale=None):
         r"""
         Cox proportional hazards LASSO with feature weights.
-
-        Objective function is 
+        Objective function is
         $$
         \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
-        where $\ell^{\text{Cox}}$ is the 
+        where $\ell^{\text{Cox}}$ is the
         negative of the log of the Cox partial
         likelihood and $\lambda$ is `feature_weights`.
-
         Uses Efron's tie breaking method.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         times : ndarray
             Shape (n,) -- the survival times.
-
         status : ndarray
             Shape (n,) -- the censoring status.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
         covariance_estimator : optional
             If None, use the parameteric
             covariance estimate of the selected model.
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
 
         """
         loglike = coxph_obj(X, times, status, quadratic=quadratic)
 
         # scale for randomization seems kind of meaningless here...
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
 
         if ridge_term is None:
             ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
- 
-        return lasso(loglike, 
-                     feature_weights, 
+
+        return lasso(loglike,
+                     feature_weights,
                      ridge_term,
                      randomizer_scale)
 
     @staticmethod
-    def poisson(X, 
-                counts, 
+    def poisson(X,
+                counts,
                 feature_weights,
                 quadratic=None,
                 ridge_term=None,
                 randomizer_scale=None):
         r"""
         Poisson log-linear LASSO with feature weights.
-
-        Objective function is 
+        Objective function is
         $$
         \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
         where $\ell^{\text{Poisson}}$ is the negative
         of the log of the Poisson likelihood (half the deviance)
         and $\lambda$ is `feature_weights`.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         counts : ndarray
             Shape (n,) -- the response.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
 
         """
         n, p = X.shape
@@ -2116,92 +1923,75 @@ def poisson(X,
 
         # scale for randomizer seems kind of meaningless here...
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
 
         if ridge_term is None:
-            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n-1)
+            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.))
 
-        return lasso(loglike, 
-                     feature_weights, 
+        return lasso(loglike,
+                     feature_weights,
                      ridge_term,
                      randomizer_scale)
 
     @staticmethod
-    def sqrt_lasso(X, 
-                   Y, 
-                   feature_weights, 
+    def sqrt_lasso(X,
+                   Y,
+                   feature_weights,
                    quadratic=None,
                    ridge_term=None,
                    randomizer_scale=None,
-                   solve_args={'min_its':200},
+                   solve_args={'min_its': 200},
                    perturb=None):
         r"""
         Use sqrt-LASSO to choose variables.
-
-        Objective function is 
+        Objective function is
         $$
         \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
-
         where $\lambda$ is `feature_weights`. After solving the problem
-        treat as if `gaussian` with implied variance and choice of 
+        treat as if `gaussian` with implied variance and choice of
         multiplier. See arxiv.org/abs/1504.08031 for details.
-
         Parameters
         ----------
-
         X : ndarray
             Shape (n,p) -- the design matrix.
-
         Y : ndarray
             Shape (n,) -- the response.
-
         feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized 
-            features are handled by setting those entries of 
-            `feature_weights` to 0. If `feature_weights` is 
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
             a float, then all parameters are penalized equally.
-
         quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
             An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic 
+            Can also be a linear term by setting quadratic
             coefficient to 0.
-
         covariance : str
             One of 'parametric' or 'sandwich'. Method
             used to estimate covariance for inference
             in second stage.
-
         solve_args : dict
             Arguments passed to solver.
-
         ridge_term : float
             How big a ridge term to add?
-
         randomizer_scale : float
             Scale for IID components of randomizer.
-
         randomizer : str
             One of ['laplace', 'logistic', 'gaussian']
-
         Returns
         -------
-
         L : `selection.randomized.convenience.lasso`
-        
+
         Notes
         -----
-
         Unlike other variants of LASSO, this
         solves the problem on construction as the active
         set is needed to find equivalent gaussian LASSO.
-
         Assumes parametric model is correct for inference,
         i.e. does not accept a covariance estimator.
-
         """
 
         n, p = X.shape
@@ -2209,39 +1999,37 @@ def sqrt_lasso(X,
         if np.asarray(feature_weights).shape == ():
             feature_weights = np.ones(p) * feature_weights
 
-        mean_diag = np.mean((X**2).sum(0))
+        mean_diag = np.mean((X ** 2).sum(0))
         if ridge_term is None:
             ridge_term = np.sqrt(mean_diag) / (n - 1)
 
         if randomizer_scale is None:
-            randomizer_scale = 0.5 * np.sqrt(mean_diag) / np.sqrt(n-1)
+            randomizer_scale = 0.5 * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
         if perturb is None:
             perturb = np.random.standard_normal(p) * randomizer_scale
 
-        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0) # a ridge + linear term
+        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0)  # a ridge + linear term
 
         if quadratic is not None:
             totalQ = randomQ + quadratic
         else:
             totalQ = randomQ
 
-        soln, sqrt_loss = solve_sqrt_lasso(X, 
-                                           Y, 
-                                           weights=feature_weights, 
-                                           quadratic=totalQ, 
+        soln, sqrt_loss = solve_sqrt_lasso(X,
+                                           Y,
+                                           weights=feature_weights,
+                                           quadratic=totalQ,
                                            solve_args=solve_args,
                                            force_fat=True)
 
         denom = np.linalg.norm(Y - X.dot(soln))
         loglike = rr.glm.gaussian(X, Y)
-        
+
         obj = highdim(loglike, np.asarray(feature_weights) * denom,
-                      ridge_term * denom, 
-                      randomizer_scale * denom, 
+                      ridge_term * denom,
+                      randomizer_scale * denom,
                       perturb=perturb * denom)
         obj._sqrt_soln = soln
 
         return obj
-
-

From 705fa9dd8d04fc95151559f8e3dee1ba55c291f3 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sun, 1 Apr 2018 04:15:36 -0700
Subject: [PATCH 537/617] coverage falling short of target for debiased

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 255 ++++++++++++++++--
 1 file changed, 232 insertions(+), 23 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 9f952b542..1132e5bbd 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -108,6 +108,219 @@ def coverage(intervals, truth, npars, active_bool):
     return ((truth > intervals[:, 0])*(truth < intervals[:, 1])).sum() / float(npars),\
            ((active_bool)*(np.logical_or((0. < intervals[:, 0]),(0. > intervals[:,1])))).sum()
 
+# def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+#                               randomizer_scale=np.sqrt(0.25), target = "selected",
+#                               full_dispersion = True):
+#
+#     while True:
+#         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
+#                                                         s=s, beta_type=beta_type, snr=snr)
+#         rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
+#         active_nonrand = (est_LASSO != 0)
+#         nactive_nonrand = active_nonrand.sum()
+#         true_mean = X.dot(beta)
+#
+#         _X = X
+#         X -= X.mean(0)[None, :]
+#         X /= (X.std(0)[None, :] * np.sqrt(n))
+#         X_val -= X_val.mean(0)[None, :]
+#         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
+#
+#         _y = y
+#         y = y - y.mean()
+#         y_val = y_val - y_val.mean()
+#
+#         dispersion = None
+#         if full_dispersion:
+#             dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+#         else:
+#             dispersion = np.std(y)
+#
+#         sigma_ = np.std(y)
+#         LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
+#         soln = LASSO_py.fit()
+#         active_LASSO = (soln != 0)
+#         nactive_LASSO = active_LASSO.sum()
+#         glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
+#
+#         const = highdim.gaussian
+#         lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
+#                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+#         err = np.zeros(100)
+#         for k in range(100):
+#             W = lam_seq[k]
+#             conv = const(X,
+#                          y,
+#                          W,
+#                          randomizer_scale=randomizer_scale * sigma_)
+#             signs = conv.fit()
+#             nonzero = signs != 0
+#             estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+#
+#             full_estimate = np.zeros(p)
+#             full_estimate[nonzero] = estimate
+#             err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+#
+#         lam = lam_seq[np.argmin(err)]
+#         sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
+#         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
+#
+#         randomized_lasso = const(X,
+#                                  y,
+#                                  lam,
+#                                  randomizer_scale=randomizer_scale * sigma_)
+#
+#         signs = randomized_lasso.fit()
+#         nonzero = signs != 0
+#         sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
+#         sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
+#         sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
+#         sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
+#
+#         if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
+#             Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
+#             Lee_intervals = np.zeros((nactive_LASSO, 2))
+#             Lee_intervals[:, 0] = np.asarray(Lee['lower_confidence'])
+#             Lee_intervals[:, 1] = np.asarray(Lee['upper_confidence'])
+#
+#             sel_MLE = np.zeros(p)
+#             estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
+#                                                                                                          dispersion=dispersion)
+#             sel_MLE[nonzero] = estimate / np.sqrt(n)
+#             ind_estimator = np.zeros(p)
+#             ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
+#
+#             if target == "selected":
+#                 beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
+#                 beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
+#                 beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
+#
+#                 post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+#                 unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
+#                 unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+#                                             post_LASSO_OLS + 1.65 * unad_sd]).T
+#
+#             elif target == "full":
+#                 beta_target_rand = beta[nonzero]
+#                 beta_target_nonrand_py = beta[active_LASSO]
+#                 beta_target_nonrand = beta[active_nonrand]
+#
+#                 post_LASSO_OLS = np.linalg.pinv(X)[active_nonrand].dot(y)
+#                 unad_sd = sigma_ * np.sqrt(
+#                     np.diag((np.linalg.pinv(X)[active_nonrand].dot(np.linalg.pinv(X)[active_nonrand].T))))
+#                 unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+#                                             post_LASSO_OLS + 1.65 * unad_sd]).T
+#
+#             true_signals = np.zeros(p, np.bool)
+#             true_signals[beta != 0] = 1
+#             true_set = np.asarray([u for u in range(p) if true_signals[u]])
+#             active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
+#             active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
+#             active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
+#
+#             active_rand_bool = np.zeros(nonzero.sum(), np.bool)
+#             for x in range(nonzero.sum()):
+#                 active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
+#             active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
+#             for w in range(nactive_nonrand):
+#                 active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
+#             active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
+#             for z in range(nactive_LASSO):
+#                 active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
+#
+#             cov_sel, power_sel = coverage(sel_intervals, beta_target_rand, nonzero.sum(), active_rand_bool)
+#             cov_Lee, power_Lee = coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO,  active_LASSO_bool)
+#             cov_unad, power_unad = coverage(unad_intervals, beta_target_nonrand, nactive_nonrand, active_nonrand_bool)
+#             break
+#
+#     if True:
+#         return relative_risk(sel_MLE, beta, Sigma), \
+#                relative_risk(ind_estimator, beta, Sigma), \
+#                relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
+#                relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
+#                relative_risk(rel_LASSO, beta, Sigma), \
+#                relative_risk(est_LASSO, beta, Sigma), \
+#                cov_sel,\
+#                cov_Lee,\
+#                cov_unad,\
+#                (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
+#                (Lee_intervals[:, 1] - Lee_intervals[:, 0]).sum() / float(nactive_LASSO), \
+#                (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
+#                power_sel/float((beta != 0).sum()),  \
+#                power_Lee/float((beta != 0).sum()), \
+#                power_unad/float((beta != 0).sum())
+
+# if __name__ == "__main__":
+#
+#     ndraw = 50
+#     bias = 0.
+#     risk_selMLE = 0.
+#     risk_indest = 0.
+#     risk_LASSO_rand = 0.
+#     risk_relLASSO_rand = 0.
+#
+#     risk_relLASSO_nonrand = 0.
+#     risk_LASSO_nonrand = 0.
+#
+#     coverage_selMLE = 0.
+#     coverage_Lee = 0.
+#     coverage_unad = 0.
+#
+#     length_sel = 0.
+#     length_Lee = 0.
+#     length_unad = 0.
+#
+#     power_sel = 0.
+#     power_Lee = 0.
+#     power_unad = 0.
+#
+#     for i in range(ndraw):
+#         output = comparison_risk_inference(n=200, p=500, nval=200, rho=0.35, s=20, beta_type=2, snr=.20,
+#                                            randomizer_scale=np.sqrt(0.25), target="selected",
+#                                            full_dispersion=True)
+#
+#         risk_selMLE += output[0]
+#         risk_indest += output[1]
+#         risk_LASSO_rand += output[2]
+#         risk_relLASSO_rand += output[3]
+#         risk_relLASSO_nonrand += output[4]
+#         risk_LASSO_nonrand += output[5]
+#
+#         coverage_selMLE += output[6]
+#         coverage_Lee += output[7]
+#         coverage_unad += output[8]
+#
+#         length_sel += output[9]
+#         length_Lee += output[10]
+#         length_unad += output[11]
+#
+#         power_sel += output[12]
+#         power_Lee += output[13]
+#         power_unad += output[14]
+#
+#         sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
+#         sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
+#         sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+#         sys.stderr.write("overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n"+ "\n")
+#
+#         sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+#         sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+#
+#         sys.stderr.write("overall selective coverage " + str(coverage_selMLE/ float(i + 1)) + "\n" )
+#         sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) +  "\n")
+#         sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+#
+#         sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+#         sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
+#         sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+#
+#         sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+#         sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
+#         sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+#
+#         sys.stderr.write("iteration completed " + str(i+1) + "\n")
+
+
 def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
                               randomizer_scale=np.sqrt(0.25), target = "selected",
                               full_dispersion = True):
@@ -130,12 +343,12 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         y = y - y.mean()
         y_val = y_val - y_val.mean()
 
-        dispersion = None
         if full_dispersion:
             dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
         else:
             dispersion = np.std(y)
 
+        dispersion = None
         sigma_ = np.std(y)
         LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
         soln = LASSO_py.fit()
@@ -144,7 +357,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
 
         const = highdim.gaussian
-        lam_seq = sigma_ * np.linspace(0.25, 2.75, num=100) * \
+        lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
         err = np.zeros(100)
         for k in range(100):
@@ -177,11 +390,11 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
         sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
 
-        if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
-            Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-            Lee_intervals = np.zeros((nactive_LASSO, 2))
-            Lee_intervals[:, 0] = np.asarray(Lee['lower_confidence'])
-            Lee_intervals[:, 1] = np.asarray(Lee['upper_confidence'])
+        if nonzero.sum()>0 and nactive_nonrand>0:
+            # Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
+            # Lee_intervals = np.zeros((nactive_LASSO, 2))
+            # Lee_intervals[:, 0] = np.asarray(Lee['lower_confidence'])
+            # Lee_intervals[:, 1] = np.asarray(Lee['upper_confidence'])
 
             sel_MLE = np.zeros(p)
             estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
@@ -192,7 +405,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
             if target == "selected":
                 beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
-                beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
+                #beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
                 beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
 
                 post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
@@ -229,7 +442,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
                 active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
 
             cov_sel, power_sel = coverage(sel_intervals, beta_target_rand, nonzero.sum(), active_rand_bool)
-            cov_Lee, power_Lee = coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO,  active_LASSO_bool)
+            #cov_Lee, power_Lee = coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO,  active_LASSO_bool)
             cov_unad, power_unad = coverage(unad_intervals, beta_target_nonrand, nactive_nonrand, active_nonrand_bool)
             break
 
@@ -241,13 +454,10 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
                relative_risk(rel_LASSO, beta, Sigma), \
                relative_risk(est_LASSO, beta, Sigma), \
                cov_sel,\
-               cov_Lee,\
                cov_unad,\
                (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
-               (Lee_intervals[:, 1] - Lee_intervals[:, 0]).sum() / float(nactive_LASSO), \
                (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
                power_sel/float((beta != 0).sum()),  \
-               power_Lee/float((beta != 0).sum()), \
                power_unad/float((beta != 0).sum())
 
 if __name__ == "__main__":
@@ -275,8 +485,8 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     power_unad = 0.
 
     for i in range(ndraw):
-        output = comparison_risk_inference(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2, snr=.30,
-                                           randomizer_scale=np.sqrt(0.25), target="selected",
+        output = comparison_risk_inference(n=200, p=500, nval=200, rho=0.35, s=10, beta_type=2, snr=.20,
+                                           randomizer_scale=np.sqrt(0.25), target="full",
                                            full_dispersion=False)
 
         risk_selMLE += output[0]
@@ -287,16 +497,16 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         risk_LASSO_nonrand += output[5]
 
         coverage_selMLE += output[6]
-        coverage_Lee += output[7]
-        coverage_unad += output[8]
+        #coverage_Lee += output[7]
+        coverage_unad += output[7]
 
-        length_sel += output[9]
-        length_Lee += output[10]
-        length_unad += output[11]
+        length_sel += output[8]
+       # length_Lee += output[10]
+        length_unad += output[9]
 
-        power_sel += output[12]
-        power_Lee += output[13]
-        power_unad += output[14]
+        power_sel += output[10]
+        #power_Lee += output[13]
+        power_unad += output[11]
 
         sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
         sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
@@ -320,4 +530,3 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
 
         sys.stderr.write("iteration completed " + str(i+1) + "\n")
 
-

From 68b7ccd40dd36419264a3beec3634d799a95640a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 2 Apr 2018 07:49:35 -0700
Subject: [PATCH 538/617] double requirement

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 280ef2764..5bbc478f3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,5 +6,4 @@ mpmath
 pyinter
 statsmodels
 sklearn
-pyinter
 rpy2

From 815f39426234d79b521920abbb1af94a773d4a66 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 3 Apr 2018 07:09:42 -0700
Subject: [PATCH 539/617] clarifying highdim docstring

---
 selection/randomized/lasso.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 8f0e7a3f8..7fa423f19 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1335,16 +1335,18 @@ def sqrt_lasso(X,
 class highdim(lasso):
 
     r"""
-    A class for the LASSO for post-selection inference.
+    A class for the randomized LASSO for post-selection inference.
     The problem solved is
 
     .. math::
 
-        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
-            \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
+        \text{minimize}_{\beta} \ell(\beta) + 
+            \sum_{i=1}^p \lambda_i |\beta_i| - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
 
     where $\lambda$ is `lam`, $\omega$ is a randomization generated below
-    and the last term is a small ridge penalty.
+    and the last term is a small ridge penalty. Each static method
+    forms $\ell$ as well as the $\ell_1$ penalty. The generic class
+    forms the remaining two terms in the objective.
 
     """
 
@@ -1836,7 +1838,7 @@ def gaussian(X,
         r"""
         Squared-error LASSO with feature weights.
 
-        Objective function (before randomizer) is 
+        Objective function is (before randomization)
         $$
         \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
@@ -1908,7 +1910,7 @@ def logistic(X,
                  ridge_term=None,
                  randomizer_scale=None):
         r"""
-        Logistic LASSO with feature weights.
+        Logistic LASSO with feature weights (before randomization)
 
         Objective function is 
         $$
@@ -1987,7 +1989,7 @@ def coxph(X,
         r"""
         Cox proportional hazards LASSO with feature weights.
 
-        Objective function is 
+        Objective function is (before randomization)
         $$
         \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
@@ -2068,7 +2070,7 @@ def poisson(X,
         r"""
         Poisson log-linear LASSO with feature weights.
 
-        Objective function is 
+        Objective function is (before randomization)
         $$
         \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
         $$
@@ -2144,7 +2146,7 @@ def sqrt_lasso(X,
         r"""
         Use sqrt-LASSO to choose variables.
 
-        Objective function is 
+        Objective function is (before randomization)
         $$
         \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
         $$

From dde4895f2dea7422ad30059f9b34a98e0a694d73 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 3 Apr 2018 12:05:18 -0700
Subject: [PATCH 540/617] updating R software

---
 R-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-software b/R-software
index 232760d6a..2d396e70e 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit 232760d6aef5182e040b82e30555f4af5ad6803c
+Subproject commit 2d396e70ed253c282e14d3500ab34b7b2807bc83

From b1236a66433be7d7b95c6190429e5376e69b8bc4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 3 Apr 2018 12:21:03 -0700
Subject: [PATCH 541/617] added test for liu agreement

---
 selection/algorithms/tests/test_compareR.py | 58 +++++++++++++++++----
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 58b73d66e..361a0cfcf 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -7,12 +7,13 @@
 try:
     import rpy2.robjects as rpy
     rpy2_available = True
+    import rpy2.robjects.numpy2ri as numpy2ri
 except ImportError:
     rpy2_available = False
 
-from selection.algorithms.lasso import lasso
-from selection.algorithms.forward_step import forward_step
-
+from ..lasso import lasso, lasso_full
+from ..forward_step import forward_step
+from ...tests.instance import gaussian_instance
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_fixed_lambda():
@@ -320,8 +321,7 @@ def test_solve_QP_lasso():
     problem = rr.simple_problem(loss, pen)
     soln = problem.solve(min_its=500, tol=1.e-12)
 
-    import rpy2.robjects.numpy2ri
-    rpy2.robjects.numpy2ri.activate()
+    numpy2ri.activate()
 
     rpy.r.assign('X', X)
     rpy.r.assign('Y', Y)
@@ -386,7 +386,7 @@ def test_solve_QP_lasso():
 
     soln_R = np.asarray(rpy.r('soln_R'))
     soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
-    rpy2.robjects.numpy2ri.deactivate()
+    numpy2ri.deactivate()
 
     tol = 1.e-5
     print(soln - soln_R)
@@ -414,8 +414,7 @@ def test_solve_QP():
     problem = rr.simple_problem(loss, pen)
     soln = problem.solve(Q, min_its=500, tol=1.e-12)
 
-    import rpy2.robjects.numpy2ri
-    rpy2.robjects.numpy2ri.activate()
+    numpy2ri.activate()
 
     rpy.r.assign('X', X)
     rpy.r.assign('E', E)
@@ -480,7 +479,7 @@ def test_solve_QP():
 
     soln_R = np.asarray(rpy.r('soln_R'))
     soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
-    rpy2.robjects.numpy2ri.deactivate()
+    numpy2ri.deactivate()
 
     tol = 1.e-5
     print(soln - soln_R)
@@ -494,4 +493,43 @@ def test_solve_QP():
     yield nt.assert_true, np.fabs(G).max() < lam * (1. + 1.e-6), 'testing linfinity norm'
 
     
-
+@np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
+def test_full_lasso():
+    n, p, s = 200, 100, 10
+    X, y = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)[:2]
+
+    lam = 4. * np.sqrt(n)
+    X *= np.sqrt(n)
+    L = lasso_full.gaussian(X, y, lam)
+    L.fit()
+    if len(L.active) > 0:
+        S = L.summary(compute_intervals=False)
+        numpy2ri.activate()
+
+        rpy.r.assign("X", X)
+        rpy.r.assign("y", y)
+        rpy.r.assign("lam", lam)
+        rpy.r("""
+        y = as.numeric(y)
+        n = nrow(X)
+        p = ncol(X)
+        sigma_est = sigma(lm(y ~ X - 1))
+        penalty_factor = rep(1, p);
+        lam = lam / n;
+        soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="ls")
+        print(lam)
+        print(soln)
+        PVS = selectiveInference:::inference_group_lasso(X, y, 
+                                                         soln, groups=1:ncol(X), 
+                                                         lambda=lam, penalty_factor=penalty_factor, 
+                                                         sigma_est, loss="ls", algo="glmnet", 
+                                                         construct_ci=FALSE)
+        active_vars=PVS$active_vars - 1 # for 0-based
+        pvalues = PVS$pvalues
+        """)
+        pvalues = rpy.r('pvalues')
+        active_set = rpy.r('active_vars')
+
+        nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
+
+        numpy2ri.deactivate()

From 58d11a89aada0796d46fb9efc7aa66db87b61d78 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 4 Apr 2018 22:14:48 -0700
Subject: [PATCH 542/617] needed to reset the bound parameter for each row

---
 selection/algorithms/debiased_lasso.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 58e5cd92d..a36ab918c 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -31,7 +31,9 @@ def debiasing_matrix(X,
     n, p = X.shape
 
     if bound is None:
-        bound = (1./np.sqrt(n)) * ndist.ppf(1.-(0.1/(p**2)))
+        orig_bound = (1./np.sqrt(n)) * ndist.ppf(1.-(0.1/(p**2)))
+    else:
+        orig_bound = bound
 
     if max_active is None:
         max_active = max(50, 0.3 * n)
@@ -43,6 +45,7 @@ def debiasing_matrix(X,
 
     for idx, row in enumerate(rows):
 
+        bound = orig_bound
         soln = np.zeros(p)
         soln_old = np.zeros(p)
         ever_active = np.zeros(p, np.int)

From ab31cb0cad34442e1c8578015ce3b5df75b64d46 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 4 Apr 2018 22:18:16 -0700
Subject: [PATCH 543/617] tests to ensure debiasing code working for more than
 one row

---
 selection/algorithms/lasso.py                 | 18 ++++++--
 selection/algorithms/tests/test_compareR.py   | 46 ++++++++++++++++++-
 .../algorithms/tests/test_debiased_lasso.py   | 15 +++++-
 selection/algorithms/tests/test_lasso_full.py |  3 +-
 4 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 344ce1385..52bfc341a 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -1900,6 +1900,15 @@ class lasso_full(lasso):
 
     where $\lambda$ is `lam`.
 
+    Notes
+    -----
+
+    In solving the debiasing problem to approximate the inverse
+    of (X^TWX) in a GLM, this class makes the implicit assumption
+    that the scaling of X is such that diag(X^TWX) is O(n)
+    with n=X.shape[0]. That is, the rows of X are similar to IID
+    samples from a population that does not depend on n.
+
     """
 
     # level for coverage is 1-alpha
@@ -2012,10 +2021,11 @@ def fit(self,
                 # target is one-step estimator
 
                 G = self.loglike.smooth_objective(lasso_solution, 'grad')
-                Qinv_hat = np.atleast_2d(debiasing_matrix(
-                                             X * np.sqrt(self._W)[:, None], 
-                                             self.active,
-                                             **debiasing_args)) / n
+                M = debiasing_matrix(X * np.sqrt(W)[:, None], 
+                                     self.active,
+                                     **debiasing_args)
+
+                Qinv_hat = np.atleast_2d(M) / n # the n is to make sure we get rows of the inverse of (X^TWX) instead of (X^TWX/n).
                 observed_target = lasso_solution[self.active] - Qinv_hat.dot(G)
                 M1 = Qinv_hat.dot(X.T)
                 self._QiE = (M1 * self._W[None,:]).dot(M1.T)
diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 361a0cfcf..95a8b6198 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -494,7 +494,7 @@ def test_solve_QP():
 
     
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
-def test_full_lasso():
+def test_full_lasso_tall():
     n, p, s = 200, 100, 10
     X, y = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)[:2]
 
@@ -522,7 +522,7 @@ def test_full_lasso():
         PVS = selectiveInference:::inference_group_lasso(X, y, 
                                                          soln, groups=1:ncol(X), 
                                                          lambda=lam, penalty_factor=penalty_factor, 
-                                                         sigma_est, loss="ls", algo="glmnet", 
+                                                         sigma_est, loss="ls", algo="Q", 
                                                          construct_ci=FALSE)
         active_vars=PVS$active_vars - 1 # for 0-based
         pvalues = PVS$pvalues
@@ -533,3 +533,45 @@ def test_full_lasso():
         nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
 
         numpy2ri.deactivate()
+
+@np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
+def test_full_lasso_wide():
+    n, p, s = 30, 50, 10
+    X, y, _, _, sigma = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)
+
+    lam = 8. * np.sqrt(n)
+    X *= np.sqrt(n)
+    L = lasso_full.gaussian(X, y, lam)
+    L.fit()
+    L._sigma = sigma
+    if len(L.active) > 0:
+        S = L.summary(compute_intervals=False)
+        numpy2ri.activate()
+
+        rpy.r.assign("X", X)
+        rpy.r.assign("y", y)
+        rpy.r.assign("sigma_est", sigma)
+        rpy.r.assign("lam", lam)
+        rpy.r("""
+
+        y = as.numeric(y)
+        n = nrow(X)
+        p = ncol(X)
+
+        penalty_factor = rep(1, p);
+        lam = lam / n;
+        soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="ls")
+        PVS = selectiveInference:::inference_group_lasso(X, y, 
+                                                         soln, groups=1:ncol(X), 
+                                                         lambda=lam, penalty_factor=penalty_factor, 
+                                                         sigma_est, loss="ls", algo="glmnet", 
+                                                         construct_ci=FALSE)
+        active_vars=PVS$active_vars - 1 # for 0-based
+        pvalues = PVS$pvalues
+        """)
+        pvalues = rpy.r('pvalues')
+        active_set = rpy.r('active_vars')
+
+        nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
+        print('cor', np.corrcoef(pvalues, S['pval'])[0,1])
+        numpy2ri.deactivate()
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index 30ce91a41..4ad99a079 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -98,7 +98,20 @@ def test_compareR(n=50, p=100):
     rpy.r('soln = selectiveInference:::debiasingMatrix(X, TRUE, nrow(X), j)')
     soln_R = np.squeeze(np.asarray(rpy.r('soln')))
 
-    soln_py = debiasing_matrix(X, j, linesearch=True)
+    soln_py = debiasing_matrix(X, j)
+
+    np.testing.assert_allclose(soln_R, soln_py)
+
+    numpy2ri.activate()
+
+    j = np.array([3,5])
+    numpy2ri.activate()
+    rpy.r.assign('X', X)
+    rpy.r.assign('j', j+1)
+    rpy.r('soln = selectiveInference:::debiasingMatrix(X, TRUE, nrow(X), j)')
+    soln_R = np.squeeze(np.asarray(rpy.r('soln')))
+
+    soln_py = debiasing_matrix(X, j)
 
     np.testing.assert_allclose(soln_R, soln_py)
 
diff --git a/selection/algorithms/tests/test_lasso_full.py b/selection/algorithms/tests/test_lasso_full.py
index bad00fe0b..4e300168f 100644
--- a/selection/algorithms/tests/test_lasso_full.py
+++ b/selection/algorithms/tests/test_lasso_full.py
@@ -41,8 +41,9 @@ def truncation_interval(Qbeta_bar, Q, Qi_jj, j, beta_barj, lagrange):
 
     return lower, upper
 
-def test_agreement(n=200, p=100, s=4):
+def test_smaller():
 
+    n, p, s = 200, 100, 4
     X, y, beta = gaussian_instance(n=n,
                                    p=p,
                                    s=s)[:3]

From 2fe9f29ec7628afac92d17ff03e30a1f29d5a9ea Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Apr 2018 06:34:58 -0700
Subject: [PATCH 544/617] adding dispersion estimate argument to summary --
 what to do about logistic?

---
 selection/algorithms/lasso.py               | 21 ++++++++++++++++-----
 selection/algorithms/tests/test_compareR.py | 10 +++++-----
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 52bfc341a..76f1523ba 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -2012,7 +2012,8 @@ def fit(self,
                 _beta_bar = Qi.dot(self._Qbeta_bar)
                 self._beta_barE = _beta_bar[E]
                 one_step = self._beta_barE
-                self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X.dot(_beta_bar)))**2 / self._W).sum() / (n - p))
+                # Pearson's X^2 to estimate sigma
+                self._pearson_sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(X.dot(_beta_bar)))**2 / self._W).sum() / (n - p))
                 
             else:
 
@@ -2033,7 +2034,9 @@ def fit(self,
                 Qrelax = Xfeat.T.dot(self._W[:, None] * Xfeat)
                 relaxed_soln = lasso_solution[self.active] - np.linalg.inv(Qrelax).dot(G[self.active])
                 self._beta_barE = observed_target
-                self._sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / self._W).sum() / (n - len(self.active)))
+
+                # relaxed Pearson's X^2 to estimate sigma
+                self._pearson_sigma = np.sqrt(((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / self._W).sum() / (n - len(self.active)))
 
         else:
             self.active = []
@@ -2041,7 +2044,8 @@ def fit(self,
         return self.lasso_solution
 
     def summary(self, alpha=0.05,
-                compute_intervals=False):
+                compute_intervals=False,
+                dispersion=None):
         """
         Summary table for inference adjusted for selection.
 
@@ -2054,6 +2058,9 @@ def summary(self, alpha=0.05,
         compute_intervals : bool
             Should we compute confidence intervals?
 
+        dispersion : float
+            Estimate of dispersion. Defaults to a Pearson's X^2 estimate in the relaxed model.
+
         Returns
         -------
 
@@ -2064,7 +2071,11 @@ def summary(self, alpha=0.05,
         """
 
         X, y = self.loglike.data
-        W, sigma = self._W, self._sigma
+        W, sigma = self._W, self._pearson_sigma
+        if dispersion is None:
+            sqrt_dispersion = sigma
+        else:
+            sqrt_dispersion = np.sqrt(dispersion)
         active_set, QiE, beta_barE, Qbeta_bar = self.active, self._QiE, self._beta_barE, self._Qbeta_bar
 
         result = [] 
@@ -2074,7 +2085,7 @@ def summary(self, alpha=0.05,
             lower, upper = _truncation_interval(Qbeta_bar, X, W, QiE[j,j], idx, beta_barE[j], self.feature_weights)
 
 
-            sd = sigma * np.sqrt(QiE[j,j])
+            sd = sqrt_dispersion * np.sqrt(QiE[j,j])
             tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
             pvalue = tg.cdf(beta_barE[j])
             pvalue = float(2 * min(pvalue, 1 - pvalue))
diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 95a8b6198..63ffa51e2 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -496,7 +496,7 @@ def test_solve_QP():
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_full_lasso_tall():
     n, p, s = 200, 100, 10
-    X, y = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)[:2]
+    X, y, _, _, sigma = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)
 
     lam = 4. * np.sqrt(n)
     X *= np.sqrt(n)
@@ -514,6 +514,7 @@ def test_full_lasso_tall():
         n = nrow(X)
         p = ncol(X)
         sigma_est = sigma(lm(y ~ X - 1))
+        print(sigma_est)
         penalty_factor = rep(1, p);
         lam = lam / n;
         soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="ls")
@@ -539,13 +540,13 @@ def test_full_lasso_wide():
     n, p, s = 30, 50, 10
     X, y, _, _, sigma = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)
 
-    lam = 8. * np.sqrt(n)
+    lam = 6. * np.sqrt(n)
     X *= np.sqrt(n)
     L = lasso_full.gaussian(X, y, lam)
     L.fit()
-    L._sigma = sigma
+
     if len(L.active) > 0:
-        S = L.summary(compute_intervals=False)
+        S = L.summary(compute_intervals=False, dispersion=sigma**2)
         numpy2ri.activate()
 
         rpy.r.assign("X", X)
@@ -573,5 +574,4 @@ def test_full_lasso_wide():
         active_set = rpy.r('active_vars')
 
         nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
-        print('cor', np.corrcoef(pvalues, S['pval'])[0,1])
         numpy2ri.deactivate()

From a90538f0035dc0d547044002aec135e4c4970bf4 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Apr 2018 06:36:41 -0700
Subject: [PATCH 545/617] fixed version of debiased liu

---
 R-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-software b/R-software
index 2d396e70e..e2ebc9928 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit 2d396e70ed253c282e14d3500ab34b7b2807bc83
+Subproject commit e2ebc9928021f479f274bc74596d70e6b7531f6c

From fbad7e419a4f64fb3ca7d83df9cf1966d04c02e0 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 5 Apr 2018 09:37:10 -0700
Subject: [PATCH 546/617] updated debiased LASSO

---
 .../adjusted_MLE/tests/test_risk_coverage.py  |  2 +-
 selection/algorithms/debiased_lasso.py        |  5 +-
 .../algorithms/tests/test_debiased_lasso.py   | 73 +++++++++++--------
 3 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 1132e5bbd..0e711d95f 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -485,7 +485,7 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     power_unad = 0.
 
     for i in range(ndraw):
-        output = comparison_risk_inference(n=200, p=500, nval=200, rho=0.35, s=10, beta_type=2, snr=.20,
+        output = comparison_risk_inference(n=200, p=500, nval=200, rho=0.35, s=10, beta_type=2, snr=.30,
                                            randomizer_scale=np.sqrt(0.25), target="full",
                                            full_dispersion=False)
 
diff --git a/selection/algorithms/debiased_lasso.py b/selection/algorithms/debiased_lasso.py
index 0b73b0082..d711d6205 100644
--- a/selection/algorithms/debiased_lasso.py
+++ b/selection/algorithms/debiased_lasso.py
@@ -31,7 +31,9 @@ def debiasing_matrix(X,
     n, p = X.shape
 
     if bound is None:
-        bound = (1. / np.sqrt(n)) * ndist.ppf(1. - (0.1 / (p ** 2)))
+        orig_bound = (1. / np.sqrt(n)) * ndist.ppf(1. - (0.1 / (p ** 2)))
+    else:
+        orig_bound = bound
 
     if max_active is None:
         max_active = max(50, 0.3 * n)
@@ -43,6 +45,7 @@ def debiasing_matrix(X,
 
     for idx, row in enumerate(rows):
 
+        bound = orig_bound
         soln = np.zeros(p)
         soln_old = np.zeros(p)
         ever_active = np.zeros(p, np.int)
diff --git a/selection/algorithms/tests/test_debiased_lasso.py b/selection/algorithms/tests/test_debiased_lasso.py
index 30ce91a41..070f4cef8 100644
--- a/selection/algorithms/tests/test_debiased_lasso.py
+++ b/selection/algorithms/tests/test_debiased_lasso.py
@@ -4,7 +4,7 @@
 
 from ...tests.instance import gaussian_instance as instance
 
-from ..lasso import lasso 
+from ..lasso import lasso
 from ..debiased_lasso import (debiased_lasso_inference,
                               _find_row_approx_inverse_X,
                               debiasing_matrix)
@@ -20,10 +20,11 @@
 
 import rpy2.robjects as rpy
 from rpy2.robjects import numpy2ri
+
 rpy.r('library(selectiveInference)')
 
-def test_gaussian(n=100, p=20):
 
+def test_gaussian(n=100, p=20):
     X, y, beta = instance(n=n, p=p, sigma=1.)[:3]
 
     lam_theor = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 1000)))).max(0))
@@ -38,20 +39,22 @@ def test_gaussian(n=100, p=20):
     print(debiased_lasso_inference(L, L.active, np.sqrt(2 * np.log(p) / n)))
     print(beta)
 
-def test_approx_inverse(n=50, p=100):
 
+def test_approx_inverse(n=50, p=100):
     X = np.random.standard_normal((n, p))
     j = 5
     delta = 0.30
-    
-    X[:,3] = X[:,3] + X[:,j]
-    X[:,10] = X[:,10] + X[:,j]
+
+    X[:, 3] = X[:, 3] + X[:, j]
+    X[:, 10] = X[:, 10] + X[:, j]
     S = X.T.dot(X) / n
-    
-    soln = _find_row_approx_inverse(S, j, delta, solve_args={'min_its':500, 'tol':1.e-14, 'max_its':1000} )
 
-    soln_C = _find_row_approx_inverse_X(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, maxiter=1000, objective_tol=1.e-14)
-    soln_C2 = debiasing_matrix(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, max_iter=1000, objective_tol=1.e-14, linesearch=False)
+    soln = _find_row_approx_inverse(S, j, delta, solve_args={'min_its': 500, 'tol': 1.e-14, 'max_its': 1000})
+
+    soln_C = _find_row_approx_inverse_X(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, maxiter=1000,
+                                        objective_tol=1.e-14)
+    soln_C2 = debiasing_matrix(X, j, delta, kkt_tol=1.e-14, parameter_tol=1.e-14, max_iter=1000, objective_tol=1.e-14,
+                               linesearch=False)
 
     # make sure linesearch terminates
 
@@ -61,7 +64,7 @@ def test_approx_inverse(n=50, p=100):
     basis_vector[j] = 1.
 
     nt.assert_true(np.fabs(S.dot(soln) - basis_vector).max() < delta * 1.001)
-    
+
     U = - S.dot(-soln) - basis_vector
 
     yield np.testing.assert_allclose, soln_C, soln_C2
@@ -70,59 +73,65 @@ def test_approx_inverse(n=50, p=100):
     yield nt.assert_raises, ValueError, _find_row_approx_inverse, S, j, 1.e-7 * delta
     yield np.testing.assert_allclose, soln, soln_C, 1.e-3
 
-def test_approx_inverse_nondegen(n=100, p=20):
 
+def test_approx_inverse_nondegen(n=100, p=20):
     X = np.random.standard_normal((n, p))
     j = 5
     delta = 0.30
-    
-    X[:,3] = X[:,3] + X[:,j]
-    X[:,10] = X[:,10] + X[:,j]
+
+    X[:, 3] = X[:, 3] + X[:, j]
+    X[:, 10] = X[:, 10] + X[:, j]
 
     M = debiasing_matrix(X, np.arange(p))
 
 
 def test_compareR(n=50, p=100):
-
     X = np.random.standard_normal((n, p))
     j = 5
     delta = 0.30
-    
-    X[:,3] = X[:,3] + X[:,j]
-    X[:,10] = X[:,10] + X[:,j]
+
+    X[:, 3] = X[:, 3] + X[:, j]
+    X[:, 10] = X[:, 10] + X[:, j]
     S = X.T.dot(X) / n
-    
+
     numpy2ri.activate()
     rpy.r.assign('X', X)
-    rpy.r.assign('j', j+1)
+    rpy.r.assign('j', j + 1)
     rpy.r('soln = selectiveInference:::debiasingMatrix(X, TRUE, nrow(X), j)')
     soln_R = np.squeeze(np.asarray(rpy.r('soln')))
 
-    soln_py = debiasing_matrix(X, j, linesearch=True)
+    soln_py = debiasing_matrix(X, j)
 
     np.testing.assert_allclose(soln_R, soln_py)
 
     numpy2ri.activate()
-    
+
+    j = np.array([3, 5])
+    numpy2ri.activate()
+    rpy.r.assign('X', X)
+    rpy.r.assign('j', j + 1)
+    rpy.r('soln = selectiveInference:::debiasingMatrix(X, TRUE, nrow(X), j)')
+    soln_R = np.squeeze(np.asarray(rpy.r('soln')))
+
+    soln_py = debiasing_matrix(X, j)
+
+    np.testing.assert_allclose(soln_R, soln_py)
+
+    numpy2ri.activate()
+
+
 ## regreg implementation
 
-def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its':100, 'tol':1.e-6, 'max_its':500}):
+def _find_row_approx_inverse(Sigma, j, delta, solve_args={'min_its': 100, 'tol': 1.e-6, 'max_its': 500}):
     """
-
     Find an approximation of j-th row of inverse of Sigma.
-
     Solves the problem
-
     .. math::
-
         \text{min}_{\theta} \frac{1}{2} \theta^TS\theta
-
     subject to $\|\Sigma \hat{\theta} - e_j\|_{\infty} \leq \delta$ with
-    $e_j$ the $j$-th elementary basis vector and `S` as $\Sigma$, 
+    $e_j$ the $j$-th elementary basis vector and `S` as $\Sigma$,
     and `delta` as $\delta$.
-
     Described in Table 1, display (4) of https://arxiv.org/pdf/1306.3171.pdf
-
     """
     p = Sigma.shape[0]
     elem_basis = np.zeros(p, np.float)

From dbffe212bb0154dfd07ff87e44a8335b5b75a541 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Apr 2018 20:18:54 -0700
Subject: [PATCH 547/617] NF: modelX full lasso with known Q

---
 selection/algorithms/lasso.py | 225 ++++++++++++++++++++++++++++++++--
 1 file changed, 217 insertions(+), 8 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 76f1523ba..dc977a7ca 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -28,7 +28,8 @@
                         coxph as coxph_obj,
                         smooth_sum,
                         squared_error,
-                        identity_quadratic)
+                        identity_quadratic,
+                        quadratic_loss)
 
 from .sqrt_lasso import solve_sqrt_lasso, estimate_sigma
 from .debiased_lasso import debiasing_matrix
@@ -1845,15 +1846,23 @@ def additive_noise(X,
 # Liu, Markovic, Tibs selection
 # put this into library!
 
-def _solve_restricted_problem(Qbeta_bar, X, W, lagrange, initial=None,
+def _solve_restricted_problem(Qbeta_bar, Xinfo, lagrange, initial=None,
+                              wide=True,
                               min_its=30, tol=1.e-12):
     p = Qbeta_bar.shape[0]
 
-    loss = squared_error(X * np.sqrt(W)[:, None], np.zeros(X.shape[0]))
+    if wide:
+        X, W = Xinfo
+        loss = squared_error(X * np.sqrt(W)[:, None], np.zeros(X.shape[0]))
+    else:
+        Q = Xinfo
+        loss = quadratic_loss(Q.shape[0], Q=Q)
+        
     loss.quadratic = identity_quadratic(0, 
                                         0, 
                                         -Qbeta_bar, 
                                         0)
+
     lagrange = np.asarray(lagrange)
     if lagrange.shape in [(), (1,)]:
         lagrange = np.ones(p) * lagrange
@@ -1864,20 +1873,25 @@ def _solve_restricted_problem(Qbeta_bar, X, W, lagrange, initial=None,
     soln = problem.solve(tol=tol, min_its=min_its)
     return soln
 
-def _truncation_interval(Qbeta_bar, X, W, Qi_jj, j, beta_barj, lagrange):
+def _truncation_interval(Qbeta_bar, Xinfo, Qi_jj, j, beta_barj, lagrange, wide=True):
     if lagrange[j] != 0:
         lagrange_cp = lagrange.copy()
     else:
         return -np.inf, np.inf
     lagrange_cp[j] = np.inf
-    restricted_soln = _solve_restricted_problem(Qbeta_bar, X, W, lagrange_cp)
+    restricted_soln = _solve_restricted_problem(Qbeta_bar, Xinfo, lagrange_cp, wide) # TODO: use initial solution for speed
 
     p = Qbeta_bar.shape[0]
     Ij = np.zeros(p)
     Ij[j] = 1.
     nuisance = Qbeta_bar - Ij / Qi_jj * beta_barj
     
-    Qj = X.T.dot(X[:,j] * W)
+    if wide:
+        X, W = Xinfo
+        Qj = X.T.dot(X[:,j] * W)
+    else:
+        Q = Xinfo
+        Qj = Q[j]
     center = nuisance[j] - Qj.dot(restricted_soln)
     upper = (lagrange[j] - center) * Qi_jj
     lower = (-lagrange[j] - center) * Qi_jj
@@ -2082,8 +2096,7 @@ def summary(self, alpha=0.05,
 
         for j in range(len(active_set)):
             idx = self.active[j]
-            lower, upper = _truncation_interval(Qbeta_bar, X, W, QiE[j,j], idx, beta_barE[j], self.feature_weights)
-
+            lower, upper = _truncation_interval(Qbeta_bar, (X, W), QiE[j,j], idx, beta_barE[j], self.feature_weights, wide=True)
 
             sd = sqrt_dispersion * np.sqrt(QiE[j,j])
             tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
@@ -2318,3 +2331,199 @@ def poisson(X,
         """
         loglike = glm.poisson(X, counts, quadratic=quadratic)
         return lasso_full(loglike, feature_weights)
+
+class lasso_full_modelX(lasso):
+
+    r"""
+    A class for the LASSO for post-selection inference.
+    The problem solved is
+
+    .. math::
+
+        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
+            \lambda \|\beta\|_1
+
+    where $\lambda$ is `lam`.
+
+    Notes
+    -----
+
+    In solving the debiasing problem to approximate the inverse
+    of (X^TWX) in a GLM, this class makes the implicit assumption
+    that the scaling of X is such that diag(X^TWX) is O(n)
+    with n=X.shape[0]. That is, X's are similar to IID samples
+    from a population that does not depend on n.
+
+    """
+
+    # level for coverage is 1-alpha
+    alpha = 0.05
+
+    def __init__(self, 
+                 Q,               # population or semi-supervised version of X.T.dot(X)
+                 X, 
+                 y,
+                 feature_weights):
+        r"""
+
+        Create a new post-selection for the LASSO problem
+
+        Parameters
+        ----------
+
+        Q : np.ndarray((p,p))
+
+        sufficient_stat : np.ndarray(p)
+
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is brodcast to all features.
+
+        """
+
+        self.Q = Q
+        self.X, self.y = X, y
+        self._loss = quadratic_loss(Q.shape[0], Q=Q)
+        self._linear_term = identity_quadratic(0, 0, -X.T.dot(y), 0)
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(Q.shape[0]) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+
+    def fit(self, 
+            solve_args={'tol':1.e-12, 'min_its':50},
+            debiasing_args={}):
+        """
+        Fit the lasso using `regreg`.
+        This sets the attributes `soln`, `onestep` and
+        forms the constraints necessary for post-selection inference
+        by calling `form_constraints()`.
+
+        Parameters
+        ----------
+
+        lasso_solution : optional
+
+             If not None, this is taken to be the solution
+             of the optimization problem. No checks
+             are done, though the implied affine
+             constraints will generally not be satisfied.
+
+        solve_args : keyword args
+             Passed to `regreg.problems.simple_problem.solve`.
+
+        Returns
+        -------
+
+        soln : np.float
+             Solution to lasso.
+             
+        Notes
+        -----
+
+        If `self` already has an attribute `lasso_solution`
+        this will be taken to be the solution and 
+        no optimization problem will be solved. Supplying
+        the optional argument `lasso_solution` will
+        overwrite `self`'s `lasso_solution`.
+
+        """
+
+        self._penalty = weighted_l1norm(self.feature_weights, lagrange=1.)
+        problem = simple_problem(self._loss, self._penalty)
+        self.lasso_solution = problem.solve(self._linear_term, **solve_args)
+
+        lasso_solution = self.lasso_solution # shorthand after setting it correctly above
+
+        if not np.all(lasso_solution == 0):
+
+            self.active = np.nonzero(lasso_solution != 0)[0]
+            self.inactive = lasso_solution == 0
+            self.active_signs = np.sign(lasso_solution[self.active])
+            self._active_soln = lasso_solution[self.active]
+
+            # Needed for finding truncation intervals
+
+            G = self._loss.smooth_objective(lasso_solution, 'grad') + self._linear_term.objective(lasso_solution, 'grad')
+            self._Qbeta_bar = self.Q.dot(lasso_solution) - G 
+
+            Q = self.Q
+            E = self.active
+            QiE = np.linalg.inv(Q)[E] # maybe we want to use a debised estimate
+            self._QiE = QiE[:,E]
+            self._beta_barE = QiE.dot(self._Qbeta_bar)
+
+            # Pearson's X^2 to estimate sigma
+            self._pearson_sigma = np.sqrt((((y - X.dot(_beta_bar)))**2).sum() / (n - p))
+
+        else:
+            self.active = []
+            self.inactive = np.arange(lasso_solution.shape[0])
+        return self.lasso_solution
+
+    def summary(self, alpha=0.05,
+                compute_intervals=False,
+                dispersion=None):
+        """
+        Summary table for inference adjusted for selection.
+
+        Parameters
+        ----------
+
+        alpha : float
+            Form (1-alpha)*100% selective confidence intervals.
+
+        compute_intervals : bool
+            Should we compute confidence intervals?
+
+        dispersion : float
+            Estimate of dispersion. Defaults to a Pearson's X^2 estimate in the relaxed model.
+
+        Returns
+        -------
+
+        pval_summary : np.recarray
+            Array with one entry per active variable.
+            Columns are 'variable', 'pval', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'.
+
+        """
+
+        X, y = self.X, self.y
+        sigma = self._pearson_sigma
+        if dispersion is None:
+            sqrt_dispersion = sigma
+        else:
+            sqrt_dispersion = np.sqrt(dispersion)
+        active_set, QiE, beta_barE, Qbeta_bar = self.active, self._QiE, self._beta_barE, self._Qbeta_bar
+
+        result = [] 
+
+        for j in range(len(active_set)):
+            idx = self.active[j]
+            lower, upper = _truncation_interval(Qbeta_bar, Q, QiE[j,j], idx, beta_barE[j], self.feature_weights, wide=False)
+
+            sd = sqrt_dispersion * np.sqrt(QiE[j,j])
+            tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
+            pvalue = tg.cdf(beta_barE[j])
+            pvalue = float(2 * min(pvalue, 1 - pvalue))
+
+            if compute_intervals:
+                l, u = tg.equal_tailed_interval(beta_barE[j], alpha=alpha)
+            else:
+                l, u = np.nan, np.nan
+
+            result.append((idx, pvalue, self.lasso_solution[idx], beta_barE[j], sd, l, u, lower, upper))
+
+        df = pd.DataFrame(index=self.active,
+                          data=dict([(n, d) for n, d in zip(['variable',
+                                                             'pval', 
+                                                             'lasso', 
+                                                             'onestep',
+                                                             'sd',
+                                                             'lower_confidence', 
+                                                             'upper_confidence',
+                                                             'lower_truncation', 
+                                                             'upper_truncation'], 
+                                                            np.array(result).T)]))
+        df['variable'] = df['variable'].astype(int)
+        return df
+

From fffb135ad981231c2fb9aa50fa669ce815916d07 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Apr 2018 20:42:18 -0700
Subject: [PATCH 548/617] test to check full_modelX agrees with full

---
 selection/algorithms/lasso.py                 | 150 +++++++++---------
 selection/algorithms/tests/test_lasso_full.py |  22 +++
 2 files changed, 100 insertions(+), 72 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index dc977a7ca..33539da2c 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -1879,7 +1879,7 @@ def _truncation_interval(Qbeta_bar, Xinfo, Qi_jj, j, beta_barj, lagrange, wide=T
     else:
         return -np.inf, np.inf
     lagrange_cp[j] = np.inf
-    restricted_soln = _solve_restricted_problem(Qbeta_bar, Xinfo, lagrange_cp, wide) # TODO: use initial solution for speed
+    restricted_soln = _solve_restricted_problem(Qbeta_bar, Xinfo, lagrange_cp, wide=wide) # TODO: use initial solution for speed
 
     p = Qbeta_bar.shape[0]
     Ij = np.zeros(p)
@@ -2084,45 +2084,47 @@ def summary(self, alpha=0.05,
 
         """
 
-        X, y = self.loglike.data
-        W, sigma = self._W, self._pearson_sigma
-        if dispersion is None:
-            sqrt_dispersion = sigma
-        else:
-            sqrt_dispersion = np.sqrt(dispersion)
-        active_set, QiE, beta_barE, Qbeta_bar = self.active, self._QiE, self._beta_barE, self._Qbeta_bar
+        if len(self.active) > 0:
+            X, y = self.loglike.data
+            active_set, QiE, beta_barE, Qbeta_bar = self.active, self._QiE, self._beta_barE, self._Qbeta_bar
+            W, sigma = self._W, self._pearson_sigma
+            if dispersion is None:
+                sqrt_dispersion = sigma
+            else:
+                sqrt_dispersion = np.sqrt(dispersion)
 
-        result = [] 
 
-        for j in range(len(active_set)):
-            idx = self.active[j]
-            lower, upper = _truncation_interval(Qbeta_bar, (X, W), QiE[j,j], idx, beta_barE[j], self.feature_weights, wide=True)
+            result = [] 
 
-            sd = sqrt_dispersion * np.sqrt(QiE[j,j])
-            tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
-            pvalue = tg.cdf(beta_barE[j])
-            pvalue = float(2 * min(pvalue, 1 - pvalue))
+            for j in range(len(active_set)):
+                idx = self.active[j]
+                lower, upper = _truncation_interval(Qbeta_bar, (X, W), QiE[j,j], idx, beta_barE[j], self.feature_weights, wide=True)
 
-            if compute_intervals:
-                l, u = tg.equal_tailed_interval(beta_barE[j], alpha=alpha)
-            else:
-                l, u = np.nan, np.nan
+                sd = sqrt_dispersion * np.sqrt(QiE[j,j])
+                tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
+                pvalue = tg.cdf(beta_barE[j])
+                pvalue = float(2 * min(pvalue, 1 - pvalue))
 
-            result.append((idx, pvalue, self.lasso_solution[idx], beta_barE[j], sd, l, u, lower, upper))
-
-        df = pd.DataFrame(index=self.active,
-                          data=dict([(n, d) for n, d in zip(['variable',
-                                                             'pval', 
-                                                             'lasso', 
-                                                             'onestep',
-                                                             'sd',
-                                                             'lower_confidence', 
-                                                             'upper_confidence',
-                                                             'lower_truncation', 
-                                                             'upper_truncation'], 
-                                                            np.array(result).T)]))
-        df['variable'] = df['variable'].astype(int)
-        return df
+                if compute_intervals:
+                    l, u = tg.equal_tailed_interval(beta_barE[j], alpha=alpha)
+                else:
+                    l, u = np.nan, np.nan
+
+                result.append((idx, pvalue, self.lasso_solution[idx], beta_barE[j], sd, l, u, lower, upper))
+
+            df = pd.DataFrame(index=self.active,
+                              data=dict([(n, d) for n, d in zip(['variable',
+                                                                 'pval', 
+                                                                 'lasso', 
+                                                                 'onestep',
+                                                                 'sd',
+                                                                 'lower_confidence', 
+                                                                 'upper_confidence',
+                                                                 'lower_truncation', 
+                                                                 'upper_truncation'], 
+                                                                np.array(result).T)]))
+            df['variable'] = df['variable'].astype(int)
+            return df
 
     @property
     def soln(self):
@@ -2452,8 +2454,11 @@ def fit(self,
             self._QiE = QiE[:,E]
             self._beta_barE = QiE.dot(self._Qbeta_bar)
 
-            # Pearson's X^2 to estimate sigma
-            self._pearson_sigma = np.sqrt((((y - X.dot(_beta_bar)))**2).sum() / (n - p))
+            # Pearson's X^2 to estimate sigma from relaxed estimator
+            y, X = self.y, self.X
+            n, p = X.shape
+            relaxed_beta_barE = np.linalg.inv(Q[E][:,E]).dot(X[:,E].T.dot(y))
+            self._pearson_sigma = np.sqrt((((y - X[:,E].dot(relaxed_beta_barE)))**2).sum() / (n - p))
 
         else:
             self.active = []
@@ -2487,43 +2492,44 @@ def summary(self, alpha=0.05,
 
         """
 
-        X, y = self.X, self.y
-        sigma = self._pearson_sigma
-        if dispersion is None:
-            sqrt_dispersion = sigma
-        else:
-            sqrt_dispersion = np.sqrt(dispersion)
-        active_set, QiE, beta_barE, Qbeta_bar = self.active, self._QiE, self._beta_barE, self._Qbeta_bar
+        if len(self.active) > 0:
+            X, y = self.X, self.y
+            sigma = self._pearson_sigma
+            if dispersion is None:
+                sqrt_dispersion = sigma
+            else:
+                sqrt_dispersion = np.sqrt(dispersion)
+            active_set, QiE, beta_barE, Qbeta_bar = self.active, self._QiE, self._beta_barE, self._Qbeta_bar
 
-        result = [] 
+            result = [] 
 
-        for j in range(len(active_set)):
-            idx = self.active[j]
-            lower, upper = _truncation_interval(Qbeta_bar, Q, QiE[j,j], idx, beta_barE[j], self.feature_weights, wide=False)
+            for j in range(len(active_set)):
+                idx = self.active[j]
+                lower, upper = _truncation_interval(Qbeta_bar, self.Q, QiE[j,j], idx, beta_barE[j], self.feature_weights, wide=False)
 
-            sd = sqrt_dispersion * np.sqrt(QiE[j,j])
-            tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
-            pvalue = tg.cdf(beta_barE[j])
-            pvalue = float(2 * min(pvalue, 1 - pvalue))
+                sd = sqrt_dispersion * np.sqrt(QiE[j,j])
+                tg = TG([(-np.inf, lower), (upper, np.inf)], scale=sd)
+                pvalue = tg.cdf(beta_barE[j])
+                pvalue = float(2 * min(pvalue, 1 - pvalue))
 
-            if compute_intervals:
-                l, u = tg.equal_tailed_interval(beta_barE[j], alpha=alpha)
-            else:
-                l, u = np.nan, np.nan
-
-            result.append((idx, pvalue, self.lasso_solution[idx], beta_barE[j], sd, l, u, lower, upper))
-
-        df = pd.DataFrame(index=self.active,
-                          data=dict([(n, d) for n, d in zip(['variable',
-                                                             'pval', 
-                                                             'lasso', 
-                                                             'onestep',
-                                                             'sd',
-                                                             'lower_confidence', 
-                                                             'upper_confidence',
-                                                             'lower_truncation', 
-                                                             'upper_truncation'], 
-                                                            np.array(result).T)]))
-        df['variable'] = df['variable'].astype(int)
-        return df
+                if compute_intervals:
+                    l, u = tg.equal_tailed_interval(beta_barE[j], alpha=alpha)
+                else:
+                    l, u = np.nan, np.nan
+
+                result.append((idx, pvalue, self.lasso_solution[idx], beta_barE[j], sd, l, u, lower, upper))
+
+            df = pd.DataFrame(index=self.active,
+                              data=dict([(n, d) for n, d in zip(['variable',
+                                                                 'pval', 
+                                                                 'lasso', 
+                                                                 'onestep',
+                                                                 'sd',
+                                                                 'lower_confidence', 
+                                                                 'upper_confidence',
+                                                                 'lower_truncation', 
+                                                                 'upper_truncation'], 
+                                                                np.array(result).T)]))
+            df['variable'] = df['variable'].astype(int)
+            return df
 
diff --git a/selection/algorithms/tests/test_lasso_full.py b/selection/algorithms/tests/test_lasso_full.py
index 4e300168f..344723dab 100644
--- a/selection/algorithms/tests/test_lasso_full.py
+++ b/selection/algorithms/tests/test_lasso_full.py
@@ -5,6 +5,7 @@
 from ...tests.instance import gaussian_instance
 
 from ..lasso import (lasso_full,
+                     lasso_full_modelX,
                      _truncation_interval,
                      _solve_restricted_problem)
 
@@ -76,5 +77,26 @@ def test_smaller():
         np.testing.assert_allclose(l, lower)
         np.testing.assert_allclose(u, upper)
 
+def test_modelX():
+    """lasso_full_modelX called with X.T.dot(X) should reproduce the lasso_full.gaussian p-values."""
+    n, p, s = 200, 50, 4
+    X, y, beta = gaussian_instance(n=n,
+                                   p=p,
+                                   s=s,
+                                   sigma=1)[:3]
+    # the same penalty weight (1.) is used for every one of the p features
+    lagrange = 1. * np.ones(p)
+    # reference fit built through the lasso_full.gaussian constructor
+    LF = lasso_full.gaussian(X, y, lagrange)
+    LF.fit()
+    S = LF.summary(dispersion=1)
+    # same X, y and weights, with X.T.dot(X) passed explicitly as the first argument
+    LX = lasso_full_modelX(X.T.dot(X), X, y, lagrange)
+    LX.fit()
+    SX = LX.summary(dispersion=1)
+    # both summaries were computed with dispersion=1, so the 'pval' columns are compared directly
+    np.testing.assert_allclose(S['pval'], SX['pval'])
+
+
 
 

From a8c92edbea8659eeed757c47a952903b54ac0296 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 5 Apr 2018 21:30:20 -0700
Subject: [PATCH 549/617] BF: degrees of freedom

---
 selection/algorithms/lasso.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 33539da2c..54e8d468e 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -2458,7 +2458,7 @@ def fit(self,
             y, X = self.y, self.X
             n, p = X.shape
             relaxed_beta_barE = np.linalg.inv(Q[E][:,E]).dot(X[:,E].T.dot(y))
-            self._pearson_sigma = np.sqrt((((y - X[:,E].dot(relaxed_beta_barE)))**2).sum() / (n - p))
+            self._pearson_sigma = np.sqrt(((y - X[:,E].dot(relaxed_beta_barE))**2).sum() / (n - len(self.active)))
 
         else:
             self.active = []

From 44485b3346b6976cfc8a5c78b60f0ee2bed943a6 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 6 Apr 2018 14:02:28 -0700
Subject: [PATCH 550/617] allowed lasso_full to use a sparse estimate of
 inverse

---
 selection/algorithms/lasso.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 54e8d468e..8fd5905fd 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -1930,7 +1930,8 @@ class lasso_full(lasso):
 
     def __init__(self, 
                  loglike, 
-                 feature_weights):
+                 feature_weights,
+                 sparse_inverse=False):
         r"""
 
         Create a new post-selection for the LASSO problem
@@ -1945,12 +1946,17 @@ def __init__(self,
             Feature weights for L-1 penalty. If a float,
             it is brodcast to all features.
 
+        sparse_inverse : bool
+             If True, use the sparse LASSO estimate of the
+             inverse of X.T.dot(X).
+
         """
 
         self.loglike = loglike
         if np.asarray(feature_weights).shape == ():
             feature_weights = np.ones(loglike.shape) * feature_weights
         self.feature_weights = np.asarray(feature_weights)
+        self.sparse_inverse = sparse_inverse
 
     def fit(self, 
             lasso_solution=None, 
@@ -1966,7 +1972,6 @@ def fit(self,
         ----------
 
         lasso_solution : optional
-
              If not None, this is taken to be the solution
              of the optimization problem. No checks
              are done, though the implied affine
@@ -1975,6 +1980,9 @@ def fit(self,
         solve_args : keyword args
              Passed to `regreg.problems.simple_problem.solve`.
 
+        debiasing_args : dict
+             Arguments passed to `.debiased_lasso.debiasing_matrix`.
+
         Returns
         -------
 
@@ -2018,7 +2026,7 @@ def fit(self,
             self._Qbeta_bar = X.T.dot(W * X.dot(lasso_solution)) - self.loglike.smooth_objective(lasso_solution, 'grad')
             self._W = W
 
-            if n > p:
+            if n > p and not self.sparse_inverse:
                 Q = self.loglike.hessian(lasso_solution)
                 E = self.active
                 Qi = np.linalg.inv(Q)

From b3ae6551e2ce42b7eeb4beda3eaf33f7c9a8ccc2 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 6 Apr 2018 14:11:47 -0700
Subject: [PATCH 551/617] separated high and low case instances

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 302 +++++++++---------
 1 file changed, 146 insertions(+), 156 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 0e711d95f..9b8561065 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -108,147 +108,145 @@ def coverage(intervals, truth, npars, active_bool):
     return ((truth > intervals[:, 0])*(truth < intervals[:, 1])).sum() / float(npars),\
            ((active_bool)*(np.logical_or((0. < intervals[:, 0]),(0. > intervals[:,1])))).sum()
 
-# def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
-#                               randomizer_scale=np.sqrt(0.25), target = "selected",
-#                               full_dispersion = True):
-#
-#     while True:
-#         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
-#                                                         s=s, beta_type=beta_type, snr=snr)
-#         rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
-#         active_nonrand = (est_LASSO != 0)
-#         nactive_nonrand = active_nonrand.sum()
-#         true_mean = X.dot(beta)
-#
-#         _X = X
-#         X -= X.mean(0)[None, :]
-#         X /= (X.std(0)[None, :] * np.sqrt(n))
-#         X_val -= X_val.mean(0)[None, :]
-#         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
-#
-#         _y = y
-#         y = y - y.mean()
-#         y_val = y_val - y_val.mean()
-#
-#         dispersion = None
-#         if full_dispersion:
-#             dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
-#         else:
-#             dispersion = np.std(y)
-#
-#         sigma_ = np.std(y)
-#         LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
-#         soln = LASSO_py.fit()
-#         active_LASSO = (soln != 0)
-#         nactive_LASSO = active_LASSO.sum()
-#         glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
-#
-#         const = highdim.gaussian
-#         lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
-#                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-#         err = np.zeros(100)
-#         for k in range(100):
-#             W = lam_seq[k]
-#             conv = const(X,
-#                          y,
-#                          W,
-#                          randomizer_scale=randomizer_scale * sigma_)
-#             signs = conv.fit()
-#             nonzero = signs != 0
-#             estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
-#
-#             full_estimate = np.zeros(p)
-#             full_estimate[nonzero] = estimate
-#             err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
-#
-#         lam = lam_seq[np.argmin(err)]
-#         sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
-#         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
-#
-#         randomized_lasso = const(X,
-#                                  y,
-#                                  lam,
-#                                  randomizer_scale=randomizer_scale * sigma_)
-#
-#         signs = randomized_lasso.fit()
-#         nonzero = signs != 0
-#         sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
-#         sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
-#         sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
-#         sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
-#
-#         if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
-#             Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-#             Lee_intervals = np.zeros((nactive_LASSO, 2))
-#             Lee_intervals[:, 0] = np.asarray(Lee['lower_confidence'])
-#             Lee_intervals[:, 1] = np.asarray(Lee['upper_confidence'])
-#
-#             sel_MLE = np.zeros(p)
-#             estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
-#                                                                                                          dispersion=dispersion)
-#             sel_MLE[nonzero] = estimate / np.sqrt(n)
-#             ind_estimator = np.zeros(p)
-#             ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
-#
-#             if target == "selected":
-#                 beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
-#                 beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
-#                 beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
-#
-#                 post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
-#                 unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
-#                 unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-#                                             post_LASSO_OLS + 1.65 * unad_sd]).T
-#
-#             elif target == "full":
-#                 beta_target_rand = beta[nonzero]
-#                 beta_target_nonrand_py = beta[active_LASSO]
-#                 beta_target_nonrand = beta[active_nonrand]
-#
-#                 post_LASSO_OLS = np.linalg.pinv(X)[active_nonrand].dot(y)
-#                 unad_sd = sigma_ * np.sqrt(
-#                     np.diag((np.linalg.pinv(X)[active_nonrand].dot(np.linalg.pinv(X)[active_nonrand].T))))
-#                 unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-#                                             post_LASSO_OLS + 1.65 * unad_sd]).T
-#
-#             true_signals = np.zeros(p, np.bool)
-#             true_signals[beta != 0] = 1
-#             true_set = np.asarray([u for u in range(p) if true_signals[u]])
-#             active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
-#             active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
-#             active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
-#
-#             active_rand_bool = np.zeros(nonzero.sum(), np.bool)
-#             for x in range(nonzero.sum()):
-#                 active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
-#             active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
-#             for w in range(nactive_nonrand):
-#                 active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
-#             active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
-#             for z in range(nactive_LASSO):
-#                 active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
-#
-#             cov_sel, power_sel = coverage(sel_intervals, beta_target_rand, nonzero.sum(), active_rand_bool)
-#             cov_Lee, power_Lee = coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO,  active_LASSO_bool)
-#             cov_unad, power_unad = coverage(unad_intervals, beta_target_nonrand, nactive_nonrand, active_nonrand_bool)
-#             break
-#
-#     if True:
-#         return relative_risk(sel_MLE, beta, Sigma), \
-#                relative_risk(ind_estimator, beta, Sigma), \
-#                relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
-#                relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
-#                relative_risk(rel_LASSO, beta, Sigma), \
-#                relative_risk(est_LASSO, beta, Sigma), \
-#                cov_sel,\
-#                cov_Lee,\
-#                cov_unad,\
-#                (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
-#                (Lee_intervals[:, 1] - Lee_intervals[:, 0]).sum() / float(nactive_LASSO), \
-#                (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
-#                power_sel/float((beta != 0).sum()),  \
-#                power_Lee/float((beta != 0).sum()), \
-#                power_unad/float((beta != 0).sum())
+def comparison_risk_inference_low(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+                                  randomizer_scale=np.sqrt(0.25), target = "selected",
+                                  full_dispersion = True):
+    """Redraw data until all three LASSO fits select something; return 15 risk/coverage/length/power values."""
+    while True:
+        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
+                                                        s=s, beta_type=beta_type, snr=snr)
+        rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
+        active_nonrand = (est_LASSO != 0)
+        nactive_nonrand = active_nonrand.sum()
+        true_mean = X.dot(beta)
+        # center columns and scale by std * sqrt(#rows); true_mean above was computed from X before this step
+        X -= X.mean(0)[None, :]
+        X /= (X.std(0)[None, :] * np.sqrt(n))
+        X_val -= X_val.mean(0)[None, :]
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
+
+        y = y - y.mean()
+        y_val = y_val - y_val.mean()
+        # NOTE(review): the None below is always overwritten; the else branch uses np.std(y), not a squared scale -- confirm
+        dispersion = None
+        if full_dispersion:
+            dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+        else:
+            dispersion = np.std(y)
+
+        sigma_ = np.std(y)
+        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
+        soln = LASSO_py.fit()
+        active_LASSO = (soln != 0)
+        nactive_LASSO = active_LASSO.sum()
+        glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
+        # tune the randomized LASSO penalty: 100-point grid, scored by validation error of the selective MLE
+        const = highdim.gaussian
+        lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
+                  np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+        err = np.zeros(100)
+        for k in range(100):
+            W = lam_seq[k]
+            conv = const(X,
+                         y,
+                         W,
+                         randomizer_scale=randomizer_scale * sigma_)
+            signs = conv.fit()
+            nonzero = signs != 0
+            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+
+            full_estimate = np.zeros(p)
+            full_estimate[nonzero] = estimate
+            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+
+        lam = lam_seq[np.argmin(err)]
+        sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
+        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
+        # refit the randomized LASSO at the penalty that minimized the validation error
+        randomized_lasso = const(X,
+                                 y,
+                                 lam,
+                                 randomizer_scale=randomizer_scale * sigma_)
+
+        signs = randomized_lasso.fit()
+        nonzero = signs != 0
+        sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
+        sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
+        sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
+        sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
+        # a draw is scored only if every method selected at least one variable; otherwise the loop redraws
+        if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
+            Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
+            Lee_intervals = np.zeros((nactive_LASSO, 2))
+            Lee_intervals[:, 0] = np.asarray(Lee['lower_confidence'])
+            Lee_intervals[:, 1] = np.asarray(Lee['upper_confidence'])
+
+            sel_MLE = np.zeros(p)
+            estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
+                                                                                                         dispersion=dispersion)
+            sel_MLE[nonzero] = estimate / np.sqrt(n)
+            ind_estimator = np.zeros(p)
+            ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
+            # targets and unadjusted intervals (+/- 1.65 sd, presumably matching alpha=0.10 above -- confirm)
+            if target == "selected":
+                beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
+                beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
+                beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
+
+                post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+                unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
+                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                            post_LASSO_OLS + 1.65 * unad_sd]).T
+
+            elif target == "full":
+                beta_target_rand = beta[nonzero]
+                beta_target_nonrand_py = beta[active_LASSO]
+                beta_target_nonrand = beta[active_nonrand]
+
+                post_LASSO_OLS = np.linalg.pinv(X)[active_nonrand].dot(y)
+                unad_sd = sigma_ * np.sqrt(
+                    np.diag((np.linalg.pinv(X)[active_nonrand].dot(np.linalg.pinv(X)[active_nonrand].T))))
+                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                            post_LASSO_OLS + 1.65 * unad_sd]).T
+            # for each method, flag which of its selected indices are true signals (beta != 0)
+            true_signals = np.zeros(p, np.bool)
+            true_signals[beta != 0] = 1
+            true_set = np.asarray([u for u in range(p) if true_signals[u]])
+            active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
+            active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
+            active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
+
+            active_rand_bool = np.zeros(nonzero.sum(), np.bool)
+            for x in range(nonzero.sum()):
+                active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
+            active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
+            for w in range(nactive_nonrand):
+                active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
+            active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
+            for z in range(nactive_LASSO):
+                active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
+
+            cov_sel, power_sel = coverage(sel_intervals, beta_target_rand, nonzero.sum(), active_rand_bool)
+            cov_Lee, power_Lee = coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO,  active_LASSO_bool)
+            cov_unad, power_unad = coverage(unad_intervals, beta_target_nonrand, nactive_nonrand, active_nonrand_bool)
+            break
+    # returned in order: six relative risks, three coverages, three mean interval lengths, three power ratios
+    if True:
+        return relative_risk(sel_MLE, beta, Sigma), \
+               relative_risk(ind_estimator, beta, Sigma), \
+               relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
+               relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
+               relative_risk(rel_LASSO, beta, Sigma), \
+               relative_risk(est_LASSO, beta, Sigma), \
+               cov_sel,\
+               cov_Lee,\
+               cov_unad,\
+               (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
+               (Lee_intervals[:, 1] - Lee_intervals[:, 0]).sum() / float(nactive_LASSO), \
+               (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
+               power_sel/float((beta != 0).sum()),  \
+               power_Lee/float((beta != 0).sum()), \
+               power_unad/float((beta != 0).sum())
 
 # if __name__ == "__main__":
 #
@@ -321,9 +319,9 @@ def coverage(intervals, truth, npars, active_bool):
 #         sys.stderr.write("iteration completed " + str(i+1) + "\n")
 
 
-def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
-                              randomizer_scale=np.sqrt(0.25), target = "selected",
-                              full_dispersion = True):
+def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
+                                   randomizer_scale=np.sqrt(0.25), target = "selected",
+                                   full_dispersion = True):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
@@ -333,22 +331,16 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
         nactive_nonrand = active_nonrand.sum()
         true_mean = X.dot(beta)
 
-        _X = X
         X -= X.mean(0)[None, :]
         X /= (X.std(0)[None, :] * np.sqrt(n))
         X_val -= X_val.mean(0)[None, :]
         X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
 
-        _y = y
         y = y - y.mean()
         y_val = y_val - y_val.mean()
 
-        if full_dispersion:
-            dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
-        else:
-            dispersion = np.std(y)
-
         dispersion = None
+
         sigma_ = np.std(y)
         LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
         soln = LASSO_py.fit()
@@ -375,8 +367,6 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
             err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
-        sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
-        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
 
         randomized_lasso = const(X,
                                  y,
@@ -485,9 +475,9 @@ def comparison_risk_inference(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2
     power_unad = 0.
 
     for i in range(ndraw):
-        output = comparison_risk_inference(n=200, p=500, nval=200, rho=0.35, s=10, beta_type=2, snr=.30,
-                                           randomizer_scale=np.sqrt(0.25), target="full",
-                                           full_dispersion=False)
+        output = comparison_risk_inference_high(n=200, p=1000, nval=200, rho=0.35, s=10, beta_type=2, snr=.30,
+                                                randomizer_scale=np.sqrt(0.25), target="selected",
+                                                full_dispersion=False)
 
         risk_selMLE += output[0]
         risk_indest += output[1]

From b676a927c8fb4c294db53a8552e436e29a3c2fd8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 6 Apr 2018 14:11:47 -0700
Subject: [PATCH 552/617] removing report framework -- better to use the
 testing and comparison suite

---
 selection/algorithms/tests/test_lasso.py      |  41 +-
 selection/algorithms/tests/test_sqrt_lasso.py |  43 +-
 selection/randomized/tests/test_cv.py         |  34 +-
 .../test_cv_corrected_nonrandomized_lasso.py  |  30 +-
 .../randomized/tests/test_cv_lee_et_al.py     |  40 +-
 selection/randomized/tests/test_fixedX.py     |  15 +-
 selection/randomized/tests/test_intervals.py  |  19 +-
 .../randomized/tests/test_multiple_splits.py  |  24 +-
 selection/randomized/tests/test_naive.py      |  31 +-
 selection/randomized/tests/test_split.py      |  23 +-
 .../randomized/tests/test_split_compare.py    |  27 -
 selection/randomized/tests/test_sqrt_lasso.py |   2 +-
 selection/tests/decorators.py                 |  22 -
 selection/tests/reports.py                    | 521 ------------------
 14 files changed, 24 insertions(+), 848 deletions(-)
 delete mode 100644 selection/tests/reports.py

diff --git a/selection/algorithms/tests/test_lasso.py b/selection/algorithms/tests/test_lasso.py
index 17739a9eb..26406f69d 100644
--- a/selection/algorithms/tests/test_lasso.py
+++ b/selection/algorithms/tests/test_lasso.py
@@ -6,8 +6,7 @@
 from selection.tests.flags import SMALL_SAMPLES
 from selection.tests.instance import (gaussian_instance as instance,
                                       logistic_instance)
-from selection.tests.decorators import set_sampling_params_iftrue, wait_for_return_value, register_report
-import selection.tests.reports as reports
+from selection.tests.decorators import set_sampling_params_iftrue, wait_for_return_value
 
 from selection.algorithms.lasso import (lasso, 
                                         lasso_full,
@@ -162,7 +161,6 @@ def test_coxph():
 
     return L, C, P
 
-@register_report(['pvalue', 'split_pvalue', 'active'])
 @wait_for_return_value(max_tries=100)
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_data_carving_gaussian(n=200,
@@ -230,7 +228,6 @@ def test_data_carving_gaussian(n=200,
         v = (carve, split, active)
         return v
 
-@register_report(['pvalue', 'split_pvalue', 'active'])
 @wait_for_return_value()
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_data_carving_sqrt_lasso(n=200,
@@ -297,7 +294,6 @@ def test_data_carving_sqrt_lasso(n=200,
         return v
 
 
-@register_report(['pvalue', 'split_pvalue', 'active'])
 @wait_for_return_value()
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_data_carving_logistic(n=700,
@@ -371,7 +367,6 @@ def test_data_carving_logistic(n=700,
         v = (carve, split, active)
         return v
 
-@register_report(['pvalue', 'split_pvalue', 'active'])
 @wait_for_return_value()
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 def test_data_carving_poisson(n=500,
@@ -441,9 +436,6 @@ def test_data_carving_poisson(n=500,
         v = (carve, split, active)
         return v
        
-
-
-@register_report(['pvalue', 'split_pvalue', 'active'])
 @wait_for_return_value()
 @dec.skipif(not statsmodels_available, "needs statsmodels")
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
@@ -518,7 +510,6 @@ def test_intervals(n=100, p=20, s=5):
     S = las.summary(compute_intervals=True)
     nominal_intervals(las)
     
-@register_report(['pvalue', 'active'])
 @wait_for_return_value()
 def test_gaussian_pvals(n=100,
                         p=500,
@@ -541,7 +532,6 @@ def test_gaussian_pvals(n=100,
         S = L.summary('twosided')
         return S['pval'], [v in true_active for v in S['variable']]
 
-@register_report(['pvalue', 'active'])
 @wait_for_return_value()
 def test_sqrt_lasso_pvals(n=100,
                           p=200,
@@ -572,7 +562,6 @@ def test_sqrt_lasso_pvals(n=100,
         return S['pval'], [v in true_active for v in S['variable']]
 
 
-@register_report(['pvalue', 'active'])
 @wait_for_return_value()
 def test_sqrt_lasso_sandwich_pvals(n=200,
                                    p=50,
@@ -604,7 +593,6 @@ def test_sqrt_lasso_sandwich_pvals(n=200,
         S = L_SQ.summary('twosided')
         return S['pval'], [v in true_active for v in S['variable']]
 
-@register_report(['pvalue', 'parametric_pvalue', 'active'])
 @wait_for_return_value()
 def test_gaussian_sandwich_pvals(n=200,
                                  p=50,
@@ -664,7 +652,6 @@ def test_gaussian_sandwich_pvals(n=200,
         return P_P, P_S, [v in true_active for v in S['variable']]
 
 
-@register_report(['pvalue', 'active'])
 @wait_for_return_value()
 def test_logistic_pvals(n=500,
                         p=200,
@@ -793,29 +780,3 @@ def test_poisson_full():
     L.fit()
     L.summary(compute_intervals=True)
 
-def report(niter=50, **kwargs):
-
-    # these are all our null tests
-    fn_names = ['test_gaussian_pvals',
-                'test_logistic_pvals',
-                'test_data_carving_gaussian',
-                'test_data_carving_sqrt_lasso',
-                'test_data_carving_logistic',
-                'test_data_carving_poisson',
-                'test_data_carving_coxph'
-                ]
-
-    dfs = []
-    for fn in fn_names:
-        fn = reports.reports[fn]
-        dfs.append(reports.collect_multiple_runs(fn['test'],
-                                                 fn['columns'],
-                                                 niter,
-                                                 reports.summarize_all))
-    dfs = pd.concat(dfs)
-
-    fig = reports.pvalue_plot(dfs)
-    fig.savefig('algorithms_pvalues.pdf') 
-
-    fig = reports.split_pvalue_plot(dfs)
-    fig.savefig('algorithms_split_pvalues.pdf') 
diff --git a/selection/algorithms/tests/test_sqrt_lasso.py b/selection/algorithms/tests/test_sqrt_lasso.py
index 9d360efe6..0d05495d1 100644
--- a/selection/algorithms/tests/test_sqrt_lasso.py
+++ b/selection/algorithms/tests/test_sqrt_lasso.py
@@ -6,26 +6,21 @@
 
 import regreg.api as rr
 
-from selection.tests.instance import gaussian_instance as instance
-from selection.tests.decorators import (set_sampling_params_iftrue, 
-                                        set_seed_iftrue, 
-                                        wait_for_return_value,
-                                        register_report)
-import selection.tests.reports as reports
+from ...tests.instance import gaussian_instance as instance
+from ...tests.decorators import (set_sampling_params_iftrue, 
+                                 set_seed_iftrue, 
+                                 wait_for_return_value)
+
+from ...tests.flags import SET_SEED, SMALL_SAMPLES
+from ..sqrt_lasso import (solve_sqrt_lasso, 
+                                      choose_lambda,
+                                      goodness_of_fit,
+                                      sqlasso_objective,
+                                      sqlasso_objective_skinny,
+                                      solve_sqrt_lasso_fat,
+                                      solve_sqrt_lasso_skinny)
+from ..lasso import lasso
 
-from selection.tests.flags import SET_SEED, SMALL_SAMPLES
-from selection.algorithms.sqrt_lasso import (solve_sqrt_lasso, 
-                                             choose_lambda,
-                                             goodness_of_fit,
-                                             sqlasso_objective,
-                                             sqlasso_objective_skinny,
-                                             solve_sqrt_lasso_fat,
-                                             solve_sqrt_lasso_skinny)
-
-
-from selection.algorithms.lasso import lasso
-
-@register_report(['pvalue', 'active'])
 @wait_for_return_value()
 @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10, burnin=10, ndraw=10)
 @dec.slow
@@ -76,15 +71,5 @@ def test_skinny_fat():
 
     np.testing.assert_allclose(soln1, soln2, rtol=1.e-3)
 
-def report(niter=50, **kwargs):
-
-    _report = goodness_of_fit_report = reports.reports['test_goodness_of_fit']
-    runs = reports.collect_multiple_runs(_report['test'],
-                                         _report['columns'],
-                                         niter,
-                                         reports.summarize_all,
-                                         **kwargs)
-    fig = reports.pvalue_plot(runs)
-    fig.savefig('sqrtlasso_goodness_of_fit.pdf')
 
 
diff --git a/selection/randomized/tests/test_cv.py b/selection/randomized/tests/test_cv.py
index 97f740127..b43828c93 100644
--- a/selection/randomized/tests/test_cv.py
+++ b/selection/randomized/tests/test_cv.py
@@ -10,12 +10,10 @@
 from ...tests.instance import (gaussian_instance,
                                logistic_instance)
 
-import selection.tests.reports as reports
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from ...tests.decorators import (wait_for_return_value, 
                                  set_seed_iftrue, 
-                                 set_sampling_params_iftrue, 
-                                 register_report)
+                                 set_sampling_params_iftrue)
 
 from ..query import naive_confidence_intervals, naive_pvalues
 from ..M_estimator import restricted_Mest
@@ -28,8 +26,6 @@
 else: 
     nboot = -1
 
-@register_report(['pvalue', 'cover', 'ci_length_clt', 'naive_pvalues', 'naive_cover', 'ci_length_naive',
-                  'active', 'BH_decisions', 'active_var'])
 @set_seed_iftrue(SET_SEED)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
@@ -180,31 +176,3 @@ def test_cv(n=100, p=50, s=5, signal=7.5, K=5, rho=0.,
         BH_desicions = multipletests(pvalues, alpha=q, method="fdr_bh")[0]
         return sel_covered, sel_length, naive_pvals, naive_covered, naive_length, active_var, BH_desicions, active_var
 
-
-def report(niter=50, **kwargs):
-    np.random.seed(500)
-    intervals_report = reports.reports['test_cv']
-    runs = reports.collect_multiple_runs(intervals_report['test'],
-                                             intervals_report['columns'],
-                                             niter,
-                                             reports.summarize_all,
-                                             **kwargs)
-
-    pkl_label = ''.join([kwargs['loss'], "_", str(kwargs['condition_on_CVR']), "_", "test_cv.pkl"])
-    pdf_label = ''.join([kwargs['loss'], "_", str(kwargs['condition_on_CVR']), "_", "test_cv.pdf"])
-    runs.to_pickle(pkl_label)
-    runs_read = pd.read_pickle(pkl_label)
-
-    fig = reports.pivot_plot_plus_naive(runs_read)
-    fig.suptitle("CV pivots", fontsize=20)
-    fig.savefig(pdf_label)
-
-
-def main():
-    np.random.seed(500)
-    kwargs = {'n': 600, 'p': 20, 's': 0, 'signal': 3.5, 'K': 5, 'rho': 0.,
-              'randomizer': 'gaussian', 'randomizer_scale': 1.5,
-              'scale1': 0.1, 'scale2': 0.1,  'lam_frac': 1.,
-              'loss': 'logistic', 'intervals': 'old',
-              'bootstrap': False, 'condition_on_CVR': True, 'marginalize_subgrad':  True}
-    report(niter=1, **kwargs)
diff --git a/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py b/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
index 882173254..5d4bd0b1b 100644
--- a/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
+++ b/selection/randomized/tests/test_cv_corrected_nonrandomized_lasso.py
@@ -4,9 +4,8 @@
 import regreg.api as rr
 
 from ...tests.instance import (gaussian_instance, logistic_instance)
-import selection.tests.reports as reports
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
-from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue
 
 from ...algorithms.lasso import (glm_sandwich_estimator,
                                         lasso)
@@ -17,9 +16,6 @@
 from ..cv_view import CV_view, have_glmnet
 from .test_cv_lee_et_al import pivot, equal_tailed_interval
 
-@register_report(['pvalue', 'cover', 'ci_length_clt',
-                  'naive_pvalues', 'covered_naive', 'ci_length_naive',
-                  'active_var'])
 @set_seed_iftrue(SET_SEED)
 @wait_for_return_value()
 def test_cv_corrected_nonrandomized_lasso(n=300,
@@ -200,27 +196,3 @@ def coverage(LU):
                naive_pvalues, naive_covered, naive_length, active_var
 
 
-def report(niter=100, design="random", **kwargs):
-
-    if design == "fixed":
-        X, _, _, _, _ = gaussian_instance(**kwargs)
-        kwargs.update({'X': X})
-
-    intervals_report = reports.reports['test_cv_corrected_nonrandomized_lasso']
-    screened_results = reports.collect_multiple_runs(intervals_report['test'],
-                                             intervals_report['columns'],
-                                             niter,
-                                             reports.summarize_all,
-                                             **kwargs)
-    screened_results.to_pickle("cv_corrected_nonrandomized_lasso.pkl")
-    results = pd.read_pickle("cv_corrected_nonrandomized_lasso.pkl")
-
-    fig = reports.pvalue_plot(results, label = 'CV corrected')
-    fig.suptitle("CV corrected norandomized Lasso pivots", fontsize=20)
-    fig.savefig('cv_corrected_nonrandomized_lasso_pivots.pdf')
-
-
-def main():
-    np.random.seed(500)
-    kwargs = {'s': 0, 'n': 500, 'p': 100, 'signal': 3.5, 'sigma': 1, 'rho': 0., 'intervals':False}
-    report(niter=1, **kwargs)
diff --git a/selection/randomized/tests/test_cv_lee_et_al.py b/selection/randomized/tests/test_cv_lee_et_al.py
index 9fb7c0e61..fa65ffb58 100644
--- a/selection/randomized/tests/test_cv_lee_et_al.py
+++ b/selection/randomized/tests/test_cv_lee_et_al.py
@@ -8,12 +8,10 @@
 from ...tests.instance import gaussian_instance
 from ...algorithms.lasso import lasso
 
-import selection.tests.reports as reports
 from ...tests.flags import SET_SEED
 from ...tests.decorators import (wait_for_return_value, 
                                  set_seed_iftrue, 
-                                 set_sampling_params_iftrue, 
-                                 register_report)
+                                 set_sampling_params_iftrue)
 
 from ..cv_view import (CV_view, have_glmnet)
 from ..query import (naive_pvalues, naive_confidence_intervals)
@@ -51,9 +49,6 @@ def F(param):
     return np.array([L_conf, U_conf])
 
 
-@register_report(['pvalue', 'cover', 'ci_length_clt',
-                  'naive_pvalues', 'covered_naive', 'ci_length_naive',
-                  'active_var','BH_decisions'])
 @set_seed_iftrue(SET_SEED)
 @wait_for_return_value()
 def test_lee_et_al(n=300,
@@ -196,38 +191,5 @@ def coverage(LU):
                 naive_pvalues, naive_covered, naive_length, active_var, BH_desicions
 
 
-def report(niter=100, design="random", **kwargs):
-
-    if design=="fixed":
-        X, _, _, _, _ = gaussian_instance(**kwargs)
-        kwargs.update({'X':X})
-
-    intervals_report = reports.reports['test_lee_et_al']
-    screened_results = reports.collect_multiple_runs(intervals_report['test'],
-                                             intervals_report['columns'],
-                                             niter,
-                                             reports.summarize_all,
-                                             **kwargs)
-
-    screened_results.to_pickle("lee_et_al_pivots.pkl")
-    results = pd.read_pickle("lee_et_al_pivots.pkl")
-
-    #naive plus lee et al.
-    fig = reports.pivot_plot_plus_naive(results)
-    fig.suptitle("Lee et al. and naive p-values", fontsize=20)
-    fig.savefig('lee_et_al_pivots.pdf')
-
-    # naive only
-    fig1 = reports.naive_pvalue_plot(results)
-    fig1.suptitle("Naive p-values", fontsize=20)
-    fig1.savefig('naive_pvalues.pdf')
-
-
-def main():
-
-    np.random.seed(500)
-    kwargs = {'s': 0, 'n': 500, 'p': 100, 'signal': 3.5, 'sigma': 1, 'rho': 0., 'intervals':False,
-              'cross_validation': True, 'condition_on_CVR': False}
-    report(niter=100, **kwargs)
 
 
diff --git a/selection/randomized/tests/test_fixedX.py b/selection/randomized/tests/test_fixedX.py
index 941aa66c3..b6fcfebc8 100644
--- a/selection/randomized/tests/test_fixedX.py
+++ b/selection/randomized/tests/test_fixedX.py
@@ -5,8 +5,7 @@
 
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from ...tests.instance import gaussian_instance
-from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
-import selection.tests.reports as reports
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue
 
 from ..api import randomization 
 from ..glm import (resid_bootstrap, 
@@ -14,7 +13,6 @@
                    fixedX_group_lasso)
 
 
-@register_report(['pvalue', 'cover', 'active'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @set_seed_iftrue(SET_SEED)
 @wait_for_return_value()
@@ -79,14 +77,3 @@ def test_fixedX(ndraw=10000, burnin=2000): # nsim needed for decorator
 
         return pvalues, covered, active_var
 
-def report(niter=50, **kwargs):
-
-    fixedX_report = reports.reports['test_fixedX']
-    runs = reports.collect_multiple_runs(fixedX_report['test'],
-                                         fixedX_report['columns'],
-                                         niter,
-                                         reports.summarize_all,
-                                         **kwargs)
-
-    fig = reports.pvalue_plot(runs)
-    fig.savefig('fixedX_pivots.pdf') # will have both bootstrap and CLT on plot
diff --git a/selection/randomized/tests/test_intervals.py b/selection/randomized/tests/test_intervals.py
index 60ffef313..b8725ff10 100644
--- a/selection/randomized/tests/test_intervals.py
+++ b/selection/randomized/tests/test_intervals.py
@@ -3,10 +3,9 @@
 
 import regreg.api as rr
 
-import selection.tests.reports as reports
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from ...tests.instance import (gaussian_instance, logistic_instance)
-from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue
 
 from ..randomization import randomization
 
@@ -17,8 +16,6 @@
                    glm_parametric_covariance,
                    pairs_bootstrap_glm)
 
-@register_report(['pvalue', 'cover', 'ci_length_clt',
-                  'naive_pvalues', 'naive_cover', 'ci_length_naive', 'active'])
 @set_seed_iftrue(SET_SEED, seed=20)
 @set_sampling_params_iftrue(SMALL_SAMPLES, burnin=10, ndraw=10)
 @wait_for_return_value()
@@ -130,17 +127,3 @@ def test_intervals(s=0,
                 ci_length_naive, 
                 active_var)
 
-def report(niter=50, **kwargs):
-    kwargs = {'s': 0, 'n': 600, 'p': 100, 'signal': 7, 'bootstrap': False, 'randomizer':'gaussian',
-                    'loss':'gaussian', 'intervals':'old'}
-    intervals_report = reports.reports['test_intervals']
-    runs = reports.collect_multiple_runs(intervals_report['test'],
-                                             intervals_report['columns'],
-                                             niter,
-                                             reports.summarize_all,
-                                             **kwargs)
-    fig = reports.pivot_plot_plus_naive(runs)
-    fig.suptitle('Selective vs naive p-values after group Lasso')
-    fig.savefig('Group_lasso.pdf')
-
-
diff --git a/selection/randomized/tests/test_multiple_splits.py b/selection/randomized/tests/test_multiple_splits.py
index 71b0e82b8..480a01557 100644
--- a/selection/randomized/tests/test_multiple_splits.py
+++ b/selection/randomized/tests/test_multiple_splits.py
@@ -3,15 +3,13 @@
 
 import regreg.api as rr
 
-import selection.tests.reports as reports
-
 
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
 from selection.api import (randomization,
                            split_glm_group_lasso,
                            multiple_queries)
 from ...tests.instance import logistic_instance
-from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
+from ...tests.decorators import wait_for_return_value, set_sampling_params_iftrue
 
 from ..glm import (standard_split_ci,
                    glm_nonparametric_bootstrap,
@@ -20,10 +18,6 @@
 from ..M_estimator import restricted_Mest
 from ..query import naive_confidence_intervals
 
-@register_report(['pivots_clt', 'pivots_boot',
-                  'covered_clt', 'ci_length_clt',
-                  'covered_boot', 'ci_length_boot',
-                  'active', 'covered_naive'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @wait_for_return_value()
 def test_multiple_splits(s=3,
@@ -132,19 +126,3 @@ def coverage(LU):
             ci_length_naive)
 
 
-def report(niter=3, **kwargs):
-
-    kwargs = {'s': 0, 'n': 300, 'p': 20, 'signal': 7, 'split_frac': 0.5, 'nsplits':3}
-    split_report = reports.reports['test_multiple_splits']
-    screened_results = reports.collect_multiple_runs(split_report['test'],
-                                                     split_report['columns'],
-                                                     niter,
-                                                     reports.summarize_all,
-                                                     **kwargs)
-
-    fig = reports.boot_clt_plot(screened_results, inactive=True, active=False)
-    fig.savefig('multiple_splits.pdf') # will have both bootstrap and CLT on plot
-
-
-if __name__=='__main__':
-    report()
diff --git a/selection/randomized/tests/test_naive.py b/selection/randomized/tests/test_naive.py
index 31b7309c3..b81406d2b 100644
--- a/selection/randomized/tests/test_naive.py
+++ b/selection/randomized/tests/test_naive.py
@@ -8,9 +8,8 @@
 
 from ...tests.instance import gaussian_instance
 from ...algorithms.lasso import lasso
-import selection.tests.reports as reports
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
-from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue
 from ..cv_view import CV_view, have_glmnet
 from ..query import (naive_pvalues, naive_confidence_intervals)
 
@@ -29,7 +28,6 @@ def compute_projection_parameters(n, p, s, signal, rho, sigma, active):
     return proj_param
 
 
-@register_report(['naive_pvalues', 'covered_naive', 'ci_length_naive', 'active_var'])
 @set_seed_iftrue(SET_SEED)
 @wait_for_return_value()
 def test_naive(n=300,
@@ -145,31 +143,4 @@ def coverage(LU):
         return  naive_pvalues, naive_covered, naive_length, active_var
 
 
-def report(niter=50, design="random", **kwargs):
-
-    if design=="fixed":
-        X, _, _, _, _ = gaussian_instance(**kwargs)
-        kwargs.update({'X':X})
-
-    kwargs.update({'cross_validation':True, 'condition_on_CVR':False})
-    intervals_report = reports.reports['test_naive']
-    screened_results = reports.collect_multiple_runs(intervals_report['test'],
-                                             intervals_report['columns'],
-                                             niter,
-                                             reports.summarize_all,
-                                             **kwargs)
-
-    screened_results.to_pickle("naive.pkl")
-    results = pd.read_pickle("naive.pkl")
-
-    fig = reports.naive_pvalue_plot(results)
-    #fig = reports.pvalue_plot(results, label="Naive p-values")
-    fig.suptitle("Naive p-values", fontsize=20)
-    fig.savefig('naive_pvalues.pdf')
-
-def main():
-    np.random.seed(500)
-    kwargs = {'s': 0, 'n': 100, 'p': 50, 'signal': 3.5, 'sigma': 1, 'rho': 0., 'intervals':True}
-    report(niter=100, **kwargs)
-
 
diff --git a/selection/randomized/tests/test_split.py b/selection/randomized/tests/test_split.py
index 49bbdb77e..71732f970 100644
--- a/selection/randomized/tests/test_split.py
+++ b/selection/randomized/tests/test_split.py
@@ -3,8 +3,7 @@
 
 import regreg.api as rr
 
-from ...tests.decorators import wait_for_return_value, register_report, set_sampling_params_iftrue
-import selection.tests.reports as reports
+from ...tests.decorators import wait_for_return_value, set_sampling_params_iftrue
 from ...tests.flags import SMALL_SAMPLES
 from ...tests.instance import logistic_instance
 
@@ -14,7 +13,6 @@
                    pairs_bootstrap_glm)
 from ..M_estimator import restricted_Mest
 
-@register_report(['pvalue', 'cover', 'active'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @wait_for_return_value()
 def test_split(s=3,
@@ -94,22 +92,3 @@ def test_split(s=3,
 
         return pvalues, covered, active_var
 
-def report(niter=50, **kwargs):
-
-    split_report = reports.reports['test_split']
-    CLT_runs = reports.collect_multiple_runs(split_report['test'],
-                                             split_report['columns'],
-                                             niter,
-                                             reports.summarize_all,
-                                             **kwargs)
-    kwargs['bootstrap'] = False
-    fig = reports.pivot_plot(CLT_runs, color='b', label='CLT')
-
-    kwargs['bootstrap'] = True
-    bootstrap_runs = reports.collect_multiple_runs(split_report['test'],
-                                                   split_report['columns'],
-                                                   niter,
-                                                   reports.summarize_all,
-                                                   **kwargs)
-    fig = reports.pivot_plot(bootstrap_runs, color='g', label='Bootstrap', fig=fig)
-    fig.savefig('split_pivots.pdf') # will have both bootstrap and CLT on plot
diff --git a/selection/randomized/tests/test_split_compare.py b/selection/randomized/tests/test_split_compare.py
index 2031da1a1..a1297dfd0 100644
--- a/selection/randomized/tests/test_split_compare.py
+++ b/selection/randomized/tests/test_split_compare.py
@@ -3,15 +3,12 @@
 
 import regreg.api as rr
 
-import selection.tests.reports as reports
-
 from ...tests.flags import SMALL_SAMPLES
 from selection.api import (randomization, 
                            split_glm_group_lasso)
 
 from ...tests.instance import logistic_instance
 from ...tests.decorators import (wait_for_return_value, 
-                                 register_report, 
                                  set_sampling_params_iftrue)
 
 from ..glm import (standard_split_ci,
@@ -21,14 +18,6 @@
 from ..M_estimator import restricted_Mest
 from ..query import naive_confidence_intervals
 
-@register_report(['pivots_clt', 
-                  'covered_clt', 
-                  'ci_length_clt', 
-                  'covered_split', 
-                  'ci_length_split', 
-                  'active', 
-                  'covered_naive',
-                  'ci_length_naive'])
 @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10)
 @wait_for_return_value()
 def test_split_compare(s=3,
@@ -138,19 +127,3 @@ def coverage(LU):
                 ci_length_naive)
 
 
-def report(niter=3, **kwargs):
-
-    kwargs = {'s': 0, 'n': 300, 'p': 20, 'signal': 7, 'split_frac': 0.8}
-    split_report = reports.reports['test_split_compare']
-    screened_results = reports.collect_multiple_runs(split_report['test'],
-                                                     split_report['columns'],
-                                                     niter,
-                                                     reports.summarize_all,
-                                                     **kwargs)
-
-    fig = reports.boot_clt_plot(screened_results, inactive=True, active=False)
-    fig.savefig('split_compare_pivots.pdf') # will have both bootstrap and CLT on plot
-
-
-if __name__=='__main__':
-    report()
diff --git a/selection/randomized/tests/test_sqrt_lasso.py b/selection/randomized/tests/test_sqrt_lasso.py
index 49da3e1d5..089a8696a 100644
--- a/selection/randomized/tests/test_sqrt_lasso.py
+++ b/selection/randomized/tests/test_sqrt_lasso.py
@@ -19,7 +19,7 @@
                    pairs_bootstrap_glm)
 
 from ...tests.flags import SMALL_SAMPLES, SET_SEED
-from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue, register_report
+from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue
 
 def choose_lambda_with_randomization(X, randomization, quantile=0.90, ndraw=10000):
     X = rr.astransform(X)
diff --git a/selection/tests/decorators.py b/selection/tests/decorators.py
index b25401007..d29e98f25 100644
--- a/selection/tests/decorators.py
+++ b/selection/tests/decorators.py
@@ -5,7 +5,6 @@
 import nose
 import nose.tools
 
-from .reports import reports
 
 def set_seed_iftrue(condition, seed=10):
     """
@@ -147,24 +146,3 @@ def _new_test(*args, **kwargs):
 
     return wait_for_decorator
 
-def register_report(columns):
-    """
-    Register a report in selection.tests.reports
-    that can be used to create simulation results
-    """
-
-    def register_decorator(test):
-
-        @wraps(test)
-        def _new_test(*args, **kwargs):
-            return test(*args, **kwargs)
-        if hasattr(test, 'func_name'): # Py2.*
-            name = test.func_name
-        else:
-            name = test.__name__       # Py3.*
-        if name in reports:
-            print('Overwriting existing report %s' % name)
-        reports[name] = {'test':_new_test, 'columns':columns}
-        return nose.tools.make_decorator(test)(_new_test)
-
-    return register_decorator
diff --git a/selection/tests/reports.py b/selection/tests/reports.py
deleted file mode 100644
index 5b7d047bc..000000000
--- a/selection/tests/reports.py
+++ /dev/null
@@ -1,521 +0,0 @@
-"""
-special column names:
-mle -- pivot at unpenalized MLE
-truth -- pivot at true parameter
-pvalue -- tests of H0 for each variable
-count -- how many runs (including last one) until success
-active -- was variable truly active
-naive_pvalue --
-cover -- 
-naive_cover --
-"""
-from __future__ import division
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy.stats import probplot, uniform
-import statsmodels.api as sm
-
-def collect_multiple_runs(test_fn, columns, nrun, summary_fn, *args, **kwargs):
-    """
-    Assumes a wait_for_return_value test...
-    """
-
-    dfs = [] 
-    for i in range(nrun):
-        print(i)
-        count, result = test_fn(*args, **kwargs)
-
-        #print(result)
-        #print(len(np.atleast_1d(result[0])))
-        if hasattr(result, "__len__"):
-            df_i = pd.DataFrame(index=np.arange(len(np.atleast_1d(result[0]))),
-                                columns=columns + ['count', 'run'])
-        else:
-            df_i = pd.DataFrame(index=np.arange(1),
-                                columns=columns + ['count', 'run'])
-
-        df_i = pd.DataFrame(index=np.arange(len(np.atleast_1d(result[0]))), 
-                            columns=columns + ['count', 'run'])
-
-        df_i.loc[:,'count'] = count
-        df_i.loc[:,'run'] = i
-
-        for col, v in zip(columns, result):
-            df_i.loc[:,col] = np.atleast_1d(v)
-
-        df_i['func'] = [str(test_fn)] * len(df_i)
-        dfs.append(df_i)
-        if summary_fn is not None:
-            summary_fn(pd.concat(dfs))
-    return pd.concat(dfs)
-
-def pvalue_plot(multiple_results, screening=False, fig=None, label = '$H_0$', colors=['b','r']):
-    """
-    Extract pvalues and group by 
-    null and alternative.
-    """
-    P0 = multiple_results['pvalue'][~multiple_results['active_var']]
-    P0 = P0[~pd.isnull(P0)]
-    PA = multiple_results['pvalue'][multiple_results['active_var']]
-    PA = PA[~pd.isnull(PA)]
-
-    if fig is None:
-        fig = plt.figure()
-    ax = fig.gca()
-
-    fig.suptitle('Null and alternative p-values')
-
-    grid = np.linspace(0, 1, 51)
-
-    if len(P0) > 0:
-        ecdf0 = sm.distributions.ECDF(P0)
-        F0 = ecdf0(grid)
-        ax.plot(grid, F0, '--o', c=colors[0], lw=2, label=label)
-    if len(PA) > 0:
-        ecdfA = sm.distributions.ECDF(PA)
-        FA = ecdfA(grid)
-        ax.plot(grid, FA, '--o', c=colors[1], lw=2, label=r'$H_A$')
-
-    ax.plot([0, 1], [0, 1], 'k-', lw=1)
-    ax.set_xlabel("observed p-value", fontsize=18)
-    ax.set_ylabel("empirical CDF", fontsize=18)
-    ax.legend(loc='lower right', fontsize=18)
-
-    if screening:
-        screen = 1. / np.mean(multiple_results.loc[multiple_results.index == 0,'count'])
-        ax.set_title('Screening: %0.2f' % screen)
-    return fig
-
-def naive_pvalue_plot(multiple_results, screening=False, fig=None, colors=['r', 'g']):
-    """
-    Extract naive pvalues and group by
-    null and alternative.
-    """
-
-    P0 = multiple_results['naive_pvalues'][~multiple_results['active_var']]
-    P0 = P0[~pd.isnull(P0)]
-    PA = multiple_results['naive_pvalues'][multiple_results['active_var']]
-    PA = PA[~pd.isnull(PA)]
-
-    if fig is None:
-        fig = plt.figure()
-    ax = fig.gca()
-
-    fig.suptitle('Null and alternative p-values')
-
-    grid = np.linspace(0, 1, 51)
-
-    if len(P0) > 0:
-        ecdf0 = sm.distributions.ECDF(P0)
-        F0 = ecdf0(grid)
-        ax.plot(grid, F0, '--o', c=colors[0], lw=2, label=r'Naive p-values')
-    if len(PA) > 0:
-        ecdfA = sm.distributions.ECDF(PA)
-        FA = ecdfA(grid)
-        ax.plot(grid, FA, '--o', c=colors[1], lw=2, label=r'$H_A$ naive')
-
-    ax.plot([0, 1], [0, 1], 'k-', lw=2)
-
-    ax.set_xlabel("Observed p-pvalue", fontsize=18)
-    ax.set_ylabel("Empirical CDF", fontsize=18)
-    ax.legend(loc='lower right', fontsize=18)
-
-    if screening:
-        screen = 1. / np.mean(multiple_results.loc[multiple_results.index == 0,'count'])
-        ax.set_title('Screening: %0.2f' % screen)
-
-    return fig
-
-def split_pvalue_plot(multiple_results, screening=False, fig=None):
-    """
-    Compare pvalues where we have a split_pvalue
-    """
-
-    have_split = ~pd.isnull(multiple_results['split_pvalue'])
-    multiple_results = multiple_results.loc[have_split]
-
-    P0_s = multiple_results['split_pvalue'][~multiple_results['active']]
-    PA_s = multiple_results['split_pvalue'][multiple_results['active']]
-
-    # presumes we also have a pvalue
-    P0 = multiple_results['pvalue'][~multiple_results['active']]
-    PA = multiple_results['pvalue'][multiple_results['active']]
-
-    if fig is None:
-        fig = plt.figure()
-    ax = fig.gca()
-
-    fig.suptitle('Null and alternative p-values')
-
-    grid = np.linspace(0, 1, 51)
-
-    if len(P0) > 0:
-        ecdf0 = sm.distributions.ECDF(P0)
-        F0 = ecdf0(grid)
-        ax.plot(grid, F0, '--o', c='r', lw=2, label=r'$H_0$')
-    if len(PA) > 0:
-        ecdfA = sm.distributions.ECDF(PA)
-        FA = ecdfA(grid)
-        ax.plot(grid, FA, '--o', c='g', lw=2, label=r'$H_A$')
-
-    if len(P0_s) > 0:
-        ecdf0 = sm.distributions.ECDF(P0_s)
-        F0 = ecdf0(grid)
-        ax.plot(grid, F0, '-+', c='r', lw=2, label=r'$H_0$ split')
-    if len(PA) > 0:
-        ecdfA = sm.distributions.ECDF(PA_s)
-        FA = ecdfA(grid)
-        ax.plot(grid, FA, '-+', c='g', lw=2, label=r'$H_A$ split')
-
-    ax.plot([0, 1], [0, 1], 'k-', lw=2)
-    ax.legend(loc='lower right')
-
-    if screening:
-        screen = 1. / np.mean(multiple_results.loc[multiple_results.index == 0,'count'])
-        ax.set_title('Screening: %0.2f' % screen)
-
-def pivot_plot_simple(multiple_results, coverage=True, color='b', label=None, fig=None):
-    """
-    Extract pivots at truth and mle.
-    """
-
-    if fig is None:
-        fig, _ = plt.subplots(nrows=1, ncols=2)
-        plot_pivots, _ = fig.axes
-        plot_pivots.set_title("CLT Pivots")
-    else:
-        _, plot_pivots = fig.axes
-        plot_pivots.set_title("Bootstrap Pivots")
-
-    if 'pivot' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['pivot'])
-    elif 'truth' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['truth'])
-
-    G = np.linspace(0, 1)
-    F_pivot = ecdf(G)
-    #print(color)
-    plot_pivots.plot(G, F_pivot, '-o', c=color, lw=2, label=label)
-    plot_pivots.plot([0, 1], [0, 1], 'k-', lw=2)
-    plot_pivots.set_xlim([0, 1])
-    plot_pivots.set_ylim([0, 1])
-
-    return fig
-
-
-def pivot_plot_2in1(multiple_results, coverage=True, color='b', label=None, fig=None):
-    """
-    Extract pivots at truth and mle.
-    """
-
-    if fig is None:
-        fig = plt.figure()
-    ax = fig.gca()
-
-    fig.suptitle('Plugin CLT and bootstrap pivots')
-
-    if 'pivot' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['pivot'])
-    elif 'truth' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['truth'])
-    elif 'pvalue' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['pvalue'])
-
-    G = np.linspace(0, 1)
-    F_pivot = ecdf(G)
-    #print(color)
-    ax.plot(G, F_pivot, '-o', c=color, lw=2, label=label)
-    ax.plot([0, 1], [0, 1], 'k-', lw=2)
-    ax.set_xlim([0, 1])
-    ax.set_ylim([0, 1])
-    ax.legend(loc='lower right')
-
-    return fig
-
-def pivot_plot_2in1(multiple_results, coverage=True, color='b', label=None, fig=None):
-    """
-    Extract pivots at truth and mle.
-    """
-
-    if fig is None:
-        fig = plt.figure()
-    ax = fig.gca()
-
-    fig.suptitle('Plugin CLT and bootstrap pivots')
-
-    if 'pivot' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['pivot'])
-    elif 'truth' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['truth'])
-    elif 'pvalue' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['pvalue'])
-
-    G = np.linspace(0, 1)
-    F_pivot = ecdf(G)
-    #print(color)
-    ax.plot(G, F_pivot, '-o', c=color, lw=2, label=label)
-    ax.plot([0, 1], [0, 1], 'k-', lw=2)
-    ax.set_xlim([0, 1])
-    ax.set_ylim([0, 1])
-    ax.legend(loc='lower right')
-
-    return fig
-
-
-def pivot_plot_plus_naive(multiple_results, coverage=True, color='b', label=None, fig=None):
-    """
-    Extract pivots at truth and mle.
-    """
-
-    if fig is None:
-        fig = plt.figure()
-    ax = fig.gca()
-
-    fig.suptitle('Lee et al. and naive p-values')
-
-    if 'pivot' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['pivot'])
-    elif 'truth' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['truth'])
-    elif 'pvalue' in multiple_results.columns:
-        ecdf = sm.distributions.ECDF(multiple_results['pvalue'])
-
-    G = np.linspace(0, 1)
-    F_pivot = ecdf(G)
-    #print(color)
-
-    ax.plot(G, F_pivot, '-o', c=color, lw=2, label="Lee et al. p-values")
-    ax.plot([0, 1], [0, 1], 'k-', lw=2)
-
-    if 'naive_pvalues' in multiple_results.columns:
-        ecdf_naive = sm.distributions.ECDF(multiple_results['naive_pvalues'])
-    F_naive = ecdf_naive(G)
-
-    ax.plot(G, F_naive, '-o', c='r', lw=2, label="Naive p-values")
-    ax.plot([0, 1], [0, 1], 'k-', lw=2)
-
-    ax.set_xlim([0, 1])
-    ax.set_ylim([0, 1])
-
-    ax.set_xlabel("Observed value", fontsize=18)
-    ax.set_ylabel("Empirical CDF", fontsize=18)
-    ax.legend(loc='lower right', fontsize=18)
-
-    return fig
-
-
-def pivot_plot(multiple_results, coverage=True, color='b', label=None, fig=None):
-    """
-    Extract pivots at truth and mle.
-    """
-
-    if fig is None:
-        fig, _ = plt.subplots(nrows=1, ncols=2)
-    plot_pvalues_mle, plot_pvalues_truth = fig.axes
-
-    ecdf_mle = sm.distributions.ECDF(multiple_results['mle'])
-    G = np.linspace(0, 1)
-    F_MLE = ecdf_mle(G)
-    print(color)
-    plot_pvalues_mle.plot(G, F_MLE, '-o', c=color, lw=2, label=label)
-    plot_pvalues_mle.plot([0, 1], [0, 1], 'k-', lw=2)
-    plot_pvalues_mle.set_title("Pivots at the unpenalized MLE")
-    plot_pvalues_mle.set_xlim([0, 1])
-    plot_pvalues_mle.set_ylim([0, 1])
-    plot_pvalues_mle.legend(loc='lower right')
-
-    ecdf_truth = sm.distributions.ECDF(multiple_results['truth'])
-    F_true = ecdf_truth(G)
-    plot_pvalues_truth.plot(G, F_true, '-o', c=color, lw=2, label=label)
-    plot_pvalues_truth.plot([0, 1], [0, 1], 'k-', lw=2)
-    plot_pvalues_truth.set_title("Pivots at the truth (by tilting)")
-    plot_pvalues_truth.set_xlim([0, 1])
-    plot_pvalues_truth.set_ylim([0, 1])
-    plot_pvalues_truth.legend(loc='lower right')
-
-    if coverage:
-        if 'naive_cover' in multiple_results.columns:
-            fig.suptitle('Coverage: %0.2f, Naive: %0.2f' % (np.mean(multiple_results['cover']), 
-                                                            np.mean(multiple_results['naive_cover'])))
-        else:
-            fig.suptitle('Coverage: %0.2f' % np.mean(multiple_results['cover'])) 
-
-    return fig
-
-def boot_clt_plot(multiple_results, coverage=True, label=None, fig=None, active=True, inactive=True):
-    """
-    Extract pivots at truth and mle.
-    """
-
-    test = np.zeros_like(multiple_results['active'])
-    if active:
-        test += multiple_results['active']
-    if inactive:
-        test += ~multiple_results['active']
-    multiple_results = multiple_results[test]
-    print(test.sum(), test.shape)
-
-    if fig is None:
-        fig = plt.figure()
-    ax = fig.gca()
-
-    ecdf_clt = sm.distributions.ECDF(multiple_results['pivots_clt'])
-    G = np.linspace(0, 1)
-    F_MLE = ecdf_clt(G)
-    ax.plot(G, F_MLE, '-o', c='b', lw=2, label='CLT')
-    ax.plot([0, 1], [0, 1], 'k-', lw=2)
-    ax.set_xlim([0, 1])
-    ax.set_ylim([0, 1])
-
-    ecdf_boot = sm.distributions.ECDF(multiple_results['pivots_boot'])
-    F_true = ecdf_boot(G)
-    ax.plot(G, F_true, '-o', c='g', lw=2, label='Bootstrap')
-    ax.plot([0, 1], [0, 1], 'k-', lw=2)
-    ax.set_xlim([0, 1])
-    ax.set_ylim([0, 1])
-    ax.legend(loc='lower right')
-    #plot_pvalues_boot.legend(loc='lower right')
-
-    if coverage:
-        if 'covered_split' in multiple_results.columns:
-            fig.suptitle('CLT Coverage: %0.2f, Boot: %0.2f, Naive: %0.2f, Split: %0.2f' % (np.mean(multiple_results['covered_clt']),
-                            np.mean(multiple_results['covered_boot']), np.mean(multiple_results['covered_naive']),
-                                                                      np.mean(multiple_results['covered_split'])))
-        else:
-
-            fig.suptitle('CLT Coverage: %0.2f, Boot: %0.2f, Naive: %0.2f' % (np.mean(multiple_results['covered_clt']),
-                                                                             np.mean(multiple_results['covered_boot']),
-                                                                             np.mean(multiple_results['covered_naive'])))
-    return fig
-
-def compute_pivots(multiple_results):
-    if 'truth' in multiple_results.columns:
-        pivots = multiple_results['truth']
-        return {'pivot (mean, SD, type I):': (np.mean(pivots), np.std(pivots), np.mean(pivots < 0.05))}
-
-    if 'truth' in multiple_results.columns:
-        pivots = multiple_results['truth']
-        return {'pivot (mean, SD, type I):': (np.mean(pivots), np.std(pivots), np.mean(pivots < 0.05))}
-    if 'pvalue' in multiple_results.columns:
-        pivots = multiple_results['pvalue']
-        return {'selective pvalues (mean, SD, type I):': (np.mean(pivots), np.std(pivots), np.mean(pivots < 0.05))}
-    return {}
-
-def compute_naive_pivots(multiple_results):
-    if 'naive_pvalues' in multiple_results.columns:
-        pivots = multiple_results['naive_pvalues']
-        return {'naive pvalues (mean, SD, type I):': (np.mean(pivots), np.std(pivots), np.mean(pivots < 0.05))}
-    return {}
-
-def boot_clt_pivots(multiple_results):
-    pivot_summary = {}
-    if 'pivots_clt' in multiple_results.columns:
-        pivots_clt = multiple_results['pivots_clt']
-        pivot_summary['pivots_clt'] = {'CLT pivots (mean, SD, type I):': (np.mean(pivots_clt), np.std(pivots_clt), np.mean(pivots_clt < 0.05))}
-    if 'pivots_boot' in multiple_results.columns:
-        pivots_boot = multiple_results['pivots_boot']
-        pivot_summary['pivots_boot'] = {'Bootstrap pivots (mean, SD, type I):': (np.mean(pivots_boot), np.std(pivots_boot), np.mean(pivots_boot < 0.05))}
-    if 'pivot' in multiple_results.columns:
-        pivots = multiple_results['pivot']
-        pivot_summary['pivots'] = {'pivots (mean, SD, type I):': (np.mean(pivots), np.std(pivots), np.mean(pivots < 0.05))}
-
-    if 'naive_pvalues' in multiple_results.columns:
-        naive_pvalues = multiple_results['naive_pvalues']
-        pivot_summary['naive_pvalues'] = {'pivots (mean, SD, type I):': (np.mean(naive_pvalues), np.std(naive_pvalues), np.mean(naive_pvalues < 0.05))}
-
-
-    return pivot_summary
-
-def compute_coverage(multiple_results):
-    result = {}
-    if 'naive_cover' in multiple_results.columns:
-        result['naive coverage'] = np.mean(multiple_results['naive_cover'])
-    if 'cover' in multiple_results.columns:
-        result['selective coverage'] = np.mean(multiple_results['cover'])
-    return result
-
-def boot_clt_coverage(multiple_results): #
-    result = {}
-    if 'covered_naive' in multiple_results.columns:
-        result['naive coverage'] = np.mean(multiple_results['covered_naive'])
-    if 'covered_boot' in multiple_results.columns:
-        result['boot coverage'] = np.mean(multiple_results['covered_boot'])
-    if 'covered_clt' in multiple_results.columns:
-        result['clt coverage'] = np.mean(multiple_results['covered_clt'])
-    if 'covered_split' in multiple_results.columns:
-        result['split coverage'] = np.mean(multiple_results['covered_split'])
-    return result
-
-
-def compute_lengths(multiple_results):
-    result = {}
-    if 'ci_length_clt' in multiple_results.columns:
-        result['ci_length_clt'] = np.mean(multiple_results['ci_length_clt'])
-    if 'ci_length_boot' in multiple_results.columns:
-        result['ci_length_boot'] = np.mean(multiple_results['ci_length_boot'])
-    if 'ci_length_split' in multiple_results.columns:
-        result['ci_length_split'] = np.mean(multiple_results['ci_length_split'])
-    if 'ci_length_naive' in multiple_results.columns:
-        result['ci_length_naive'] = np.mean(multiple_results['ci_length_naive'])
-
-    if 'ci_length' in multiple_results.columns:
-        result['ci_length'] = np.mean(multiple_results['ci_length'])
-    return result
-
-def compute_length_frac(multiple_results):
-    result = {}
-    if 'ci_length_clt' and 'ci_length_split' in multiple_results.columns:
-        split = multiple_results['ci_length_split']
-        clt = multiple_results['ci_length_clt']
-        split = split[~np.isnan(clt)]
-        clt = clt[~np.isnan(clt)]
-        result['split/clt'] = np.median(np.divide(split, clt))
-    if 'ci_length_boot' and 'ci_length_split' in multiple_results.columns:
-        split = multiple_results['ci_length_split']
-        boot = multiple_results['ci_length_boot']
-        split = split[~np.isnan(boot)]
-        boot = clt[~np.isnan(boot)]
-        result['split/boot'] = np.median(np.divide(split, boot))
-    return result
-
-def compute_FDP(multiple_results):
-    result = {}
-    if ('BH_decisions' in multiple_results.columns) and ('active_var' in multiple_results.columns):
-        BH_decisions = multiple_results['BH_decisions']
-        active_var = multiple_results['active_var']
-        BH_TP = BH_decisions[active_var].sum()
-        FDP = (BH_decisions.sum()-BH_TP)/(1.*max(BH_decisions.sum(),1))
-        result['FDP'] = FDP
-    return result
-
-
-def compute_power(multiple_results):
-    result = {}
-    if ('BH_decisions' in multiple_results.columns) and ('active_var' in multiple_results.columns):
-        BH_decisions = multiple_results['BH_decisions']
-        active_var = multiple_results['active_var']
-        BH_TP = BH_decisions[active_var].sum()
-        power = BH_TP
-        result['power'] = power
-    return result
-
-def compute_screening(multiple_results):
-    return {'screening:': 1. / np.mean(multiple_results.loc[multiple_results.index == 0,'count'])}
-
-def summarize_all(multiple_results):
-    result = {}
-    result.update(boot_clt_pivots(multiple_results))
-    result.update(compute_pivots(multiple_results))
-    result.update(boot_clt_coverage(multiple_results))
-    result.update(compute_coverage(multiple_results))
-    result.update(compute_screening(multiple_results))
-    result.update(compute_lengths(multiple_results))
-    result.update(compute_length_frac(multiple_results))
-    result.update(compute_FDP(multiple_results))
-    result.update(compute_power(multiple_results))
-    result.update(compute_naive_pivots(multiple_results))
-    for i in result:
-        print(i, result[i])
-
-reports = {}

From 5cdf084e1340a8aea9606e177280c74803c7066a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 6 Apr 2018 22:16:41 -0700
Subject: [PATCH 553/617] updated test for debiased target

---
 .../adjusted_MLE/tests/test_risk_coverage.py  |  2 +-
 .../tests/test_selective_MLE_high.py          | 21 +++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 9b8561065..807c7e07c 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -491,7 +491,7 @@ def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_t
         coverage_unad += output[7]
 
         length_sel += output[8]
-       # length_Lee += output[10]
+        #length_Lee += output[10]
         length_unad += output[9]
 
         power_sel += output[10]
diff --git a/selection/randomized/tests/test_selective_MLE_high.py b/selection/randomized/tests/test_selective_MLE_high.py
index 233875ec8..83ef81b49 100644
--- a/selection/randomized/tests/test_selective_MLE_high.py
+++ b/selection/randomized/tests/test_selective_MLE_high.py
@@ -42,7 +42,7 @@ def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, rand
     if full_dispersion:
         dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y)))**2 / (n - p)
 
-    estimate, _, _, pval, intervals = conv.selective_MLE(target="full", dispersion=dispersion)
+    estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="full", dispersion=dispersion)
 
     coverage = (beta[nonzero] > intervals[:,0]) * (beta[nonzero] < intervals[:,1])
     return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage
@@ -80,32 +80,35 @@ def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4,
     if full_dispersion:
         dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y)))**2 / (n - p)
 
-    estimate, _, _, pval, intervals = conv.selective_MLE(target="selected", dispersion=dispersion)
+    estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=dispersion)
 
     beta_target = np.linalg.pinv(X[:,nonzero]).dot(X.dot(beta))
 
     coverage = (beta_target > intervals[:,0]) * (beta_target < intervals[:,1])
     return pval[beta_target == 0], pval[beta_target != 0], coverage
 
-print(test_selected_targets())
-
-def main(nsim=500, full=True, full_dispersion=False):
+def main(nsim=500, full=True):
 
     P0, PA, cover = [], [], []
     from statsmodels.distributions import ECDF
 
-    n, p, s = 500, 200, 20
+    n, p, s = 200, 500, 10
 
     for i in range(nsim):
         if full:
+            if n>p:
+                full_dispersion = True
+            else:
+                full_dispersion = False
             p0, pA, cover_ = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
         else:
+            full_dispersion = True
             p0, pA, cover_ = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
 
         cover.extend(cover_)
         P0.extend(p0)
         PA.extend(pA)
-        print(np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.05), np.mean(np.array(PA) < 0.05), np.mean(cover), 'null pvalue + power')
+        print(np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), 'null pvalue + power')
     
         if i % 3 == 0 and i > 0:
             U = np.linspace(0, 1, 101)
@@ -115,8 +118,8 @@ def main(nsim=500, full=True, full_dispersion=False):
             if len(PA) > 0:
                 plt.plot(U, ECDF(PA)(U), 'r')
             plt.plot([0, 1], [0, 1], 'k--')
-            plt.savefig("plot.pdf")
+            plt.savefig("/Users/snigdhapanigrahi/Desktop/plot.pdf")
     plt.show()
 
-#main()
+main()
 

From b2db3b742dc7541f929d3a73ca17b13205f33206 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 6 Apr 2018 22:45:37 -0700
Subject: [PATCH 554/617] debiased lasso inference based on selective MLE
 tested

---
 selection/randomized/tests/test_selective_MLE_high.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/tests/test_selective_MLE_high.py b/selection/randomized/tests/test_selective_MLE_high.py
index 83ef81b49..6491e063f 100644
--- a/selection/randomized/tests/test_selective_MLE_high.py
+++ b/selection/randomized/tests/test_selective_MLE_high.py
@@ -9,7 +9,7 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, randomizer_scale=1, full_dispersion=True):
+def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, randomizer_scale=0.25, full_dispersion=True):
     """
     Compare to R randomized lasso
     """
@@ -92,7 +92,7 @@ def main(nsim=500, full=True):
     P0, PA, cover = [], [], []
     from statsmodels.distributions import ECDF
 
-    n, p, s = 200, 500, 10
+    n, p, s = 200, 1000, 20
 
     for i in range(nsim):
         if full:

From 6a54fccdce75f3d696f49057f20bc2721a589c34 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 7 Apr 2018 11:46:30 -0700
Subject: [PATCH 555/617] modelX to modelQ

---
 selection/algorithms/lasso.py                 | 15 +++++++++------
 selection/algorithms/tests/test_lasso_full.py |  6 +++---
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/selection/algorithms/lasso.py b/selection/algorithms/lasso.py
index 8fd5905fd..3ad41ea4e 100644
--- a/selection/algorithms/lasso.py
+++ b/selection/algorithms/lasso.py
@@ -2342,18 +2342,19 @@ def poisson(X,
         loglike = glm.poisson(X, counts, quadratic=quadratic)
         return lasso_full(loglike, feature_weights)
 
-class lasso_full_modelX(lasso):
+class lasso_full_modelQ(lasso):
 
     r"""
-    A class for the LASSO for post-selection inference.
+    A class for the LASSO for post-selection inference
+    in which the quadratic form $Q$ is supplied by the caller.
     The problem solved is
 
     .. math::
 
-        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 + 
-            \lambda \|\beta\|_1
+        \text{minimize}_{\beta} -(X\beta)^Ty + \frac{1}{2} \beta^TQ\beta + 
+            \sum_i \lambda_i |\beta_i|
 
-    where $\lambda$ is `lam`.
+    where $\lambda$ is `feature_weights`.
 
     Notes
     -----
@@ -2383,7 +2384,9 @@ def __init__(self,
 
         Q : np.ndarray((p,p))
 
-        sufficient_stat : np.ndarray(p)
+        X : np.ndarray((n, p))
+
+        y : np.ndarray(n)
 
         feature_weights : np.ndarray
             Feature weights for L-1 penalty. If a float,
diff --git a/selection/algorithms/tests/test_lasso_full.py b/selection/algorithms/tests/test_lasso_full.py
index 344723dab..713a043ae 100644
--- a/selection/algorithms/tests/test_lasso_full.py
+++ b/selection/algorithms/tests/test_lasso_full.py
@@ -5,7 +5,7 @@
 from ...tests.instance import gaussian_instance
 
 from ..lasso import (lasso_full,
-                     lasso_full_modelX,
+                     lasso_full_modelQ,
                      _truncation_interval,
                      _solve_restricted_problem)
 
@@ -77,7 +77,7 @@ def test_smaller():
         np.testing.assert_allclose(l, lower)
         np.testing.assert_allclose(u, upper)
 
-def test_modelX():
+def test_modelQ():
 
     n, p, s = 200, 50, 4
     X, y, beta = gaussian_instance(n=n,
@@ -91,7 +91,7 @@ def test_modelX():
     LF.fit()
     S = LF.summary(dispersion=1)
 
-    LX = lasso_full_modelX(X.T.dot(X), X, y, lagrange)
+    LX = lasso_full_modelQ(X.T.dot(X), X, y, lagrange)
     LX.fit()
     SX = LX.summary(dispersion=1)
 

From e0000f500a9702dfb460b801ffed521c13119ca8 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 7 Apr 2018 12:17:47 -0700
Subject: [PATCH 556/617] update to R software

---
 R-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-software b/R-software
index e2ebc9928..8a2a30a5f 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit e2ebc9928021f479f274bc74596d70e6b7531f6c
+Subproject commit 8a2a30a5f14b080e6dea476cfb0dc21d6316afdb

From 01f6e39b13a01da0a858756c03988b3566dfd32b Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 7 Apr 2018 14:21:38 -0700
Subject: [PATCH 557/617] NF: for Gaussian allowing a modelled Q instead of
 X.T.dot(X)

---
 selection/randomized/modelQ.py            | 384 ++++++++++++++++++++++
 selection/randomized/tests/test_modelQ.py |  39 +++
 2 files changed, 423 insertions(+)
 create mode 100644 selection/randomized/modelQ.py
 create mode 100644 selection/randomized/tests/test_modelQ.py

diff --git a/selection/randomized/modelQ.py b/selection/randomized/modelQ.py
new file mode 100644
index 000000000..9618c0be1
--- /dev/null
+++ b/selection/randomized/modelQ.py
@@ -0,0 +1,384 @@
+import functools
+
+import numpy as np
+import regreg.api as rr
+from ..constraints.affine import constraints
+
+from .query import affine_gaussian_sampler
+from .lasso import highdim
+from .randomization import randomization
+
+class modelQ(object):
+
+    r"""
+    A class for the randomized LASSO for post-selection inference.
+    The problem solved is
+
+    .. math::
+
+        \text{minimize}_{\beta} -(X\beta)^Ty + \frac{1}{2} \beta^TQ\beta + 
+            \sum_{i=1}^p \lambda_i |\beta_i| - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
+
+    where $\lambda$ is `feature_weights`, $\omega$ is a randomization generated below
+    and the last term is a small ridge penalty. Each static method
+    forms $\ell$ as well as the $\ell_1$ penalty. The generic class
+    forms the remaining two terms in the objective.
+
+    """
+
+    def __init__(self,
+                 Q,
+                 X,
+                 y,
+                 feature_weights,
+                 ridge_term=None,
+                 randomizer_scale=None,
+                 perturb=None):
+        r"""
+        Create a new post-selection object for the LASSO problem with quadratic form `Q`.
+
+        Parameters
+        ----------
+        Q : np.ndarray((p, p))
+            Matrix of the quadratic part of the loss.
+
+        X : np.ndarray((n, p))
+            Design matrix.
+
+        y : np.ndarray(n)
+            Response vector.
+
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is broadcast to all `p` features.
+
+        ridge_term : float (optional)
+            Ridge coefficient; default is computed from `y`, `diag(Q)` and `n`.
+
+        randomizer_scale : float (optional)
+            Scale for IID components of randomization; default is
+            computed from the same quantities.
+
+        perturb : np.ndarray (optional)
+            Random perturbation subtracted as a linear
+            term in the objective function.
+        """
+        self.Q, self.X, self.y = Q, X, y
+        self.loss = rr.quadratic_loss(Q.shape[0], Q=Q)
+        n, p = X.shape
+        self.nfeature = p
+
+        # a scalar weight is broadcast to one entry per feature (p = X.shape[1])
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(p) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+
+        mean_diag = np.diag(Q).mean()
+        if ridge_term is None:
+            ridge_term = np.std(y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(y) * np.sqrt(n / (n - 1.))
+
+        self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
+        self.ridge_term = ridge_term
+        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
+        self._initial_omega = perturb  # random perturbation
+
+    def fit(self, 
+            solve_args={'tol':1.e-12, 'min_its':50}, 
+            perturb=None):
+        """
+        Fit the randomized lasso using `regreg` and set up the sampler.
+
+        Parameters
+        ----------
+        solve_args : dict
+             Keyword arguments passed to `problem.solve`.
+        perturb : np.ndarray (optional)
+             Perturbation to use; if None and no perturbation is stored
+             on the object, one is drawn from `self.randomizer`.
+
+        Returns
+        -------
+        active_signs : np.ndarray
+             `np.sign` of the randomized lasso solution (0 where it is zero).
+        """
+
+        p = self.nfeature
+
+        # take a new perturbation if supplied
+        if perturb is not None:
+            self._initial_omega = perturb
+        if self._initial_omega is None:
+            self._initial_omega = self.randomizer.sample()
+
+        quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0)
+        quad_data = rr.identity_quadratic(0, 0, -self.X.T.dot(self.y), 0)
+        problem = rr.simple_problem(self.loss, self.penalty)
+        self.initial_soln = problem.solve(quad + quad_data, **solve_args)
+
+        active_signs = np.sign(self.initial_soln)
+        active = self._active = active_signs != 0
+
+        self._lagrange = self.penalty.weights
+        unpenalized = self._lagrange == 0
+
+        active *= ~unpenalized  # in-place: self._active is the same array and changes too
+
+        self._overall = overall = (active + unpenalized) > 0
+        self._inactive = inactive = ~self._overall
+        self._unpenalized = unpenalized
+
+        _active_signs = active_signs.copy()
+        _active_signs[unpenalized] = np.nan # don't release sign of unpenalized variables
+        self.selection_variable = {'sign':_active_signs,
+                                   'variables':self._overall}
+
+        # initial state for opt variables
+
+        initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + 
+                            quad_data.objective(self.initial_soln, 'grad') +
+                            quad.objective(self.initial_soln, 'grad')) 
+        self.initial_subgrad = initial_subgrad
+
+        initial_scalings = np.fabs(self.initial_soln[active])
+        initial_unpenalized = self.initial_soln[self._unpenalized]
+
+        self.observed_opt_state = np.concatenate([initial_scalings,
+                                                  initial_unpenalized])
+
+        E = overall
+        Q_E = self.Q[E][:,E]
+        _beta_unpenalized = np.linalg.inv(Q_E).dot(self.X[:,E].T.dot(self.y))
+        beta_bar = np.zeros(p)
+        beta_bar[overall] = _beta_unpenalized
+        self._beta_full = beta_bar
+
+        # observed state for score in internal coordinates
+        # NOTE(review): loss gradient enters with '-' but quad_data gradient with '+' here; observed_score_state below adds both -- confirm sign
+        self.observed_internal_state = np.hstack([_beta_unpenalized,
+                                                  -self.loss.smooth_objective(beta_bar, 'grad')[inactive] + 
+                                                  quad_data.objective(beta_bar, 'grad')[inactive]])
+
+        # form linear part
+
+        self.num_opt_var = self.observed_opt_state.shape[0]
+
+        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
+        # E for active
+        # U for unpenalized
+        # -E for inactive
+
+        _opt_linear_term = np.zeros((p, self.num_opt_var))
+        _score_linear_term = np.zeros((p, self.num_opt_var))
+        # NOTE(review): the zeros bound to `_score_linear_term` above are replaced by the hstack below
+        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
+
+        X, y = self.X, self.y  # NOTE(review): neither local is read again in this method
+        _hessian_active = self.Q[:, active]
+        _hessian_unpen = self.Q[:, unpenalized]
+
+        _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen])
+
+        # set the observed score (data dependent) state
+
+        self.observed_score_state = _score_linear_term.dot(_beta_unpenalized)
+        self.observed_score_state[inactive] += (self.loss.smooth_objective(beta_bar, 'grad')[inactive] + 
+                                                quad_data.objective(beta_bar, 'grad')[inactive])
+
+        def signed_basis_vector(p, j, s):
+            v = np.zeros(p)
+            v[j] = s
+            return v
+
+        active_directions = np.array([signed_basis_vector(p, j, active_signs[j]) for j in np.nonzero(active)[0]]).T
+
+        scaling_slice = slice(0, active.sum())
+        if np.sum(active) == 0:
+            _opt_hessian = 0
+        else:
+            _opt_hessian = _hessian_active * active_signs[None, active] + self.ridge_term * active_directions
+        _opt_linear_term[:, scaling_slice] = _opt_hessian
+
+        # beta_U piece
+
+        unpenalized_slice = slice(active.sum(), self.num_opt_var)
+        unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
+        if unpenalized.sum():
+            _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
+                                                      + self.ridge_term * unpenalized_directions) 
+
+        # two transforms that encode score and optimization
+        # variable roles 
+
+        self.opt_transform = (_opt_linear_term, self.initial_subgrad)
+        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
+
+        # now store everything needed for the projections
+        # the projection acts only on the optimization
+        # variables
+
+        self._setup = True
+        self.scaling_slice = scaling_slice
+        self.unpenalized_slice = unpenalized_slice
+        self.ndim = self.loss.shape[0]
+
+        # compute implied mean and covariance
+
+        cov, prec = self.randomizer.cov_prec
+        opt_linear, opt_offset = self.opt_transform
+
+        cond_precision = opt_linear.T.dot(opt_linear) * prec
+        cond_cov = np.linalg.inv(cond_precision)
+        logdens_linear = cond_cov.dot(opt_linear.T) * prec
+
+        cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
+
+        def log_density(logdens_linear, offset, cond_prec, score, opt):
+            if score.ndim == 1:
+                mean_term = logdens_linear.dot(score.T + offset).T
+            else:
+                mean_term = logdens_linear.dot(score.T + offset[:, None]).T
+            arg = opt + mean_term
+            return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+        log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
+
+        # now make the constraints: A = -I, b = 0 (presumably A.dot(opt) <= b, i.e. opt >= 0 -- confirm in `constraints`)
+
+        A_scaling = -np.identity(self.num_opt_var)
+        b_scaling = np.zeros(self.num_opt_var)
+
+        affine_con = constraints(A_scaling,
+                                 b_scaling,
+                                 mean=cond_mean,
+                                 covariance=cond_cov)
+
+        logdens_transform = (logdens_linear, opt_offset)
+
+        self.sampler = affine_gaussian_sampler(affine_con,
+                                               self.observed_opt_state,
+                                               self.observed_score_state,
+                                               log_density,
+                                               logdens_transform,
+                                               selection_info=self.selection_variable) # should be signs and the subgradients we've conditioned on
+        
+        return active_signs
+
+    def summary(self,
+                target="selected",
+                features=None,
+                parameter=None,
+                level=0.9,
+                ndraw=10000, 
+                burnin=2000,
+                compute_intervals=False,
+                dispersion=None):
+        """
+        Produce p-values and confidence intervals for targets
+        of model including selected features
+
+        Parameters
+        ----------
+
+        target : one of ['selected', 'full']
+
+        features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
+
+        level : float
+            Confidence level.
+
+        ndraw : int (optional)
+            Defaults to 10000.
+
+        burnin : int (optional)
+            Defaults to 2000.
+
+        compute_intervals : bool
+            Compute confidence intervals?
+
+        dispersion : float (optional)
+            Known value for dispersion; if None, Pearson's X^2 is used.
+
+        """
+
+        if parameter is None:
+            parameter = np.zeros(self.loss.shape[0])
+
+        observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
+
+        if self._overall.sum() > 0:
+            opt_sample = self.sampler.sample(ndraw,  burnin)
+
+            pivots = self.sampler.coefficient_pvalues(observed_target, 
+                                                      cov_target, 
+                                                      cov_target_score, 
+                                                      parameter=parameter, 
+                                                      sample=opt_sample, 
+                                                      alternatives=alternatives)
+            if not np.all(parameter == 0):
+                pvalues = self.sampler.coefficient_pvalues(observed_target, 
+                                                           cov_target, 
+                                                           cov_target_score, 
+                                                           parameter=np.zeros_like(parameter), 
+                                                           sample=opt_sample, 
+                                                           alternatives=alternatives)
+            else:
+                pvalues = pivots
+
+            intervals = None
+            if compute_intervals:
+                intervals = self.sampler.confidence_intervals(observed_target, 
+                                                              cov_target, 
+                                                              cov_target_score,
+                                                              sample=opt_sample)
+
+            return pivots, pvalues, intervals
+        else:
+            return [], [], []
+
+
+    def selected_targets(self, features=None, dispersion=None):
+
+        X, y = self.X, self.y
+        n, p = X.shape
+
+        if features is None:
+            active = self._active
+            unpenalized = self._unpenalized
+            noverall = active.sum() + unpenalized.sum()
+            overall = active + unpenalized
+
+            score_linear = self.score_transform[0]
+            Q = -score_linear[overall]
+            cov_target = np.linalg.inv(Q)
+            observed_target = self._beta_full[overall]
+            crosscov_target_score = score_linear.dot(cov_target)
+            Xfeat = X[:,overall]
+            alternatives = [{1:'greater', -1:'less'}[int(s)] for s in self.selection_variable['sign'][active]] + ['twosided'] * unpenalized.sum()
+
+        else:
+
+            features_b = np.zeros_like(self._overall)
+            features_b[features] = True
+            features = features_b
+
+            Xfeat = X[:,features]
+            Qfeat = self.Q[features][:,features]
+            Gfeat = self.loss.smooth_objective(self.initial_soln, 'grad')[features] - Xfeat.T.dot(y)
+            Qfeat_inv = np.linalg.inv(Qfeat)
+            one_step = self.initial_soln[features] - Qfeat_inv.dot(Gfeat)
+            cov_target = Qfeat_inv
+            _score_linear = -self.Q[features]
+            crosscov_target_score = _score_linear.dot(cov_target)
+            observed_target = one_step
+            alternatives = ['twosided'] * features.sum()
+
+        if dispersion is None: # use Pearson's X^2
+            dispersion = ((y - Xfeat.dot(observed_target))**2).sum() / (n - Xfeat.shape[1])
+
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
diff --git a/selection/randomized/tests/test_modelQ.py b/selection/randomized/tests/test_modelQ.py
new file mode 100644
index 000000000..a6622fd8a
--- /dev/null
+++ b/selection/randomized/tests/test_modelQ.py
@@ -0,0 +1,39 @@
+from __future__ import division, print_function
+
+import numpy as np
+import nose.tools as nt
+
+import regreg.api as rr
+
+from ..modelQ import modelQ
+from ..lasso import highdim
+from ...tests.instance import gaussian_instance
+
+def test_modelQ():
+
+    n, p, s = 200, 50, 4
+    X, y, beta = gaussian_instance(n=n,
+                                   p=p,
+                                   s=s,
+                                   sigma=1)[:3]
+
+    lagrange = 5. * np.ones(p) * np.sqrt(n)
+    perturb = np.random.standard_normal(p) * n
+    LH = highdim.gaussian(X, y, lagrange)
+    LH.fit(perturb=perturb, solve_args={'min_its':1000})
+
+    LQ = modelQ(X.T.dot(X), X, y, lagrange)
+    LQ.fit(perturb=perturb, solve_args={'min_its':1000})
+    LQ.summary() # smoke test
+
+    conH = LH.sampler.affine_con
+    conQ = LQ.sampler.affine_con
+
+    np.testing.assert_allclose(LH.initial_soln, LQ.initial_soln)
+    np.testing.assert_allclose(LH.initial_subgrad, LQ.initial_subgrad)
+
+    np.testing.assert_allclose(conH.linear_part, conQ.linear_part)
+    np.testing.assert_allclose(conH.offset, conQ.offset)
+
+    np.testing.assert_allclose(LH._beta_full, LQ._beta_full)
+

From 88d2580aafad61782e6aa255186a0625c50c890f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 7 Apr 2018 14:22:03 -0700
Subject: [PATCH 558/617] fixed imports

---
 selection/randomized/lasso.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 7fa423f19..348e4a397 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -16,8 +16,8 @@
                     langevin_sampler,
                     affine_gaussian_sampler)
 
-from .reconstruction import reconstruct_full_from_internal
-from .randomization import split, randomization
+from .reconstruction import reconstruct_opt
+from .randomization import randomization
 from .base import restricted_estimator
 from .glm import (pairs_bootstrap_glm,
                   glm_nonparametric_bootstrap,

From 52e8b8ba2df339a9a0347a590ab6534596c0bd01 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 7 Apr 2018 23:46:40 -0700
Subject: [PATCH 559/617] restructured test and rectified scaling of target

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 355 ++++++++----------
 selection/randomized/lasso.py                 |   2 +-
 .../tests/test_selective_MLE_high.py          |  19 +-
 3 files changed, 169 insertions(+), 207 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 807c7e07c..a6081dd9c 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -6,6 +6,7 @@
 import selection.randomized.lasso as L; reload(L)
 from selection.randomized.lasso import highdim
 from selection.algorithms.lasso import lasso
+from scipy.stats import norm as ndist
 
 def glmnet_lasso(X, y, lambda_val):
     robjects.r('''
@@ -103,14 +104,17 @@ def relative_risk(est, truth, Sigma):
 
     return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
 
-def coverage(intervals, truth, npars, active_bool):
+def coverage(intervals, pval, truth):
+    if (truth!=0).sum()!=0:
+        avg_power = np.mean(pval[truth != 0])
+    else:
+        avg_power = 0.
+    return np.mean((truth > intervals[:, 0])*(truth < intervals[:, 1])), avg_power
 
-    return ((truth > intervals[:, 0])*(truth < intervals[:, 1])).sum() / float(npars),\
-           ((active_bool)*(np.logical_or((0. < intervals[:, 0]),(0. > intervals[:,1])))).sum()
 
-def comparison_risk_inference_low(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
-                                  randomizer_scale=np.sqrt(0.25), target = "selected",
-                                  full_dispersion = True):
+def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.20,
+                                       randomizer_scale=np.sqrt(0.25), target = "selected",
+                                       full_dispersion = True):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
@@ -128,11 +132,8 @@ def comparison_risk_inference_low(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty
         y = y - y.mean()
         y_val = y_val - y_val.mean()
 
-        dispersion = None
         if full_dispersion:
             dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
-        else:
-            dispersion = np.std(y)
 
         sigma_ = np.std(y)
         LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
@@ -149,7 +150,7 @@ def comparison_risk_inference_low(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty
             W = lam_seq[k]
             conv = const(X,
                          y,
-                         W,
+                         W * np.ones(p),
                          randomizer_scale=randomizer_scale * sigma_)
             signs = conv.fit()
             nonzero = signs != 0
@@ -160,12 +161,13 @@ def comparison_risk_inference_low(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty
             err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
-        sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
-        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
 
+        # sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
+        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
+        #lam = np.sqrt(2 * np.log(p)) * sigma_
         randomized_lasso = const(X,
                                  y,
-                                 lam,
+                                 lam*np.ones(p),
                                  randomizer_scale=randomizer_scale * sigma_)
 
         signs = randomized_lasso.fit()
@@ -177,37 +179,25 @@ def comparison_risk_inference_low(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty
 
         if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
             Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-            Lee_intervals = np.zeros((nactive_LASSO, 2))
-            Lee_intervals[:, 0] = np.asarray(Lee['lower_confidence'])
-            Lee_intervals[:, 1] = np.asarray(Lee['upper_confidence'])
+            Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
+            Lee_pval = np.asarray(Lee['pval'])
 
             sel_MLE = np.zeros(p)
-            estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
+            estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
                                                                                                          dispersion=dispersion)
             sel_MLE[nonzero] = estimate / np.sqrt(n)
             ind_estimator = np.zeros(p)
             ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
 
-            if target == "selected":
-                beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
-                beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
-                beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
+            beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
+            beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
+            beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
 
-                post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
-                unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
-                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-                                            post_LASSO_OLS + 1.65 * unad_sd]).T
-
-            elif target == "full":
-                beta_target_rand = beta[nonzero]
-                beta_target_nonrand_py = beta[active_LASSO]
-                beta_target_nonrand = beta[active_nonrand]
-
-                post_LASSO_OLS = np.linalg.pinv(X)[active_nonrand].dot(y)
-                unad_sd = sigma_ * np.sqrt(
-                    np.diag((np.linalg.pinv(X)[active_nonrand].dot(np.linalg.pinv(X)[active_nonrand].T))))
-                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-                                            post_LASSO_OLS + 1.65 * unad_sd]).T
+            post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+            unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
+            unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                        post_LASSO_OLS + 1.65 * unad_sd]).T
+            unad_pval = ndist.cdf(post_LASSO_OLS / unad_sd)
 
             true_signals = np.zeros(p, np.bool)
             true_signals[beta != 0] = 1
@@ -226,9 +216,13 @@ def comparison_risk_inference_low(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty
             for z in range(nactive_LASSO):
                 active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
 
-            cov_sel, power_sel = coverage(sel_intervals, beta_target_rand, nonzero.sum(), active_rand_bool)
-            cov_Lee, power_Lee = coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO,  active_LASSO_bool)
-            cov_unad, power_unad = coverage(unad_intervals, beta_target_nonrand, nactive_nonrand, active_nonrand_bool)
+            cov_sel, _ = coverage(sel_intervals, sel_pval, beta_target_rand)
+            cov_Lee, _ = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
+            cov_unad, _ = coverage(unad_intervals, unad_pval, beta_target_nonrand)
+
+            power_sel = ((active_rand_bool)*(np.logical_or((0. < sel_intervals[:, 0]),(0. > sel_intervals[:,1])))).sum()
+            power_Lee = ((active_LASSO_bool)*(np.logical_or((0. < Lee_intervals[:, 0]),(0. > Lee_intervals[:,1])))).sum()
+            power_unad = ((active_nonrand_bool)*(np.logical_or((0. < unad_intervals[:, 0]),(0. > unad_intervals[:,1])))).sum()
             break
 
     if True:
@@ -241,86 +235,16 @@ def comparison_risk_inference_low(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty
                cov_sel,\
                cov_Lee,\
                cov_unad,\
-               (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
-               (Lee_intervals[:, 1] - Lee_intervals[:, 0]).sum() / float(nactive_LASSO), \
-               (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
-               power_sel/float((beta != 0).sum()),  \
+               np.mean(sel_intervals[:, 1] - sel_intervals[:, 0]), \
+               np.mean(Lee_intervals[:, 1] - Lee_intervals[:, 0]), \
+               np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]), \
+               power_sel/float((beta != 0).sum()), \
                power_Lee/float((beta != 0).sum()), \
                power_unad/float((beta != 0).sum())
 
-# if __name__ == "__main__":
-#
-#     ndraw = 50
-#     bias = 0.
-#     risk_selMLE = 0.
-#     risk_indest = 0.
-#     risk_LASSO_rand = 0.
-#     risk_relLASSO_rand = 0.
-#
-#     risk_relLASSO_nonrand = 0.
-#     risk_LASSO_nonrand = 0.
-#
-#     coverage_selMLE = 0.
-#     coverage_Lee = 0.
-#     coverage_unad = 0.
-#
-#     length_sel = 0.
-#     length_Lee = 0.
-#     length_unad = 0.
-#
-#     power_sel = 0.
-#     power_Lee = 0.
-#     power_unad = 0.
-#
-#     for i in range(ndraw):
-#         output = comparison_risk_inference(n=200, p=500, nval=200, rho=0.35, s=20, beta_type=2, snr=.20,
-#                                            randomizer_scale=np.sqrt(0.25), target="selected",
-#                                            full_dispersion=True)
-#
-#         risk_selMLE += output[0]
-#         risk_indest += output[1]
-#         risk_LASSO_rand += output[2]
-#         risk_relLASSO_rand += output[3]
-#         risk_relLASSO_nonrand += output[4]
-#         risk_LASSO_nonrand += output[5]
-#
-#         coverage_selMLE += output[6]
-#         coverage_Lee += output[7]
-#         coverage_unad += output[8]
-#
-#         length_sel += output[9]
-#         length_Lee += output[10]
-#         length_unad += output[11]
-#
-#         power_sel += output[12]
-#         power_Lee += output[13]
-#         power_unad += output[14]
-#
-#         sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
-#         sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
-#         sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
-#         sys.stderr.write("overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n"+ "\n")
-#
-#         sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-#         sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
-#
-#         sys.stderr.write("overall selective coverage " + str(coverage_selMLE/ float(i + 1)) + "\n" )
-#         sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) +  "\n")
-#         sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
-#
-#         sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
-#         sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
-#         sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
-#
-#         sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
-#         sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
-#         sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
-#
-#         sys.stderr.write("iteration completed " + str(i+1) + "\n")
-
-
-def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.2,
-                                   randomizer_scale=np.sqrt(0.25), target = "selected",
+
+def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2, snr=0.2,
+                                   randomizer_scale=np.sqrt(0.25), target = "full",
                                    full_dispersion = True):
 
     while True:
@@ -329,8 +253,8 @@ def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_t
         rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
         active_nonrand = (est_LASSO != 0)
         nactive_nonrand = active_nonrand.sum()
-        true_mean = X.dot(beta)
 
+        _std = X.std(0)
         X -= X.mean(0)[None, :]
         X /= (X.std(0)[None, :] * np.sqrt(n))
         X_val -= X_val.mean(0)[None, :]
@@ -339,9 +263,14 @@ def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_t
         y = y - y.mean()
         y_val = y_val - y_val.mean()
 
-        dispersion = None
-
         sigma_ = np.std(y)
+        print("true and estimated sigma", sigma, sigma_)
+
+        if full_dispersion:
+            dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+        else:
+            dispersion = None
+
         LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
         soln = LASSO_py.fit()
         active_LASSO = (soln != 0)
@@ -353,7 +282,7 @@ def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_t
                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
         err = np.zeros(100)
         for k in range(100):
-            W = lam_seq[k]
+            W = lam_seq[k]*np.ones(p)
             conv = const(X,
                          y,
                          W,
@@ -367,10 +296,11 @@ def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_t
             err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
-
+        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
+        #lam = np.sqrt(2 * np.log(p)) * sigma_
         randomized_lasso = const(X,
                                  y,
-                                 lam,
+                                 lam*np.ones(p),
                                  randomizer_scale=randomizer_scale * sigma_)
 
         signs = randomized_lasso.fit()
@@ -380,60 +310,32 @@ def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_t
         sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
         sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
 
-        if nonzero.sum()>0 and nactive_nonrand>0:
+        if nonzero.sum()>0 and nactive_nonrand>0 and nonzero.sum()<50:
             # Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-            # Lee_intervals = np.zeros((nactive_LASSO, 2))
-            # Lee_intervals[:, 0] = np.asarray(Lee['lower_confidence'])
-            # Lee_intervals[:, 1] = np.asarray(Lee['upper_confidence'])
+            # Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
+            # Lee_pval = np.asarray(Lee['pval'])
 
             sel_MLE = np.zeros(p)
-            estimate, _, _, pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
+            estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
                                                                                                          dispersion=dispersion)
             sel_MLE[nonzero] = estimate / np.sqrt(n)
             ind_estimator = np.zeros(p)
             ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
 
-            if target == "selected":
-                beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
-                #beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
-                beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
+            beta_target_rand = np.sqrt(n)* _std[nonzero] * beta[nonzero]
+            beta_target_nonrand_py = np.sqrt(n)* _std[active_LASSO] * beta[active_LASSO]
+            beta_target_nonrand = np.sqrt(n)* _std[active_nonrand] * beta[active_nonrand]
 
-                post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
-                unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
-                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-                                            post_LASSO_OLS + 1.65 * unad_sd]).T
-
-            elif target == "full":
-                beta_target_rand = beta[nonzero]
-                beta_target_nonrand_py = beta[active_LASSO]
-                beta_target_nonrand = beta[active_nonrand]
-
-                post_LASSO_OLS = np.linalg.pinv(X)[active_nonrand].dot(y)
-                unad_sd = sigma_ * np.sqrt(
-                    np.diag((np.linalg.pinv(X)[active_nonrand].dot(np.linalg.pinv(X)[active_nonrand].T))))
-                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-                                            post_LASSO_OLS + 1.65 * unad_sd]).T
-
-            true_signals = np.zeros(p, np.bool)
-            true_signals[beta != 0] = 1
-            true_set = np.asarray([u for u in range(p) if true_signals[u]])
-            active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
-            active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
-            active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
+            post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+            unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
 
-            active_rand_bool = np.zeros(nonzero.sum(), np.bool)
-            for x in range(nonzero.sum()):
-                active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
-            active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
-            for w in range(nactive_nonrand):
-                active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
-            active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
-            for z in range(nactive_LASSO):
-                active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
+            unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                        post_LASSO_OLS + 1.65 * unad_sd]).T
+            unad_pval = ndist.cdf(post_LASSO_OLS/unad_sd)
 
-            cov_sel, power_sel = coverage(sel_intervals, beta_target_rand, nonzero.sum(), active_rand_bool)
-            #cov_Lee, power_Lee = coverage(Lee_intervals, beta_target_nonrand_py, nactive_LASSO,  active_LASSO_bool)
-            cov_unad, power_unad = coverage(unad_intervals, beta_target_nonrand, nactive_nonrand, active_nonrand_bool)
+            cov_sel, power_sel = coverage(sel_intervals, sel_pval, beta_target_rand)
+            #cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
+            cov_unad, power_unad = coverage(unad_intervals, unad_pval, beta_target_nonrand)
             break
 
     if True:
@@ -474,49 +376,102 @@ def comparison_risk_inference_high(n=500, p=100, nval=500, rho=0.35, s=5, beta_t
     power_Lee = 0.
     power_unad = 0.
 
-    for i in range(ndraw):
-        output = comparison_risk_inference_high(n=200, p=1000, nval=200, rho=0.35, s=10, beta_type=2, snr=.30,
-                                                randomizer_scale=np.sqrt(0.25), target="selected",
-                                                full_dispersion=False)
+    target = "full"
+    n, p, rho, s, beta_type, snr = 200, 1000, 0.35, 10, 1, 0.3
+
+    if target == "selected":
+        for i in range(ndraw):
+            output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
+                                                        randomizer_scale=np.sqrt(0.25), target=target,
+                                                        full_dispersion=True)
+
+            risk_selMLE += output[0]
+            risk_indest += output[1]
+            risk_LASSO_rand += output[2]
+            risk_relLASSO_rand += output[3]
+            risk_relLASSO_nonrand += output[4]
+            risk_LASSO_nonrand += output[5]
+
+            coverage_selMLE += output[6]
+            coverage_Lee += output[7]
+            coverage_unad += output[8]
+
+            length_sel += output[9]
+            length_Lee += output[10]
+            length_unad += output[11]
+
+            power_sel += output[12]
+            power_Lee += output[13]
+            power_unad += output[14]
+
+            sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
+            sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+            sys.stderr.write(
+                "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+            sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("iteration completed " + str(i + 1) + "\n")
+
+    elif target == "full":
+        if n > p:
+            full_dispersion = True
+        else:
+            full_dispersion = False
+        for i in range(ndraw):
+            output = comparison_risk_inference_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
+                                                    randomizer_scale=np.sqrt(0.25), target=target,
+                                                    full_dispersion=full_dispersion)
+
+            risk_selMLE += output[0]
+            risk_indest += output[1]
+            risk_LASSO_rand += output[2]
+            risk_relLASSO_rand += output[3]
+            risk_relLASSO_nonrand += output[4]
+            risk_LASSO_nonrand += output[5]
+
+            coverage_selMLE += output[6]
+            coverage_unad += output[7]
 
-        risk_selMLE += output[0]
-        risk_indest += output[1]
-        risk_LASSO_rand += output[2]
-        risk_relLASSO_rand += output[3]
-        risk_relLASSO_nonrand += output[4]
-        risk_LASSO_nonrand += output[5]
+            length_sel += output[8]
+            length_unad += output[9]
 
-        coverage_selMLE += output[6]
-        #coverage_Lee += output[7]
-        coverage_unad += output[7]
+            power_sel += output[10]
+            power_unad += output[11]
 
-        length_sel += output[8]
-        #length_Lee += output[10]
-        length_unad += output[9]
+            sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
+            sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+            sys.stderr.write(
+                "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
 
-        power_sel += output[10]
-        #power_Lee += output[13]
-        power_unad += output[11]
+            sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+            sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
 
-        sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
-        sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
-        sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
-        sys.stderr.write("overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n"+ "\n")
+            sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
 
-        sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-        sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
 
-        sys.stderr.write("overall selective coverage " + str(coverage_selMLE/ float(i + 1)) + "\n" )
-        sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) +  "\n")
-        sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
 
-        sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
-        sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
-        sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("iteration completed " + str(i + 1) + "\n")
 
-        sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
-        sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
-        sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
 
-        sys.stderr.write("iteration completed " + str(i+1) + "\n")
 
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 010e5c2a8..f6eba1cab 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1683,7 +1683,7 @@ def debiased_targets(self,
             relaxed_soln = self.initial_soln[features] - np.linalg.inv(Qrelax).dot(G[features])
             dispersion = ((y - self.loglike.saturated_loss.mean_function(
                 Xfeat.dot(relaxed_soln))) ** 2 / self._W).sum() / (n - features.sum())
-
+            #print("dispersion", np.sqrt(dispersion))
         alternatives = ['twosided'] * features.sum()
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
diff --git a/selection/randomized/tests/test_selective_MLE_high.py b/selection/randomized/tests/test_selective_MLE_high.py
index 6491e063f..a773d9340 100644
--- a/selection/randomized/tests/test_selective_MLE_high.py
+++ b/selection/randomized/tests/test_selective_MLE_high.py
@@ -9,7 +9,7 @@
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
-def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, randomizer_scale=0.25, full_dispersion=True):
+def test_full_targets(n=2000, p=200, signal_fac=0.5, s=5, sigma=3, rho=0.4, randomizer_scale=0.25, full_dispersion=True):
     """
     Compare to R randomized lasso
     """
@@ -25,6 +25,10 @@ def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, rand
                       sigma=sigma, 
                       random_signs=True)[:3]
 
+    idx = np.arange(p)
+    sigmaX = rho ** np.abs(np.subtract.outer(idx, idx))
+    print("snr", beta.T.dot(sigmaX).dot(beta)/((sigma**2.)* n))
+
     n, p = X.shape
 
     sigma_ = np.std(Y)
@@ -37,6 +41,7 @@ def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, rand
     
     signs = conv.fit()
     nonzero = signs != 0
+    print("dimensions", n, p, nonzero.sum())
 
     dispersion = None
     if full_dispersion:
@@ -45,7 +50,7 @@ def test_full_targets(n=2000, p=200, signal_fac=1.1, s=5, sigma=3, rho=0.4, rand
     estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="full", dispersion=dispersion)
 
     coverage = (beta[nonzero] > intervals[:,0]) * (beta[nonzero] < intervals[:,1])
-    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage
+    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals
 
 def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4, randomizer_scale=1, full_dispersion=True):
     """
@@ -89,10 +94,10 @@ def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4,
 
 def main(nsim=500, full=True):
 
-    P0, PA, cover = [], [], []
+    P0, PA, cover, length_int = [], [], [], []
     from statsmodels.distributions import ECDF
 
-    n, p, s = 200, 1000, 20
+    n, p, s = 200, 1000, 10
 
     for i in range(nsim):
         if full:
@@ -100,7 +105,8 @@ def main(nsim=500, full=True):
                 full_dispersion = True
             else:
                 full_dispersion = False
-            p0, pA, cover_ = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
+            p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
+            avg_length = intervals[:,1]-intervals[:,0]
         else:
             full_dispersion = True
             p0, pA, cover_ = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
@@ -108,7 +114,8 @@ def main(nsim=500, full=True):
         cover.extend(cover_)
         P0.extend(p0)
         PA.extend(pA)
-        print(np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), 'null pvalue + power')
+        print(np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover),
+              np.mean(avg_length), 'null pvalue + power + length')
     
         if i % 3 == 0 and i > 0:
             U = np.linspace(0, 1, 101)

From be920b7c19264857dcf4dc06512523010c15e9dd Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 9 Apr 2018 21:24:45 -0700
Subject: [PATCH 560/617] create plots + save results

---
 .../tests/plot_inferential_metrics.py         | 126 +++++
 .../tests/test_inferential_metrics.py         | 469 ++++++++++++++++++
 .../adjusted_MLE/tests/test_risk_coverage.py  | 276 +++++++----
 3 files changed, 762 insertions(+), 109 deletions(-)
 create mode 100644 selection/adjusted_MLE/tests/plot_inferential_metrics.py
 create mode 100644 selection/adjusted_MLE/tests/test_inferential_metrics.py

diff --git a/selection/adjusted_MLE/tests/plot_inferential_metrics.py b/selection/adjusted_MLE/tests/plot_inferential_metrics.py
new file mode 100644
index 000000000..c1e013e3b
--- /dev/null
+++ b/selection/adjusted_MLE/tests/plot_inferential_metrics.py
@@ -0,0 +1,126 @@
+import os
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.ticker as mticker
+import seaborn as sns
+import pickle
+
+df = pd.read_csv('/Users/snigdhapanigrahi/adjusted_MLE/results/metrics_selected_target_medium.csv')
+df_risk = pd.read_csv('/Users/snigdhapanigrahi/adjusted_MLE/results/risk_selected_target_medium.csv')
+order = ["Selective", "Lee", "Naive"]
+cols = ["#3498db", "#9b59b6", "#e74c3c"]
+
+def inference_result():
+    # Create a three-panel figure comparing coverage, power and risk across methods
+    sns.set(font_scale=2)  # font size
+    sns.set_style("white", {'axes.facecolor': 'white',
+                            'axes.grid': True,
+                            'axes.linewidth': 2.0,
+                            'grid.linestyle': u'--',
+                            'grid.linewidth': 4.0,
+                            'xtick.major.size': 5.0,
+                            })
+
+    fig = plt.figure(figsize=(11, 4))
+    ax1 = fig.add_subplot(131)
+    ax2 = fig.add_subplot(132)
+    ax3 = fig.add_subplot(133)
+
+    sns.pointplot(x="SNR", y="coverage", hue_order=order, markers='o', hue="method", data=df, ax=ax1,
+                  palette=cols)
+    sns.pointplot(x="SNR", y="power", hue_order=order, markers='o', hue="method", data=df, ax=ax2,
+                  palette=cols)
+    sns.pointplot(x="SNR", y="risk", hue_order=order, markers='o', hue="method", data=df, ax=ax3,
+                  palette=cols)
+
+    ax1.set_title("coverage", y=1.01)
+    ax2.set_title("power", y=1.01)
+    ax3.set_title("risk", y=1.01)
+
+    ax1.legend_.remove()
+    ax2.legend_.remove()
+    ax3.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+
+    ax1.set_ylim(0, 1.1)
+    ax2.set_ylim(0, 1.1)
+    ax3.set_ylim(-0.05, 0.8)
+
+    ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
+    ax2.set_xticklabels(ax2.get_xticklabels(), rotation=90)
+    ax3.set_xticklabels(ax3.get_xticklabels(), rotation=90)
+    # myLocator = mticker.MultipleLocator(2)
+    # ax1.xaxis.set_major_locator(myLocator)
+    # ax2.xaxis.set_major_locator(myLocator)
+    # ax3.xaxis.set_major_locator(myLocator)
+
+    def common_format(ax):
+        ax.grid(True, which='both')
+        ax.set_xlabel('', fontsize=22)
+        # ax.yaxis.label.set_size(22)
+        ax.set_ylabel('', fontsize=22)
+        return ax
+
+    common_format(ax1)
+    common_format(ax2)
+    common_format(ax3)
+    fig.text(0.5, -0.04, 'SNR', fontsize=22, ha='center')
+
+    # add target coverage on the first plot
+    ax1.axhline(y=0.9, color='k', linestyle='--', linewidth=2)
+
+    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
+    plt.savefig('/Users/snigdhapanigrahi/adjusted_MLE/results/inference_comparison_medium.pdf', format='pdf', bbox_inches='tight')
+
+def risk_comparison():
+    # Create a figure plotting the Risk_selMLE, Risk_indest and Risk_LASSO_rand columns against SNR
+    sns.set(font_scale=2)  # font size
+    sns.set_style("white", {'axes.facecolor': 'white',
+                            'axes.grid': True,
+                            'axes.linewidth': 2.0,
+                            'grid.linestyle': u'--',
+                            'grid.linewidth': 4.0,
+                            'xtick.major.size': 5.0,
+                            })
+
+    fig = plt.figure(figsize=(11, 4))
+    ax1 = fig.add_subplot(121)
+    ax2 = fig.add_subplot(122)
+
+    sns.pointplot(x="SNR", y="Risk_selMLE", markers='o', data=df_risk, ax=ax1, color="#3498db")
+    sns.pointplot(x="SNR", y="Risk_indest", hue_order=order, markers='o', data=df_risk, ax=ax1, color="#3498db")
+    sns.pointplot(x="SNR", y="Risk_LASSO_rand", hue_order=order, markers='o', data=df_risk, ax=ax1, color="#3498db")
+
+    ax1.set_title("risk", y=1.01)
+
+    ax1.set_ylim(0, 1.1)
+    ax2.set_ylim(0, 1.1)
+
+    ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
+    ax2.set_xticklabels(ax2.get_xticklabels(), rotation=90)
+    #ax3.set_xticklabels(ax3.get_xticklabels(), rotation=90)
+    # myLocator = mticker.MultipleLocator(2)
+    # ax1.xaxis.set_major_locator(myLocator)
+    # ax2.xaxis.set_major_locator(myLocator)
+    # ax3.xaxis.set_major_locator(myLocator)
+
+    def common_format(ax):
+        ax.grid(True, which='both')
+        ax.set_xlabel('', fontsize=22)
+        # ax.yaxis.label.set_size(22)
+        ax.set_ylabel('', fontsize=22)
+        return ax
+
+    common_format(ax1)
+    common_format(ax2)
+    #common_format(ax3)
+    fig.text(0.5, -0.04, 'SNR', fontsize=22, ha='center')
+
+    # NOTE(review): same 0.9 line as the coverage panel in inference_result, but ax1 plots risk here -- confirm intent
+    ax1.axhline(y=0.9, color='k', linestyle='--', linewidth=2)
+
+    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
+    plt.savefig('/Users/snigdhapanigrahi/adjusted_MLE/results/risk_comparison_medium.pdf', format='pdf', bbox_inches='tight')
+
+risk_comparison()
+#inference_result()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
new file mode 100644
index 000000000..9013252ba
--- /dev/null
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -0,0 +1,469 @@
+import numpy as np, sys
+from rpy2 import robjects
+import rpy2.robjects.numpy2ri
+rpy2.robjects.numpy2ri.activate()
+
+import selection.randomized.lasso as L; reload(L)
+from selection.randomized.lasso import highdim
+from selection.algorithms.lasso import lasso
+from scipy.stats import norm as ndist
+
+def glmnet_lasso(X, y, lambda_val):
+    robjects.r('''
+                glmnet_LASSO = function(X,y,lambda){
+                y = as.matrix(y)
+                X = as.matrix(X)
+                lam = as.matrix(lambda)[1,1]
+                n = nrow(X)
+                fit = glmnet(X, y, standardize=TRUE, intercept=FALSE)
+                estimate = coef(fit, s=lam)[-1]
+                return(list(estimate = estimate))
+                }''')
+
+    lambda_R = robjects.globalenv['glmnet_LASSO']
+    n, p = X.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+    r_lam = robjects.r.matrix(lambda_val, nrow=1, ncol=1)
+    estimate = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate'))
+    return estimate
+
+def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
+    robjects.r('''
+    library(bestsubset)
+    sim_xy = bestsubset::sim.xy
+    ''')
+
+    r_simulate = robjects.globalenv['sim_xy']
+    sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
+    X = np.array(sim.rx2('x'))
+    y = np.array(sim.rx2('y'))
+    X_val = np.array(sim.rx2('xval'))
+    y_val = np.array(sim.rx2('yval'))
+    Sigma = np.array(sim.rx2('Sigma'))
+    beta = np.array(sim.rx2('beta'))
+    sigma = np.array(sim.rx2('sigma'))
+
+    return X, y, X_val, y_val, Sigma, beta, sigma
+
+def tuned_lasso(X, y, X_val,y_val):
+    robjects.r('''
+        tuned_lasso_estimator = function(X,Y,X.val,Y.val){
+        Y = as.matrix(Y)
+        X = as.matrix(X)
+        Y.val = as.vector(Y.val)
+        X.val = as.matrix(X.val)
+        rel.LASSO = lasso(X,Y,intercept=TRUE, nrelax=10, nlam=50, standardize=TRUE)
+        LASSO = lasso(X,Y,intercept=TRUE,nlam=50, standardize=TRUE)
+        beta.hat.rellasso = as.matrix(coef(rel.LASSO))
+        beta.hat.lasso = as.matrix(coef(LASSO))
+        min.lam = min(rel.LASSO$lambda)
+        max.lam = max(rel.LASSO$lambda)
+        #print(paste("max and min values of lambda", max.lam, min.lam))
+        lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
+        muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
+        muhat.val.lasso = as.matrix(predict(LASSO, X.val))
+        err.val.rellasso = colMeans((muhat.val.rellasso - Y.val)^2)
+        err.val.lasso = colMeans((muhat.val.lasso - Y.val)^2)
+        opt_lam = ceiling(which.min(err.val.rellasso)/10)
+        lambda.tuned.rellasso = lam.seq[opt_lam]
+        lambda.tuned.lasso = lam.seq[which.min(err.val.lasso)]
+        fit = glmnet(X, Y, standardize=TRUE, intercept=TRUE)
+        estimate.tuned = coef(fit, s=lambda.tuned.lasso)[-1]
+        #print(paste("compare estimates", max(abs(estimate.tuned-(beta.hat.lasso[,which.min(err.val.lasso)])[-1])),
+        #length(which(estimate.tuned!=0)), length(which((beta.hat.lasso[,which.min(err.val.lasso)])[-1]!=0))))
+        return(list(beta.hat.rellasso = (beta.hat.rellasso[,which.min(err.val.rellasso)])[-1],
+        beta.hat.lasso = (beta.hat.lasso[,which.min(err.val.lasso)])[-1],
+        lambda.tuned.rellasso = lambda.tuned.rellasso, lambda.tuned.lasso= lambda.tuned.lasso,
+        lambda.seq = lam.seq))
+        }''')
+
+    r_lasso = robjects.globalenv['tuned_lasso_estimator']
+
+    n, p = X.shape
+    nval, _ = X_val.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+    r_X_val = robjects.r.matrix(X_val, nrow=nval, ncol=p)
+    r_y_val = robjects.r.matrix(y_val, nrow=nval, ncol=1)
+
+    tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
+    estimator_rellasso = np.array(tuned_est.rx2('beta.hat.rellasso'))
+    estimator_lasso = np.array(tuned_est.rx2('beta.hat.lasso'))
+    lam_tuned_rellasso = np.array(tuned_est.rx2('lambda.tuned.rellasso'))
+    lam_tuned_lasso = np.array(tuned_est.rx2('lambda.tuned.lasso'))
+    lam_seq = np.array(tuned_est.rx2('lambda.seq'))
+    return estimator_rellasso, estimator_lasso, lam_tuned_rellasso, lam_tuned_lasso, lam_seq
+
+def relative_risk(est, truth, Sigma):
+
+    return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
+
+def coverage(intervals, pval, truth):
+    if (truth!=0).sum()!=0:
+        avg_power = np.mean(pval[truth != 0])
+    else:
+        avg_power = 0.
+    return np.mean((truth > intervals[:, 0])*(truth < intervals[:, 1])), avg_power
+
+
+def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.20,
+                                       randomizer_scale=np.sqrt(0.25), target = "selected",
+                                       full_dispersion = True):
+
+    while True:
+        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
+                                                        s=s, beta_type=beta_type, snr=snr)
+        rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
+        active_nonrand = (est_LASSO != 0)
+        nactive_nonrand = active_nonrand.sum()
+        true_mean = X.dot(beta)
+
+        X -= X.mean(0)[None, :]
+        X /= (X.std(0)[None, :] * np.sqrt(n))
+        X_val -= X_val.mean(0)[None, :]
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
+
+        y = y - y.mean()
+        y_val = y_val - y_val.mean()
+
+        if full_dispersion:
+            dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+
+        sigma_ = np.std(y)
+        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
+        soln = LASSO_py.fit()
+        active_LASSO = (soln != 0)
+        nactive_LASSO = active_LASSO.sum()
+        glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
+
+        const = highdim.gaussian
+        lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
+                  np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+        err = np.zeros(100)
+        for k in range(100):
+            W = lam_seq[k]
+            conv = const(X,
+                         y,
+                         W * np.ones(p),
+                         randomizer_scale=randomizer_scale * sigma_)
+            signs = conv.fit()
+            nonzero = signs != 0
+            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+
+            full_estimate = np.zeros(p)
+            full_estimate[nonzero] = estimate
+            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+
+        lam = lam_seq[np.argmin(err)]
+
+        # sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
+        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
+        #lam = np.sqrt(2 * np.log(p)) * sigma_
+        randomized_lasso = const(X,
+                                 y,
+                                 lam*np.ones(p),
+                                 randomizer_scale=randomizer_scale * sigma_)
+
+        signs = randomized_lasso.fit()
+        nonzero = signs != 0
+        sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
+        sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
+        sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
+        sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
+
+        if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
+            Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
+            Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
+            Lee_pval = np.asarray(Lee['pval'])
+
+            sel_MLE = np.zeros(p)
+            estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
+                                                                                                         dispersion=dispersion)
+            sel_MLE[nonzero] = estimate / np.sqrt(n)
+            ind_estimator = np.zeros(p)
+            ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
+
+            beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
+            beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
+            beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
+
+            post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+            unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
+            unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                        post_LASSO_OLS + 1.65 * unad_sd]).T
+            unad_pval = ndist.cdf(post_LASSO_OLS / unad_sd)
+
+            true_signals = np.zeros(p, np.bool)
+            true_signals[beta != 0] = 1
+            true_set = np.asarray([u for u in range(p) if true_signals[u]])
+            active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
+            active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
+            active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
+
+            active_rand_bool = np.zeros(nonzero.sum(), np.bool)
+            for x in range(nonzero.sum()):
+                active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
+            active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
+            for w in range(nactive_nonrand):
+                active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
+            active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
+            for z in range(nactive_LASSO):
+                active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
+
+            cov_sel, _ = coverage(sel_intervals, sel_pval, beta_target_rand)
+            cov_Lee, _ = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
+            cov_unad, _ = coverage(unad_intervals, unad_pval, beta_target_nonrand)
+
+            power_sel = ((active_rand_bool)*(np.logical_or((0. < sel_intervals[:, 0]),(0. > sel_intervals[:,1])))).sum()
+            power_Lee = ((active_LASSO_bool)*(np.logical_or((0. < Lee_intervals[:, 0]),(0. > Lee_intervals[:,1])))).sum()
+            power_unad = ((active_nonrand_bool)*(np.logical_or((0. < unad_intervals[:, 0]),(0. > unad_intervals[:,1])))).sum()
+            break
+
+    if True:
+        return relative_risk(sel_MLE, beta, Sigma), \
+               relative_risk(ind_estimator, beta, Sigma), \
+               relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
+               relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
+               relative_risk(rel_LASSO, beta, Sigma), \
+               relative_risk(est_LASSO, beta, Sigma), \
+               cov_sel,\
+               cov_Lee,\
+               cov_unad,\
+               np.mean(sel_intervals[:, 1] - sel_intervals[:, 0]), \
+               np.mean(Lee_intervals[:, 1] - Lee_intervals[:, 0]), \
+               np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]), \
+               power_sel/float((beta != 0).sum()), \
+               power_Lee/float((beta != 0).sum()), \
+               power_unad/float((beta != 0).sum())
+
+
+def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2, snr=0.2,
+                                   randomizer_scale=np.sqrt(0.25), target = "full",
+                                   full_dispersion = True):
+
+    while True:
+        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
+                                                        s=s, beta_type=beta_type, snr=snr)
+        rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
+        active_nonrand = (est_LASSO != 0)
+        nactive_nonrand = active_nonrand.sum()
+
+        _std = X.std(0)
+        X -= X.mean(0)[None, :]
+        X /= (X.std(0)[None, :] * np.sqrt(n))
+        X_val -= X_val.mean(0)[None, :]
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
+
+        y = y - y.mean()
+        y_val = y_val - y_val.mean()
+
+        sigma_ = np.std(y)
+        print("true and estimated sigma", sigma, sigma_)
+
+        if full_dispersion:
+            dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+        else:
+            dispersion = None
+
+        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
+        soln = LASSO_py.fit()
+        active_LASSO = (soln != 0)
+        nactive_LASSO = active_LASSO.sum()
+        glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
+
+        const = highdim.gaussian
+        lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
+                  np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
+        err = np.zeros(100)
+        for k in range(100):
+            W = lam_seq[k]*np.ones(p)
+            conv = const(X,
+                         y,
+                         W,
+                         randomizer_scale=randomizer_scale * sigma_)
+            signs = conv.fit()
+            nonzero = signs != 0
+            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+
+            full_estimate = np.zeros(p)
+            full_estimate[nonzero] = estimate
+            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+
+        lam = lam_seq[np.argmin(err)]
+        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
+        #lam = np.sqrt(2 * np.log(p)) * sigma_
+        randomized_lasso = const(X,
+                                 y,
+                                 lam*np.ones(p),
+                                 randomizer_scale=randomizer_scale * sigma_)
+
+        signs = randomized_lasso.fit()
+        nonzero = signs != 0
+        sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
+        sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
+        sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
+        sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
+
+        if nonzero.sum()>0 and nactive_nonrand>0 and nonzero.sum()<50:
+            # Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
+            # Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
+            # Lee_pval = np.asarray(Lee['pval'])
+
+            sel_MLE = np.zeros(p)
+            estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
+                                                                                                         dispersion=dispersion)
+            sel_MLE[nonzero] = estimate / np.sqrt(n)
+            ind_estimator = np.zeros(p)
+            ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
+
+            beta_target_rand = np.sqrt(n)* _std[nonzero] * beta[nonzero]
+            beta_target_nonrand_py = np.sqrt(n)* _std[active_LASSO] * beta[active_LASSO]
+            beta_target_nonrand = np.sqrt(n)* _std[active_nonrand] * beta[active_nonrand]
+
+            post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+            unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
+
+            unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                        post_LASSO_OLS + 1.65 * unad_sd]).T
+            unad_pval = ndist.cdf(post_LASSO_OLS/unad_sd)
+
+            cov_sel, power_sel = coverage(sel_intervals, sel_pval, beta_target_rand)
+            #cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
+            cov_unad, power_unad = coverage(unad_intervals, unad_pval, beta_target_nonrand)
+            break
+
+    if True:
+        return relative_risk(sel_MLE, beta, Sigma), \
+               relative_risk(ind_estimator, beta, Sigma), \
+               relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
+               relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
+               relative_risk(rel_LASSO, beta, Sigma), \
+               relative_risk(est_LASSO, beta, Sigma), \
+               cov_sel,\
+               cov_unad,\
+               (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
+               (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
+               power_sel/float((beta != 0).sum()),  \
+               power_unad/float((beta != 0).sum())
+
+if __name__ == "__main__":
+
+    ndraw = 50
+    bias = 0.
+    risk_selMLE = 0.
+    risk_indest = 0.
+    risk_LASSO_rand = 0.
+    risk_relLASSO_rand = 0.
+
+    risk_relLASSO_nonrand = 0.
+    risk_LASSO_nonrand = 0.
+
+    coverage_selMLE = 0.
+    coverage_Lee = 0.
+    coverage_unad = 0.
+
+    length_sel = 0.
+    length_Lee = 0.
+    length_unad = 0.
+
+    power_sel = 0.
+    power_Lee = 0.
+    power_unad = 0.
+
+    target = "full"
+    n, p, rho, s, beta_type, snr = 200, 1000, 0.35, 10, 1, 0.3
+
+    if target == "selected":
+        for i in range(ndraw):
+            output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
+                                                        randomizer_scale=np.sqrt(0.25), target=target,
+                                                        full_dispersion=True)
+
+            risk_selMLE += output[0]
+            risk_indest += output[1]
+            risk_LASSO_rand += output[2]
+            risk_relLASSO_rand += output[3]
+            risk_relLASSO_nonrand += output[4]
+            risk_LASSO_nonrand += output[5]
+
+            coverage_selMLE += output[6]
+            coverage_Lee += output[7]
+            coverage_unad += output[8]
+
+            length_sel += output[9]
+            length_Lee += output[10]
+            length_unad += output[11]
+
+            power_sel += output[12]
+            power_Lee += output[13]
+            power_unad += output[14]
+
+            sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
+            sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+            sys.stderr.write(
+                "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+            sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("iteration completed " + str(i + 1) + "\n")
+
+    elif target == "full":
+        if n > p:
+            full_dispersion = True
+        else:
+            full_dispersion = False
+        for i in range(ndraw):
+            output = comparison_risk_inference_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
+                                                    randomizer_scale=np.sqrt(0.25), target=target,
+                                                    full_dispersion=full_dispersion)
+
+            risk_selMLE += output[0]
+            risk_indest += output[1]
+            risk_LASSO_rand += output[2]
+            risk_relLASSO_rand += output[3]
+            risk_relLASSO_nonrand += output[4]
+            risk_LASSO_nonrand += output[5]
+
+            coverage_selMLE += output[6]
+            coverage_unad += output[7]
+
+            length_sel += output[8]
+            length_unad += output[9]
+
+            power_sel += output[10]
+            power_unad += output[11]
+
+            sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
+            sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+            sys.stderr.write(
+                "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+            sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("iteration completed " + str(i + 1) + "\n")
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index a6081dd9c..55f237351 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -3,6 +3,7 @@
 import rpy2.robjects.numpy2ri
 rpy2.robjects.numpy2ri.activate()
 
+import pandas as pd
 import selection.randomized.lasso as L; reload(L)
 from selection.randomized.lasso import highdim
 from selection.algorithms.lasso import lasso
@@ -354,124 +355,181 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
 if __name__ == "__main__":
 
-    ndraw = 50
-    bias = 0.
-    risk_selMLE = 0.
-    risk_indest = 0.
-    risk_LASSO_rand = 0.
-    risk_relLASSO_rand = 0.
-
-    risk_relLASSO_nonrand = 0.
-    risk_LASSO_nonrand = 0.
-
-    coverage_selMLE = 0.
-    coverage_Lee = 0.
-    coverage_unad = 0.
-
-    length_sel = 0.
-    length_Lee = 0.
-    length_unad = 0.
-
-    power_sel = 0.
-    power_Lee = 0.
-    power_unad = 0.
-
-    target = "full"
-    n, p, rho, s, beta_type, snr = 200, 1000, 0.35, 10, 1, 0.3
-
-    if target == "selected":
-        for i in range(ndraw):
-            output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
+    df_master = pd.DataFrame()
+    df_risk = pd.DataFrame()
+
+    target = "selected"
+    snr_values = np.array([0.10, 0.15, 0.20, 0.25, 0.30, 0.42, 0.71, 1.22])
+
+    for snr in snr_values:
+        ndraw = 50
+        bias = 0.
+        risk_selMLE = 0.
+        risk_indest = 0.
+        risk_LASSO_rand = 0.
+        risk_relLASSO_rand = 0.
+
+        risk_relLASSO_nonrand = 0.
+        risk_LASSO_nonrand = 0.
+
+        coverage_selMLE = 0.
+        coverage_Lee = 0.
+        coverage_unad = 0.
+
+        length_sel = 0.
+        length_Lee = 0.
+        length_unad = 0.
+
+        power_sel = 0.
+        power_Lee = 0.
+        power_unad = 0.
+        n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, snr
+
+        if target == "selected":
+            for i in range(ndraw):
+                output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type,
+                                                            snr=snr,
+                                                            randomizer_scale=np.sqrt(0.25), target=target,
+                                                            full_dispersion=True)
+
+                risk_selMLE += output[0]
+                risk_indest += output[1]
+                risk_LASSO_rand += output[2]
+                risk_relLASSO_rand += output[3]
+                risk_relLASSO_nonrand += output[4]
+                risk_LASSO_nonrand += output[5]
+
+                coverage_selMLE += output[6]
+                coverage_Lee += output[7]
+                coverage_unad += output[8]
+
+                length_sel += output[9]
+                length_Lee += output[10]
+                length_unad += output[11]
+
+                power_sel += output[12]
+                power_Lee += output[13]
+                power_unad += output[14]
+
+                sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
+                sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
+                sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+                sys.stderr.write(
+                    "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
+
+                sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+                sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+
+                sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
+                sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n")
+                sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+
+                sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+                sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
+                sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+
+                sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+                sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
+                sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+
+                sys.stderr.write("iteration completed " + str(i + 1) + "\n")
+
+                # metrics = pd.DataFrame()
+                metrics_selective = pd.DataFrame({"sample_size": n,
+                                                  "regression_dim": p,
+                                                  "correlation": rho,
+                                                  "SNR": snr,
+                                                  "signal_type": beta_type,
+                                                  "risk": output[0],
+                                                  "coverage": output[6],
+                                                  "length": output[9],
+                                                  "power": output[12],
+                                                  "method": "Selective"}, index=[0])
+
+                metrics_Lee = pd.DataFrame({"sample_size": n,
+                                            "regression_dim": p,
+                                            "correlation": rho,
+                                            "SNR": snr,
+                                            "signal_type": beta_type,
+                                            "risk": output[5],
+                                            "coverage": output[7],
+                                            "length": output[10],
+                                            "power": output[13],
+                                            "method": "Lee"}, index=[0])
+
+                metrics_unad = pd.DataFrame({"sample_size": n,
+                                             "regression_dim": p,
+                                             "correlation": rho,
+                                             "SNR": snr,
+                                             "signal_type": beta_type,
+                                             "risk": output[5],
+                                             "coverage": output[8],
+                                             "length": output[11],
+                                             "power": output[14],
+                                             "method": "Naive"}, index=[0])
+
+                metrics = pd.DataFrame({"sample_size": n,
+                                        "regression_dim": p,
+                                        "correlation": rho,
+                                        "SNR": snr,
+                                        "signal_type": beta_type,
+                                        "Risk_selMLE": output[0],
+                                        "Risk_indest": output[1],
+                                        "Risk_LASSO_rand": output[2],
+                                        "Risk_relLASSO_rand": output[3],
+                                        "Risk_relLASSO_nonrand": output[4],
+                                        "Risk_LASSO_nonrand": output[5]}, index=[0])
+
+                df_master = df_master.append(metrics_selective, ignore_index=True)
+                df_master = df_master.append(metrics_Lee, ignore_index=True)
+                df_master = df_master.append(metrics_unad, ignore_index=True)
+                df_risk = df_risk.append(metrics, ignore_index=True)
+
+        elif target == "full":
+            if n > p:
+                full_dispersion = True
+            else:
+                full_dispersion = False
+            for i in range(ndraw):
+                output = comparison_risk_inference_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
                                                         randomizer_scale=np.sqrt(0.25), target=target,
-                                                        full_dispersion=True)
-
-            risk_selMLE += output[0]
-            risk_indest += output[1]
-            risk_LASSO_rand += output[2]
-            risk_relLASSO_rand += output[3]
-            risk_relLASSO_nonrand += output[4]
-            risk_LASSO_nonrand += output[5]
-
-            coverage_selMLE += output[6]
-            coverage_Lee += output[7]
-            coverage_unad += output[8]
-
-            length_sel += output[9]
-            length_Lee += output[10]
-            length_unad += output[11]
-
-            power_sel += output[12]
-            power_Lee += output[13]
-            power_unad += output[14]
-
-            sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
-            sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
-            sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
-            sys.stderr.write(
-                "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
-
-            sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-            sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
-
-            sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
-
-            sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
-
-            sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
-
-            sys.stderr.write("iteration completed " + str(i + 1) + "\n")
-
-    elif target == "full":
-        if n > p:
-            full_dispersion = True
-        else:
-            full_dispersion = False
-        for i in range(ndraw):
-            output = comparison_risk_inference_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
-                                                    randomizer_scale=np.sqrt(0.25), target=target,
-                                                    full_dispersion=full_dispersion)
-
-            risk_selMLE += output[0]
-            risk_indest += output[1]
-            risk_LASSO_rand += output[2]
-            risk_relLASSO_rand += output[3]
-            risk_relLASSO_nonrand += output[4]
-            risk_LASSO_nonrand += output[5]
-
-            coverage_selMLE += output[6]
-            coverage_unad += output[7]
+                                                        full_dispersion=full_dispersion)
 
-            length_sel += output[8]
-            length_unad += output[9]
+                risk_selMLE += output[0]
+                risk_indest += output[1]
+                risk_LASSO_rand += output[2]
+                risk_relLASSO_rand += output[3]
+                risk_relLASSO_nonrand += output[4]
+                risk_LASSO_nonrand += output[5]
 
-            power_sel += output[10]
-            power_unad += output[11]
+                coverage_selMLE += output[6]
+                coverage_unad += output[7]
 
-            sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
-            sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
-            sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
-            sys.stderr.write(
-                "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
+                length_sel += output[8]
+                length_unad += output[9]
 
-            sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-            sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+                power_sel += output[10]
+                power_unad += output[11]
 
-            sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+                sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
+                sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
+                sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+                sys.stderr.write(
+                    "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+                sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
+                sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+                sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
+                sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("iteration completed " + str(i + 1) + "\n")
+                sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+                sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
 
+                sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+                sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
 
+                sys.stderr.write("iteration completed " + str(i + 1) + "\n")
 
+    df_master.to_csv("/Users/snigdhapanigrahi/adjusted_MLE/results/metrics_selected_target_medium.csv", index=False)
+    df_risk.to_csv("/Users/snigdhapanigrahi/adjusted_MLE/results/risk_selected_target_medium.csv", index=False)
\ No newline at end of file

From d544fb47e5649eea0fb392be2d621e750cc8deaa Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 10 Apr 2018 00:17:27 -0700
Subject: [PATCH 561/617] tried to tune randomization scale

---
 .../tests/test_inferential_metrics.py         | 50 +++++++++++--------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 9013252ba..7b1f6518e 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -129,8 +129,11 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
 
         if full_dispersion:
             dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+        else:
+            dispersion = None
 
-        sigma_ = np.std(y)
+        #sigma_ = np.std(y)
+        sigma_ = np.sqrt(dispersion)
         LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
         soln = LASSO_py.fit()
         active_LASSO = (soln != 0)
@@ -138,28 +141,33 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
         glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
 
         const = highdim.gaussian
-        lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
+        num_seq = 25
+        lam_seq = sigma_* np.linspace(0.5, 3, num=num_seq) * \
                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        err = np.zeros(100)
-        for k in range(100):
-            W = lam_seq[k]
-            conv = const(X,
-                         y,
-                         W * np.ones(p),
-                         randomizer_scale=randomizer_scale * sigma_)
-            signs = conv.fit()
-            nonzero = signs != 0
-            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
-
-            full_estimate = np.zeros(p)
-            full_estimate[nonzero] = estimate
-            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
-
-        lam = lam_seq[np.argmin(err)]
+        scale_seq =  np.linspace(0.10, 0.60, num=10)
+        #lam_seq = np.sqrt(2 * np.log(p)) * sigma_* np.linspace(0.25, 2.75, num=100)
+        err = np.zeros((10, num_seq))
+        for m in range(10):
+            for k in range(num_seq):
+                W = lam_seq[k]
+                conv = const(X,
+                             y,
+                             W * np.ones(p),
+                             randomizer_scale=scale_seq[m] * sigma_)
+                signs = conv.fit()
+                nonzero = signs != 0
+                estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+
+                full_estimate = np.zeros(p)
+                full_estimate[nonzero] = estimate
+                err[m,k] =np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+
+        arg_min = np.argwhere(err == np.min(err))
+        lam = lam_seq[arg_min[0,1]]
+        randomizer_scale = scale_seq[arg_min[0,0]]
 
         # sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
-        #lam = np.sqrt(2 * np.log(p)) * sigma_
         randomized_lasso = const(X,
                                  y,
                                  lam*np.ones(p),
@@ -371,8 +379,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_Lee = 0.
     power_unad = 0.
 
-    target = "full"
-    n, p, rho, s, beta_type, snr = 200, 1000, 0.35, 10, 1, 0.3
+    target = "selected"
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
 
     if target == "selected":
         for i in range(ndraw):

From 38fb6093b5d58a7227590fb1e4af0c34d7ce5a9f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 10 Apr 2018 09:48:40 -0700
Subject: [PATCH 562/617] tolerance arguments for selective mle

---
 C-software                                   |   2 +-
 selection/randomized/lasso.py                |   3 +-
 selection/randomized/modelQ.py               | 105 ++++++++++++++++++-
 selection/randomized/query.py                |   9 +-
 selection/randomized/selective_MLE_utils.pyx |   5 +
 5 files changed, 116 insertions(+), 8 deletions(-)

diff --git a/C-software b/C-software
index aca77f1e3..b3acb5740 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit aca77f1e320dafba6041c4dc44cf9ffc049edec8
+Subproject commit b3acb57407e72605111423af2a4eb0e40cadffa7
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 348e4a397..16de3944d 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1662,7 +1662,7 @@ def selective_MLE(self,
                       level=0.9,
                       compute_intervals=False,
                       dispersion=None,
-                      solve_args={}):
+                      solve_args={'tol':1.e-12}):
         """
 
         Parameters
@@ -1758,6 +1758,7 @@ def selected_targets(self, features=None, dispersion=None):
         if dispersion is None: # use Pearson's X^2
             dispersion = ((y - self.loglike.saturated_loss.mean_function(Xfeat.dot(observed_target)))**2 / self._W).sum() / (n - Xfeat.shape[1])
 
+        print(dispersion, 'dispersion')
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     def full_targets(self, features=None, dispersion=None):
diff --git a/selection/randomized/modelQ.py b/selection/randomized/modelQ.py
index 9618c0be1..e194e6d54 100644
--- a/selection/randomized/modelQ.py
+++ b/selection/randomized/modelQ.py
@@ -309,7 +309,16 @@ def summary(self,
         if parameter is None:
             parameter = np.zeros(self.loss.shape[0])
 
-        observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
+        if target == 'selected':
+            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
+        else:
+            X, y = self.loglike.data
+            n, p = X.shape
+            if n > p and target == 'full':
+                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features, dispersion=dispersion)
+            else:
+                raise NotImplementedError
+                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features, dispersion=dispersion)
 
         if self._overall.sum() > 0:
             opt_sample = self.sampler.sample(ndraw,  burnin)
@@ -342,6 +351,59 @@ def summary(self,
             return [], [], []
 
 
+    def selective_MLE(self,
+                      target="selected",
+                      features=None,
+                      parameter=None,
+                      level=0.9,
+                      compute_intervals=False,
+                      dispersion=None,
+                      solve_args={'tol':1.e-12}):
+        """
+
+        Parameters
+        ----------
+
+        target : one of ['selected', 'full']
+
+        features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
+
+        level : float
+            Confidence level.
+
+        ndraw : int (optional)
+            Defaults to 1000.
+
+        burnin : int (optional)
+            Defaults to 1000.
+
+        compute_intervals : bool
+            Compute confidence intervals?
+
+        dispersion : float (optional)
+            Use a known value for dispersion, or Pearson's X^2?
+
+        """
+
+        if parameter is None:
+            parameter = np.zeros(self.loss.shape[0])
+
+        observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features, dispersion=dispersion)
+
+        # working out conditional law of opt variables given
+        # target after decomposing score wrt target
+
+        return self.sampler.selective_MLE(observed_target, 
+                                          cov_target, 
+                                          cov_target_score, 
+                                          self.observed_opt_state,
+                                          solve_args=solve_args)
+
     def selected_targets(self, features=None, dispersion=None):
 
         X, y = self.X, self.y
@@ -353,12 +415,14 @@ def selected_targets(self, features=None, dispersion=None):
             noverall = active.sum() + unpenalized.sum()
             overall = active + unpenalized
 
+            Xfeat = X[:,overall]
             score_linear = self.score_transform[0]
             Q = -score_linear[overall]
-            cov_target = np.linalg.inv(Q)
+            Qi = np.linalg.inv(Q)
+            cov_target = Qi.dot(Xfeat.T.dot(Xfeat)).dot(Qi) # sandwich estimator
             observed_target = self._beta_full[overall]
             crosscov_target_score = score_linear.dot(cov_target)
-            Xfeat = X[:,overall]
+            print(cov_target[:5][:,:5])
             alternatives = [{1:'greater', -1:'less'}[int(s)] for s in self.selection_variable['sign'][active]] + ['twosided'] * unpenalized.sum()
 
         else:
@@ -372,13 +436,44 @@ def selected_targets(self, features=None, dispersion=None):
             Gfeat = self.loss.smooth_objective(self.initial_soln, 'grad')[features] - Xfeat.T.dot(y)
             Qfeat_inv = np.linalg.inv(Qfeat)
             one_step = self.initial_soln[features] - Qfeat_inv.dot(Gfeat)
-            cov_target = Qfeat_inv
+            cov_target = Qfeat_inv.dot(Xfeat.T.dot(Xfeat)).dot(Qfeat_inv)
             _score_linear = -self.Q[features]
             crosscov_target_score = _score_linear.dot(cov_target)
             observed_target = one_step
             alternatives = ['twosided'] * features.sum()
 
         if dispersion is None: # use Pearson's X^2
-            dispersion = ((y - Xfeat.dot(observed_target))**2).sum() / (n - Xfeat.shape[1])
+            relaxed = np.linalg.pinv(Xfeat).dot(y)
+            dispersion = ((y - Xfeat.dot(relaxed))**2).sum() / (n - Xfeat.shape[1])
+        print(dispersion, 'dispersion')
 
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+
+    def full_targets(self, features=None, dispersion=None):
+
+        if features is None:
+            features = self._overall
+        features_bool = np.zeros(self._overall.shape, np.bool)
+        features_bool[features] = True
+        features = features_bool
+
+        X, y = self.loglike.data
+        n, p = X.shape
+
+        # target is one-step estimator
+
+        Qfull = self.Q
+        G = self.loss.smooth_objective(self.initial_soln, 'grad') - X.T.dot(y)
+        Qfull_inv = np.linalg.inv(Qfull)
+        one_step = self.initial_soln - Qfull_inv.dot(G)
+        cov_target = Qfull_inv[features][:,features]
+        observed_target = one_step[features]
+        crosscov_target_score = np.zeros((p, cov_target.shape[0]))
+        crosscov_target_score[features] = -np.identity(cov_target.shape[0])
+
+        if dispersion is None: # use Pearson's X^2
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step)))**2 / self._W).sum() / (n - p)
+
+        alternatives = ['twosided'] * features.sum()
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 92801be46..082a42b1a 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -475,7 +475,13 @@ def sample(self, ndraw, burnin):
                                        ndraw=ndraw,
                                        burnin=burnin)
 
-    def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_point, solve_args={}, alpha=0.1):
+    def selective_MLE(self, 
+                      observed_target, 
+                      cov_target, 
+                      cov_target_score, 
+                      feasible_point, 
+                      solve_args={'tol':1.e-12}, 
+                      alpha=0.1):
         """
         Selective MLE based on approximation of
         CGF.
@@ -495,6 +501,7 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
         conjugate_arg = prec_opt.dot(self.affine_con.mean)
 
         feasible_point = np.ones(prec_opt.shape[0])
+        print('solve', solve_args)
         val, soln, hess = solve_barrier_nonneg(conjugate_arg,
                                                prec_opt,
                                                feasible_point,
diff --git a/selection/randomized/selective_MLE_utils.pyx b/selection/randomized/selective_MLE_utils.pyx
index b5d2603d9..25c1be2de 100644
--- a/selection/randomized/selective_MLE_utils.pyx
+++ b/selection/randomized/selective_MLE_utils.pyx
@@ -17,6 +17,7 @@ cdef extern from "randomized_lasso.h":
                          double *scaling,                    # Diagonal scaling matrix for log barrier
                          int ndim,                           # Dimension of opt_variable
                          int max_iter,                       # Maximum number of iterations
+                         int min_iter,                       # Minimum number of iterations
                          double value_tol,                   # Tolerance for convergence based on value
                          double initial_step)                # Initial stepsize 
 
@@ -28,6 +29,7 @@ def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient v
                    np.ndarray[DTYPE_float_t, ndim=1] scaling,       # Diagonal scaling matrix for log barrier
                    double initial_step,
                    int max_iter=1000,
+                   int min_iter=50,
                    double value_tol=1.e-8):
    
     ndim = precision.shape[0]
@@ -40,6 +42,7 @@ def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient v
                            <double *>scaling.data,
                            ndim,
                            max_iter,
+                           min_iter,
                            value_tol,
                            initial_step)
 
@@ -52,6 +55,7 @@ def solve_barrier_nonneg(conjugate_arg,
                          feasible_point,
                          step=1,
                          max_iter=1000,
+         		 min_iter=50,
                          tol=1.e-8):
 
     gradient = np.zeros_like(conjugate_arg)
@@ -67,4 +71,5 @@ def solve_barrier_nonneg(conjugate_arg,
                           scaling,
                           step,
                           max_iter=max_iter,
+                          min_iter=min_iter,
                           value_tol=tol)

From aac8dd0be89ab6d4bb0d296ea93d29ac402404e9 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 10 Apr 2018 22:32:21 -0700
Subject: [PATCH 563/617] scale of LASSO in python made to agree with glmnet

---
 .../tests/test_inferential_metrics.py         | 85 ++++++++++---------
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 7b1f6518e..0112bc377 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -1,4 +1,5 @@
 import numpy as np, sys
+
 from rpy2 import robjects
 import rpy2.robjects.numpy2ri
 rpy2.robjects.numpy2ri.activate()
@@ -15,8 +16,8 @@ def glmnet_lasso(X, y, lambda_val):
                 X = as.matrix(X)
                 lam = as.matrix(lambda)[1,1]
                 n = nrow(X)
-                fit = glmnet(X, y, standardize=TRUE, intercept=FALSE)
-                estimate = coef(fit, s=lam)[-1]
+                fit = glmnet(X, y, standardize=TRUE, intercept=FALSE, thresh=1.e-10)
+                estimate = coef(fit, s=lam, exact=TRUE, x=X, y=y)[-1]
                 return(list(estimate = estimate))
                 }''')
 
@@ -59,21 +60,22 @@ def tuned_lasso(X, y, X_val,y_val):
         beta.hat.lasso = as.matrix(coef(LASSO))
         min.lam = min(rel.LASSO$lambda)
         max.lam = max(rel.LASSO$lambda)
-        #print(paste("max and min values of lambda", max.lam, min.lam))
+
         lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
+  
         muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
         muhat.val.lasso = as.matrix(predict(LASSO, X.val))
         err.val.rellasso = colMeans((muhat.val.rellasso - Y.val)^2)
         err.val.lasso = colMeans((muhat.val.lasso - Y.val)^2)
+
         opt_lam = ceiling(which.min(err.val.rellasso)/10)
         lambda.tuned.rellasso = lam.seq[opt_lam]
         lambda.tuned.lasso = lam.seq[which.min(err.val.lasso)]
         fit = glmnet(X, Y, standardize=TRUE, intercept=TRUE)
-        estimate.tuned = coef(fit, s=lambda.tuned.lasso)[-1]
-        #print(paste("compare estimates", max(abs(estimate.tuned-(beta.hat.lasso[,which.min(err.val.lasso)])[-1])),
-        #length(which(estimate.tuned!=0)), length(which((beta.hat.lasso[,which.min(err.val.lasso)])[-1]!=0))))
+        estimate.tuned = coef(fit, s=lambda.tuned.lasso, exact=TRUE, x=X, y=Y)[-1]
+        beta.hat.lasso = (beta.hat.lasso[,which.min(err.val.lasso)])[-1]
         return(list(beta.hat.rellasso = (beta.hat.rellasso[,which.min(err.val.rellasso)])[-1],
-        beta.hat.lasso = (beta.hat.lasso[,which.min(err.val.lasso)])[-1],
+        beta.hat.lasso = beta.hat.lasso,
         lambda.tuned.rellasso = lambda.tuned.rellasso, lambda.tuned.lasso= lambda.tuned.lasso,
         lambda.seq = lam.seq))
         }''')
@@ -90,8 +92,8 @@ def tuned_lasso(X, y, X_val,y_val):
     tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
     estimator_rellasso = np.array(tuned_est.rx2('beta.hat.rellasso'))
     estimator_lasso = np.array(tuned_est.rx2('beta.hat.lasso'))
-    lam_tuned_rellasso = np.array(tuned_est.rx2('lambda.tuned.rellasso'))
-    lam_tuned_lasso = np.array(tuned_est.rx2('lambda.tuned.lasso'))
+    lam_tuned_rellasso = np.asscalar(np.array(tuned_est.rx2('lambda.tuned.rellasso')))
+    lam_tuned_lasso = np.asscalar(np.array(tuned_est.rx2('lambda.tuned.lasso')))
     lam_seq = np.array(tuned_est.rx2('lambda.seq'))
     return estimator_rellasso, estimator_lasso, lam_tuned_rellasso, lam_tuned_lasso, lam_seq
 
@@ -134,8 +136,8 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
 
         #sigma_ = np.std(y)
         sigma_ = np.sqrt(dispersion)
-        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
-        soln = LASSO_py.fit()
+        LASSO_py = lasso.gaussian(X, y, np.sqrt(n-1) * lam_tuned_lasso, np.asscalar(sigma_))
+        soln = LASSO_py.fit(solve_args={'min_its':500})
         active_LASSO = (soln != 0)
         nactive_LASSO = active_LASSO.sum()
         glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
@@ -246,8 +248,8 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
                power_unad/float((beta != 0).sum())
 
 
-def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2, snr=0.2,
-                                   randomizer_scale=np.sqrt(0.25), target = "full",
+def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2,
+                                   snr=0.2, randomizer_scale=0.5, target = "full",
                                    full_dispersion = True):
 
     while True:
@@ -257,54 +259,55 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
         active_nonrand = (est_LASSO != 0)
         nactive_nonrand = active_nonrand.sum()
 
-        _std = X.std(0)
         X -= X.mean(0)[None, :]
-        X /= (X.std(0)[None, :] * np.sqrt(n))
+        X /= (X.std(0)[None, :] * np.sqrt(n/(n-1.)))
         X_val -= X_val.mean(0)[None, :]
-        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(n/(n-1.)))
 
         y = y - y.mean()
         y_val = y_val - y_val.mean()
 
-        sigma_ = np.std(y)
-        print("true and estimated sigma", sigma, sigma_)
-
         if full_dispersion:
             dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
         else:
             dispersion = None
 
-        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
+        sigma_ = np.sqrt(dispersion)
+        print("full estimated and true sigma", sigma, sigma_)
+
+        LASSO_py = lasso.gaussian(X, y, n * lam_tuned_lasso, sigma_)
         soln = LASSO_py.fit()
         active_LASSO = (soln != 0)
         nactive_LASSO = active_LASSO.sum()
-        glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
+        glm_LASSO = glmnet_lasso(X, y, lam_tuned_lasso)
 
-        const = highdim.gaussian
-        lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
+        lam_seq = sigma_ * np.linspace(0.25, 2.75, num=100) * \
                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
         err = np.zeros(100)
         for k in range(100):
             W = lam_seq[k]*np.ones(p)
-            conv = const(X,
-                         y,
-                         W,
-                         randomizer_scale=randomizer_scale * sigma_)
+            conv = highdim.gaussian(X,
+                                    y,
+                                    W,
+                                    randomizer_scale=np.sqrt(n) * 
+                                    randomizer_scale * sigma_)
             signs = conv.fit()
             nonzero = signs != 0
             estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
 
             full_estimate = np.zeros(p)
             full_estimate[nonzero] = estimate
+            #err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
             err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
-        #lam = np.sqrt(2 * np.log(p)) * sigma_
-        randomized_lasso = const(X,
-                                 y,
-                                 lam*np.ones(p),
-                                 randomizer_scale=randomizer_scale * sigma_)
+        #print(lam_tuned_lasso * n, lam, lam_seq)
+
+        randomized_lasso = highdim.gaussian(X,
+                                            y,
+                                            lam * np.ones(p),
+                                            randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_)
 
         signs = randomized_lasso.fit()
         nonzero = signs != 0
@@ -321,13 +324,13 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             sel_MLE = np.zeros(p)
             estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
                                                                                                          dispersion=dispersion)
-            sel_MLE[nonzero] = estimate / np.sqrt(n)
+            sel_MLE[nonzero] = estimate
             ind_estimator = np.zeros(p)
-            ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
+            ind_estimator[nonzero] = ind_unbiased_estimator 
 
-            beta_target_rand = np.sqrt(n)* _std[nonzero] * beta[nonzero]
-            beta_target_nonrand_py = np.sqrt(n)* _std[active_LASSO] * beta[active_LASSO]
-            beta_target_nonrand = np.sqrt(n)* _std[active_nonrand] * beta[active_nonrand]
+            beta_target_rand = beta[nonzero]
+            beta_target_nonrand_py = beta[active_LASSO]
+            beta_target_nonrand = beta[active_nonrand]
 
             post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
             unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
@@ -344,8 +347,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     if True:
         return relative_risk(sel_MLE, beta, Sigma), \
                relative_risk(ind_estimator, beta, Sigma), \
-               relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
-               relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
+               relative_risk(randomized_lasso.initial_soln , beta, Sigma), \
+               relative_risk(randomized_lasso._beta_full, beta, Sigma), \
                relative_risk(rel_LASSO, beta, Sigma), \
                relative_risk(est_LASSO, beta, Sigma), \
                cov_sel,\
@@ -379,7 +382,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_Lee = 0.
     power_unad = 0.
 
-    target = "selected"
+    target = "full"
     n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
 
     if target == "selected":
@@ -474,4 +477,4 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
             sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("iteration completed " + str(i + 1) + "\n")
\ No newline at end of file
+            sys.stderr.write("iteration completed " + str(i + 1) + "\n")

From 89561e127d0b876e16e20392eb6046609737e144 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 10 Apr 2018 23:04:21 -0700
Subject: [PATCH 564/617] corrected scales and reduced length of grid search
 for tuning randomized LASSO for selected targets

---
 .../tests/test_inferential_metrics.py         | 87 +++++++++----------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 0112bc377..831357526 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -116,15 +116,15 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
                                                         s=s, beta_type=beta_type, snr=snr)
+        true_mean = X.dot(beta)
         rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
         active_nonrand = (est_LASSO != 0)
         nactive_nonrand = active_nonrand.sum()
-        true_mean = X.dot(beta)
 
         X -= X.mean(0)[None, :]
-        X /= (X.std(0)[None, :] * np.sqrt(n))
+        X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.)))
         X_val -= X_val.mean(0)[None, :]
-        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
+        X_val /= (X_val.std(0)[None, :] * np.sqrt(n / (n - 1.)))
 
         y = y - y.mean()
         y_val = y_val - y_val.mean()
@@ -134,49 +134,47 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
         else:
             dispersion = None
 
-        #sigma_ = np.std(y)
         sigma_ = np.sqrt(dispersion)
-        LASSO_py = lasso.gaussian(X, y, np.sqrt(n-1) * lam_tuned_lasso, np.asscalar(sigma_))
-        soln = LASSO_py.fit(solve_args={'min_its':500})
+        print("estimated and true sigma", sigma, sigma_)
+
+        LASSO_py = lasso.gaussian(X, y, n * lam_tuned_lasso, sigma_)
+        soln = LASSO_py.fit()
         active_LASSO = (soln != 0)
         nactive_LASSO = active_LASSO.sum()
-        glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
+        glm_LASSO = glmnet_lasso(X, y, lam_tuned_lasso)
 
-        const = highdim.gaussian
-        num_seq = 25
-        lam_seq = sigma_* np.linspace(0.5, 3, num=num_seq) * \
+        tune_num = 50
+        lam_seq = sigma_ * np.linspace(0.25, 2.75, num=tune_num) * \
                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        scale_seq =  np.linspace(0.10, 0.60, num=10)
-        #lam_seq = np.sqrt(2 * np.log(p)) * sigma_* np.linspace(0.25, 2.75, num=100)
-        err = np.zeros((10, num_seq))
-        for m in range(10):
-            for k in range(num_seq):
-                W = lam_seq[k]
-                conv = const(X,
-                             y,
-                             W * np.ones(p),
-                             randomizer_scale=scale_seq[m] * sigma_)
-                signs = conv.fit()
-                nonzero = signs != 0
-                estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
-
-                full_estimate = np.zeros(p)
-                full_estimate[nonzero] = estimate
-                err[m,k] =np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
-
-        arg_min = np.argwhere(err == np.min(err))
-        lam = lam_seq[arg_min[0,1]]
-        randomizer_scale = scale_seq[arg_min[0,0]]
-
-        # sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
+        err = np.zeros(tune_num)
+        for k in range(tune_num):
+            W = lam_seq[k] * np.ones(p)
+            conv = highdim.gaussian(X,
+                                    y,
+                                    W,
+                                    randomizer_scale=np.sqrt(n) *
+                                                     randomizer_scale * sigma_)
+            signs = conv.fit()
+            nonzero = signs != 0
+            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+
+            full_estimate = np.zeros(p)
+            full_estimate[nonzero] = estimate
+            # err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
+            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+
+        lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
-        randomized_lasso = const(X,
-                                 y,
-                                 lam*np.ones(p),
-                                 randomizer_scale=randomizer_scale * sigma_)
+        # print(lam_tuned_lasso * n, lam, lam_seq)
+
+        randomized_lasso = highdim.gaussian(X,
+                                            y,
+                                            lam * np.ones(p),
+                                            randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_)
 
         signs = randomized_lasso.fit()
         nonzero = signs != 0
+
         sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
         sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
         sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
@@ -273,7 +271,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             dispersion = None
 
         sigma_ = np.sqrt(dispersion)
-        print("full estimated and true sigma", sigma, sigma_)
+        print("estimated and true sigma", sigma, sigma_)
 
         LASSO_py = lasso.gaussian(X, y, n * lam_tuned_lasso, sigma_)
         soln = LASSO_py.fit()
@@ -281,10 +279,11 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
         nactive_LASSO = active_LASSO.sum()
         glm_LASSO = glmnet_lasso(X, y, lam_tuned_lasso)
 
-        lam_seq = sigma_ * np.linspace(0.25, 2.75, num=100) * \
+        tune_num = 50
+        lam_seq = sigma_ * np.linspace(0.25, 2.75, num=tune_num) * \
                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        err = np.zeros(100)
-        for k in range(100):
+        err = np.zeros(tune_num)
+        for k in range(tune_num):
             W = lam_seq[k]*np.ones(p)
             conv = highdim.gaussian(X,
                                     y,
@@ -382,13 +381,13 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_Lee = 0.
     power_unad = 0.
 
-    target = "full"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
+    target = "selected"
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.20
 
     if target == "selected":
         for i in range(ndraw):
             output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
-                                                        randomizer_scale=np.sqrt(0.25), target=target,
+                                                        randomizer_scale=np.sqrt(0.5), target=target,
                                                         full_dispersion=True)
 
             risk_selMLE += output[0]

From 6f000135cd4b075a41e2b603d99edf581ebbff74 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 00:28:29 -0700
Subject: [PATCH 565/617] corrected scales in selected target

---
 .../adjusted_MLE/tests/test_inferential_metrics.py     | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 831357526..fd904c244 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -188,9 +188,9 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
             sel_MLE = np.zeros(p)
             estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
                                                                                                          dispersion=dispersion)
-            sel_MLE[nonzero] = estimate / np.sqrt(n)
+            sel_MLE[nonzero] = estimate
             ind_estimator = np.zeros(p)
-            ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
+            ind_estimator[nonzero] = ind_unbiased_estimator
 
             beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
             beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
@@ -231,8 +231,8 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
     if True:
         return relative_risk(sel_MLE, beta, Sigma), \
                relative_risk(ind_estimator, beta, Sigma), \
-               relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
-               relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
+               relative_risk(randomized_lasso.initial_soln, beta, Sigma), \
+               relative_risk(randomized_lasso._beta_full, beta, Sigma), \
                relative_risk(rel_LASSO, beta, Sigma), \
                relative_risk(est_LASSO, beta, Sigma), \
                cov_sel,\
@@ -382,7 +382,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_unad = 0.
 
     target = "selected"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.20
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
 
     if target == "selected":
         for i in range(ndraw):

From 680ee7c35a769447c69cda046a243514ada33463 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 00:59:45 -0700
Subject: [PATCH 566/617] randomized LASSO est seems comparable in risk to
 tuned estimators when p>n

---
 .../adjusted_MLE/tests/test_inferential_metrics.py    | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index fd904c244..ecd19130f 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -267,10 +267,11 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
         if full_dispersion:
             dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
+            sigma_ = np.sqrt(dispersion)
         else:
             dispersion = None
+            sigma_ = np.std(y)
 
-        sigma_ = np.sqrt(dispersion)
         print("estimated and true sigma", sigma, sigma_)
 
         LASSO_py = lasso.gaussian(X, y, n * lam_tuned_lasso, sigma_)
@@ -296,8 +297,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
             full_estimate = np.zeros(p)
             full_estimate[nonzero] = estimate
-            #err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
-            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+            err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
+            #err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
@@ -381,8 +382,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_Lee = 0.
     power_unad = 0.
 
-    target = "selected"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
+    target = "full"
+    n, p, rho, s, beta_type, snr = 200, 1000, 0.35, 5, 1, 0.20
 
     if target == "selected":
         for i in range(ndraw):

From 1e4bf658c6b3f84a87cd745fdf321c762034e419 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 13:18:17 -0700
Subject: [PATCH 567/617] added function fixedLassoInf from R

---
 .../tests/test_inferential_metrics.py         | 32 +++++++++++++++++--
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index ecd19130f..28373d896 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -9,6 +9,31 @@
 from selection.algorithms.lasso import lasso
 from scipy.stats import norm as ndist
 
+def selInf_R(X, y, beta, lam, sigma, alpha=0.1):
+    robjects.r('''
+               library("selectiveInference")
+               selInf = function(X, y, beta, lam, sigma, alpha= 0.1){
+               y = as.matrix(y)
+               X = as.matrix(X)
+               beta = as.matrix(beta)
+               lam = as.matrix(lam)[1,1]
+               sigma = as.matrix(sigma)[1,1]
+               inf = fixedLassoInf(x = X, y = y, beta = beta, lambda=lam, family = "gaussian",
+                                   intercept=FALSE, sigma=sigma, alpha=alpha,type="full")
+               print(paste("test",inf$ci))
+               return(list(ci = inf$ci))}
+               ''')
+
+    inf_R = robjects.globalenv['selInf']
+    n, p = X.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
+    r_beta = robjects.r.matrix(y, nrow=p, ncol=1)
+    r_lam = robjects.r.matrix(lam, nrow=1, ncol=1)
+    r_sigma = robjects.r.matrix(lam, nrow=1, ncol=1)
+    ci = np.array(inf_R(r_X, r_y, r_beta, r_lam, r_sigma).rx2('ci'))
+    return ci
+
 def glmnet_lasso(X, y, lambda_val):
     robjects.r('''
                 glmnet_LASSO = function(X,y,lambda){
@@ -279,6 +304,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
         active_LASSO = (soln != 0)
         nactive_LASSO = active_LASSO.sum()
         glm_LASSO = glmnet_lasso(X, y, lam_tuned_lasso)
+        sel_inf = selInf_R(X, y, glm_LASSO, lam_tuned_lasso, sigma_, alpha=0.1)
 
         tune_num = 50
         lam_seq = sigma_ * np.linspace(0.25, 2.75, num=tune_num) * \
@@ -297,8 +323,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
             full_estimate = np.zeros(p)
             full_estimate[nonzero] = estimate
-            err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
-            #err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+            #err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
+            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
@@ -383,7 +409,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_unad = 0.
 
     target = "full"
-    n, p, rho, s, beta_type, snr = 200, 1000, 0.35, 5, 1, 0.20
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
 
     if target == "selected":
         for i in range(ndraw):

From 60900c4c6786a979ff777bbcc0cde9266d75c7f5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 14:14:19 -0700
Subject: [PATCH 568/617] corrected arguments that are passed to fixedLassoInf

---
 .../adjusted_MLE/tests/test_inferential_metrics.py    | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 28373d896..82711b65e 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -19,7 +19,7 @@ def selInf_R(X, y, beta, lam, sigma, alpha=0.1):
                lam = as.matrix(lam)[1,1]
                sigma = as.matrix(sigma)[1,1]
                inf = fixedLassoInf(x = X, y = y, beta = beta, lambda=lam, family = "gaussian",
-                                   intercept=FALSE, sigma=sigma, alpha=alpha,type="full")
+                                   intercept=FALSE, sigma=sigma, alpha=alpha, type="full")
                print(paste("test",inf$ci))
                return(list(ci = inf$ci))}
                ''')
@@ -28,9 +28,9 @@ def selInf_R(X, y, beta, lam, sigma, alpha=0.1):
     n, p = X.shape
     r_X = robjects.r.matrix(X, nrow=n, ncol=p)
     r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-    r_beta = robjects.r.matrix(y, nrow=p, ncol=1)
+    r_beta = robjects.r.matrix(beta, nrow=p, ncol=1)
     r_lam = robjects.r.matrix(lam, nrow=1, ncol=1)
-    r_sigma = robjects.r.matrix(lam, nrow=1, ncol=1)
+    r_sigma = robjects.r.matrix(sigma, nrow=1, ncol=1)
     ci = np.array(inf_R(r_X, r_y, r_beta, r_lam, r_sigma).rx2('ci'))
     return ci
 
@@ -304,7 +304,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
         active_LASSO = (soln != 0)
         nactive_LASSO = active_LASSO.sum()
         glm_LASSO = glmnet_lasso(X, y, lam_tuned_lasso)
-        sel_inf = selInf_R(X, y, glm_LASSO, lam_tuned_lasso, sigma_, alpha=0.1)
+        print("shape", glm_LASSO.shape, glm_LASSO)
+        sel_inf = selInf_R(X, y, glm_LASSO, n * lam_tuned_lasso, sigma_, alpha=0.1)
 
         tune_num = 50
         lam_seq = sigma_ * np.linspace(0.25, 2.75, num=tune_num) * \
@@ -409,7 +410,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_unad = 0.
 
     target = "full"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 1.10
 
     if target == "selected":
         for i in range(ndraw):

From 7ad53406a5ef60f5e38f43721d59f462a14610f2 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 15:27:24 -0700
Subject: [PATCH 569/617] calling R package for Lee inference, also for
 selected targets

---
 .../tests/test_inferential_metrics.py         | 84 +++++++++++--------
 1 file changed, 49 insertions(+), 35 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 82711b65e..dfb7a3db3 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -9,19 +9,23 @@
 from selection.algorithms.lasso import lasso
 from scipy.stats import norm as ndist
 
-def selInf_R(X, y, beta, lam, sigma, alpha=0.1):
+def selInf_R(X, y, beta, lam, sigma, Type, alpha=0.1):
     robjects.r('''
                library("selectiveInference")
-               selInf = function(X, y, beta, lam, sigma, alpha= 0.1){
+               selInf = function(X, y, beta, lam, sigma, Type, alpha= 0.1){
                y = as.matrix(y)
                X = as.matrix(X)
                beta = as.matrix(beta)
                lam = as.matrix(lam)[1,1]
                sigma = as.matrix(sigma)[1,1]
+               Type = as.matrix(Type)[1,1]
+               if(Type == 1){
+                   type = "full"} else{
+                   type = "partial"}
                inf = fixedLassoInf(x = X, y = y, beta = beta, lambda=lam, family = "gaussian",
-                                   intercept=FALSE, sigma=sigma, alpha=alpha, type="full")
-               print(paste("test",inf$ci))
-               return(list(ci = inf$ci))}
+                                   intercept=FALSE, sigma=sigma, alpha=alpha, type=type)
+               #print(inf$ci)
+               return(list(ci = inf$ci, pvalue = inf$pv))}
                ''')
 
     inf_R = robjects.globalenv['selInf']
@@ -31,8 +35,11 @@ def selInf_R(X, y, beta, lam, sigma, alpha=0.1):
     r_beta = robjects.r.matrix(beta, nrow=p, ncol=1)
     r_lam = robjects.r.matrix(lam, nrow=1, ncol=1)
     r_sigma = robjects.r.matrix(sigma, nrow=1, ncol=1)
-    ci = np.array(inf_R(r_X, r_y, r_beta, r_lam, r_sigma).rx2('ci'))
-    return ci
+    r_Type = robjects.r.matrix(Type, nrow=1, ncol=1)
+    output = inf_R(r_X, r_y, r_beta, r_lam, r_sigma, r_Type)
+    ci = np.array(output.rx2('ci'))
+    pvalue = np.array(output.rx2('pvalue'))
+    return ci, pvalue
 
 def glmnet_lasso(X, y, lambda_val):
     robjects.r('''
@@ -162,11 +169,13 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
         sigma_ = np.sqrt(dispersion)
         print("estimated and true sigma", sigma, sigma_)
 
-        LASSO_py = lasso.gaussian(X, y, n * lam_tuned_lasso, sigma_)
-        soln = LASSO_py.fit()
-        active_LASSO = (soln != 0)
-        nactive_LASSO = active_LASSO.sum()
+        #LASSO_py = lasso.gaussian(X, y, n * lam_tuned_lasso, sigma_)
+        #soln = LASSO_py.fit()
+        #active_LASSO = (soln != 0)
+        #nactive_LASSO = active_LASSO.sum()
         glm_LASSO = glmnet_lasso(X, y, lam_tuned_lasso)
+        active_LASSO = (glm_LASSO != 0)
+        nactive_LASSO = active_LASSO.sum()
 
         tune_num = 50
         lam_seq = sigma_ * np.linspace(0.25, 2.75, num=tune_num) * \
@@ -206,9 +215,10 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
         sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
 
         if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
-            Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-            Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
-            Lee_pval = np.asarray(Lee['pval'])
+            # Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
+            # Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
+            # Lee_pval = np.asarray(Lee['pval'])
+            Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_tuned_lasso, sigma_, Type=0, alpha=0.1)
 
             sel_MLE = np.zeros(p)
             estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
@@ -245,6 +255,7 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
                 active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
 
             cov_sel, _ = coverage(sel_intervals, sel_pval, beta_target_rand)
+            print("check shapes", Lee_pval.shape, beta_target_nonrand_py.shape)
             cov_Lee, _ = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
             cov_unad, _ = coverage(unad_intervals, unad_pval, beta_target_nonrand)
 
@@ -299,13 +310,9 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
         print("estimated and true sigma", sigma, sigma_)
 
-        LASSO_py = lasso.gaussian(X, y, n * lam_tuned_lasso, sigma_)
-        soln = LASSO_py.fit()
-        active_LASSO = (soln != 0)
-        nactive_LASSO = active_LASSO.sum()
         glm_LASSO = glmnet_lasso(X, y, lam_tuned_lasso)
-        print("shape", glm_LASSO.shape, glm_LASSO)
-        sel_inf = selInf_R(X, y, glm_LASSO, n * lam_tuned_lasso, sigma_, alpha=0.1)
+        active_LASSO = (glm_LASSO != 0)
+        nactive_LASSO = active_LASSO.sum()
 
         tune_num = 50
         lam_seq = sigma_ * np.linspace(0.25, 2.75, num=tune_num) * \
@@ -344,9 +351,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
         sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
 
         if nonzero.sum()>0 and nactive_nonrand>0 and nonzero.sum()<50:
-            # Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-            # Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
-            # Lee_pval = np.asarray(Lee['pval'])
+            Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_tuned_lasso, sigma_, Type=1, alpha=0.1)
 
             sel_MLE = np.zeros(p)
             estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
@@ -367,7 +372,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             unad_pval = ndist.cdf(post_LASSO_OLS/unad_sd)
 
             cov_sel, power_sel = coverage(sel_intervals, sel_pval, beta_target_rand)
-            #cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
+            cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
             cov_unad, power_unad = coverage(unad_intervals, unad_pval, beta_target_nonrand)
             break
 
@@ -378,11 +383,14 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
                relative_risk(randomized_lasso._beta_full, beta, Sigma), \
                relative_risk(rel_LASSO, beta, Sigma), \
                relative_risk(est_LASSO, beta, Sigma), \
-               cov_sel,\
+               cov_sel, \
+               cov_Lee,\
                cov_unad,\
-               (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
-               (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
-               power_sel/float((beta != 0).sum()),  \
+               np.mean(sel_intervals[:, 1] - sel_intervals[:, 0]), \
+               np.mean(Lee_intervals[:, 1] - Lee_intervals[:, 0]),\
+               np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]),\
+               power_sel/float((beta != 0).sum()),\
+               power_Lee/float((beta != 0).sum()),\
                power_unad/float((beta != 0).sum())
 
 if __name__ == "__main__":
@@ -409,8 +417,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_Lee = 0.
     power_unad = 0.
 
-    target = "full"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 1.10
+    target = "selected"
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
 
     if target == "selected":
         for i in range(ndraw):
@@ -478,13 +486,16 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             risk_LASSO_nonrand += output[5]
 
             coverage_selMLE += output[6]
-            coverage_unad += output[7]
+            coverage_Lee += output[7]
+            coverage_unad += output[8]
 
-            length_sel += output[8]
-            length_unad += output[9]
+            length_sel += output[9]
+            length_Lee += output[10]
+            length_unad += output[11]
 
-            power_sel += output[10]
-            power_unad += output[11]
+            power_sel += output[12]
+            power_Lee += output[13]
+            power_unad += output[14]
 
             sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
             sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
@@ -496,12 +507,15 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
 
             sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n")
             sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
 
             sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
             sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
 
             sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
             sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
 
             sys.stderr.write("iteration completed " + str(i + 1) + "\n")

From 88be59658dba03cfef1e716acc123a1d995e097e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 15:50:50 -0700
Subject: [PATCH 570/617] add power post BH filter

---
 .../tests/test_inferential_metrics.py         | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index dfb7a3db3..7ed2b5d58 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -9,6 +9,16 @@
 from selection.algorithms.lasso import lasso
 from scipy.stats import norm as ndist
 
+def BHfilter(pval, q=0.2):
+    robjects.r.assign('pval', pval)
+    robjects.r.assign('q', q)
+    robjects.r('Pval = p.adjust(pval, method="BH")')
+    robjects.r('S = which((Pval < q)) - 1')
+    S = robjects.r('S')
+    ind = np.zeros(pval.shape[0], np.bool)
+    ind[np.asarray(S, np.int)] = 1
+    return ind
+
 def selInf_R(X, y, beta, lam, sigma, Type, alpha=0.1):
     robjects.r('''
                library("selectiveInference")
@@ -262,6 +272,18 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
             power_sel = ((active_rand_bool)*(np.logical_or((0. < sel_intervals[:, 0]),(0. > sel_intervals[:,1])))).sum()
             power_Lee = ((active_LASSO_bool)*(np.logical_or((0. < Lee_intervals[:, 0]),(0. > Lee_intervals[:,1])))).sum()
             power_unad = ((active_nonrand_bool)*(np.logical_or((0. < unad_intervals[:, 0]),(0. > unad_intervals[:,1])))).sum()
+
+            sel_discoveries = BHfilter(sel_pval, q=0.2)
+            Lee_discoveries = BHfilter(Lee_pval, q=0.2)
+            unad_discoveries = BHfilter(unad_pval, q=0.2)
+
+            power_sel_dis = (sel_discoveries * active_rand_bool).sum()/float((beta != 0).sum())
+            power_Lee_dis = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum())
+            power_unad_dis = (unad_discoveries * active_nonrand_bool).sum() / float((beta != 0).sum())
+
+            fdr_sel_dis = (sel_discoveries * ~active_rand_bool).sum()/max(sel_discoveries.sum(), 1.)
+            fdr_Lee_dis = (Lee_discoveries * ~active_LASSO_bool).sum() / max(Lee_discoveries.sum(), 1.)
+            fdr_sel_dis = (unad_discoveries * ~active_nonrand_bool).sum() / max(unad_discoveries.sum(), 1.)
             break
 
     if True:

From b010ce19e6aa42f6e27acbc17c91b5a37860e862 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 20:16:03 -0700
Subject: [PATCH 571/617] added fdr and power post passing p-values through BH
 sieve

---
 .../tests/test_inferential_metrics.py         | 107 +++++++++++++++---
 1 file changed, 91 insertions(+), 16 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 7ed2b5d58..c9210ff72 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -179,10 +179,6 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
         sigma_ = np.sqrt(dispersion)
         print("estimated and true sigma", sigma, sigma_)
 
-        #LASSO_py = lasso.gaussian(X, y, n * lam_tuned_lasso, sigma_)
-        #soln = LASSO_py.fit()
-        #active_LASSO = (soln != 0)
-        #nactive_LASSO = active_LASSO.sum()
         glm_LASSO = glmnet_lasso(X, y, lam_tuned_lasso)
         active_LASSO = (glm_LASSO != 0)
         nactive_LASSO = active_LASSO.sum()
@@ -265,7 +261,7 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
                 active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
 
             cov_sel, _ = coverage(sel_intervals, sel_pval, beta_target_rand)
-            print("check shapes", Lee_pval.shape, beta_target_nonrand_py.shape)
+            print("check shapes", Lee_pval.shape, beta_target_nonrand_py.shape, Lee_pval)
             cov_Lee, _ = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
             cov_unad, _ = coverage(unad_intervals, unad_pval, beta_target_nonrand)
 
@@ -273,17 +269,17 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
             power_Lee = ((active_LASSO_bool)*(np.logical_or((0. < Lee_intervals[:, 0]),(0. > Lee_intervals[:,1])))).sum()
             power_unad = ((active_nonrand_bool)*(np.logical_or((0. < unad_intervals[:, 0]),(0. > unad_intervals[:,1])))).sum()
 
-            sel_discoveries = BHfilter(sel_pval, q=0.2)
-            Lee_discoveries = BHfilter(Lee_pval, q=0.2)
-            unad_discoveries = BHfilter(unad_pval, q=0.2)
+            sel_discoveries = BHfilter(sel_pval, q=0.1)
+            Lee_discoveries = BHfilter(Lee_pval, q=0.1)
+            unad_discoveries = BHfilter(unad_pval, q=0.1)
 
             power_sel_dis = (sel_discoveries * active_rand_bool).sum()/float((beta != 0).sum())
             power_Lee_dis = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum())
             power_unad_dis = (unad_discoveries * active_nonrand_bool).sum() / float((beta != 0).sum())
 
-            fdr_sel_dis = (sel_discoveries * ~active_rand_bool).sum()/max(sel_discoveries.sum(), 1.)
-            fdr_Lee_dis = (Lee_discoveries * ~active_LASSO_bool).sum() / max(Lee_discoveries.sum(), 1.)
-            fdr_sel_dis = (unad_discoveries * ~active_nonrand_bool).sum() / max(unad_discoveries.sum(), 1.)
+            fdr_sel_dis = (sel_discoveries * ~active_rand_bool).sum() /float(max(sel_discoveries.sum(), 1.))
+            fdr_Lee_dis = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.))
+            fdr_unad_dis = (unad_discoveries * ~active_nonrand_bool).sum() / float(max(unad_discoveries.sum(), 1.))
             break
 
     if True:
@@ -301,7 +297,13 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
                np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]), \
                power_sel/float((beta != 0).sum()), \
                power_Lee/float((beta != 0).sum()), \
-               power_unad/float((beta != 0).sum())
+               power_unad/float((beta != 0).sum()), \
+               power_sel_dis, \
+               power_Lee_dis, \
+               power_unad_dis, \
+               fdr_sel_dis, \
+               fdr_Lee_dis, \
+               fdr_unad_dis
 
 
 def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2,
@@ -353,8 +355,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
             full_estimate = np.zeros(p)
             full_estimate[nonzero] = estimate
-            #err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
-            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+            err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
+            #err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
@@ -393,9 +395,38 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
                                         post_LASSO_OLS + 1.65 * unad_sd]).T
             unad_pval = ndist.cdf(post_LASSO_OLS/unad_sd)
 
+            true_signals = np.zeros(p, np.bool)
+            true_signals[beta != 0] = 1
+            true_set = np.asarray([u for u in range(p) if true_signals[u]])
+            active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
+            active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
+            active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
+
+            active_rand_bool = np.zeros(nonzero.sum(), np.bool)
+            for x in range(nonzero.sum()):
+                active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
+            active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
+            for w in range(nactive_nonrand):
+                active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
+            active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
+            for z in range(nactive_LASSO):
+                active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
+
             cov_sel, power_sel = coverage(sel_intervals, sel_pval, beta_target_rand)
             cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
             cov_unad, power_unad = coverage(unad_intervals, unad_pval, beta_target_nonrand)
+
+            sel_discoveries = BHfilter(sel_pval, q=0.1)
+            Lee_discoveries = BHfilter(Lee_pval, q=0.1)
+            unad_discoveries = BHfilter(unad_pval, q=0.1)
+
+            power_sel_dis = (sel_discoveries * active_rand_bool).sum() / float((beta != 0).sum())
+            power_Lee_dis = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum())
+            power_unad_dis = (unad_discoveries * active_nonrand_bool).sum() / float((beta != 0).sum())
+
+            fdr_sel_dis = (sel_discoveries * ~active_rand_bool).sum() / float(max(sel_discoveries.sum(), 1.))
+            fdr_Lee_dis = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.))
+            fdr_unad_dis = (unad_discoveries * ~active_nonrand_bool).sum() / float(max(unad_discoveries.sum(), 1.))
             break
 
     if True:
@@ -413,7 +444,14 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
                np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]),\
                power_sel/float((beta != 0).sum()),\
                power_Lee/float((beta != 0).sum()),\
-               power_unad/float((beta != 0).sum())
+               power_unad/float((beta != 0).sum()),\
+               power_sel_dis, \
+               power_Lee_dis, \
+               power_unad_dis, \
+               fdr_sel_dis, \
+               fdr_Lee_dis, \
+               fdr_unad_dis
+
 
 if __name__ == "__main__":
 
@@ -439,8 +477,15 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     power_Lee = 0.
     power_unad = 0.
 
+    power_sel_dis = 0.
+    power_Lee_dis = 0.
+    power_unad_dis = 0.
+    fdr_sel_dis = 0.
+    fdr_Lee_dis = 0.
+    fdr_unad_dis = 0.
+
     target = "selected"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.20
 
     if target == "selected":
         for i in range(ndraw):
@@ -467,6 +512,13 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             power_Lee += output[13]
             power_unad += output[14]
 
+            power_sel_dis += output[15]
+            power_Lee_dis += output[16]
+            power_unad_dis += output[17]
+            fdr_sel_dis += output[18]
+            fdr_Lee_dis += output[19]
+            fdr_unad_dis += output[20]
+
             sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
             sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
             sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
@@ -488,6 +540,14 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
             sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
 
+            sys.stderr.write("overall selective fdr " + str(fdr_sel_dis / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee fdr " + str(fdr_Lee_dis / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad fdr " + str(fdr_unad_dis / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective power post BH " + str(power_sel_dis / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power post BH  " + str(power_Lee_dis / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power post BH " + str(power_unad_dis / float(i + 1)) + "\n" + "\n")
+
             sys.stderr.write("iteration completed " + str(i + 1) + "\n")
 
     elif target == "full":
@@ -519,6 +579,13 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             power_Lee += output[13]
             power_unad += output[14]
 
+            power_sel_dis += output[15]
+            power_Lee_dis += output[16]
+            power_unad_dis += output[17]
+            fdr_sel_dis += output[18]
+            fdr_Lee_dis += output[19]
+            fdr_unad_dis += output[20]
+
             sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
             sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
             sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
@@ -540,4 +607,12 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
             sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
 
+            sys.stderr.write("overall selective fdr " + str(fdr_sel_dis / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee fdr " + str(fdr_Lee_dis / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad fdr " + str(fdr_unad_dis / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("overall selective power post BH " + str(power_sel_dis / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power post BH  " + str(power_Lee_dis / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power post BH " + str(power_unad_dis / float(i + 1)) + "\n" + "\n")
+
             sys.stderr.write("iteration completed " + str(i + 1) + "\n")

From ccedbd872a6f7984a0bf51aa7b1065562536a965 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 22:00:15 -0700
Subject: [PATCH 572/617] removed some unnecessary prints

---
 .../tests/test_inferential_metrics.py         | 216 ++++++------------
 1 file changed, 70 insertions(+), 146 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index c9210ff72..a78584dbf 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -355,8 +355,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
             full_estimate = np.zeros(p)
             full_estimate[nonzero] = estimate
-            err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
-            #err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+            #err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
+            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
@@ -430,123 +430,72 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             break
 
     if True:
-        return relative_risk(sel_MLE, beta, Sigma), \
-               relative_risk(ind_estimator, beta, Sigma), \
-               relative_risk(randomized_lasso.initial_soln , beta, Sigma), \
-               relative_risk(randomized_lasso._beta_full, beta, Sigma), \
-               relative_risk(rel_LASSO, beta, Sigma), \
-               relative_risk(est_LASSO, beta, Sigma), \
-               cov_sel, \
-               cov_Lee,\
-               cov_unad,\
-               np.mean(sel_intervals[:, 1] - sel_intervals[:, 0]), \
-               np.mean(Lee_intervals[:, 1] - Lee_intervals[:, 0]),\
-               np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]),\
-               power_sel/float((beta != 0).sum()),\
-               power_Lee/float((beta != 0).sum()),\
-               power_unad/float((beta != 0).sum()),\
-               power_sel_dis, \
-               power_Lee_dis, \
-               power_unad_dis, \
-               fdr_sel_dis, \
-               fdr_Lee_dis, \
-               fdr_unad_dis
+        return np.vstack((relative_risk(sel_MLE, beta, Sigma),
+                          relative_risk(ind_estimator, beta, Sigma),
+                          relative_risk(randomized_lasso.initial_soln , beta, Sigma),
+                          relative_risk(randomized_lasso._beta_full, beta, Sigma),
+                          relative_risk(rel_LASSO, beta, Sigma),
+                          relative_risk(est_LASSO, beta, Sigma),
+                          cov_sel,
+                          cov_Lee,
+                          cov_unad,
+                          np.mean(sel_intervals[:, 1] - sel_intervals[:, 0]),
+                          np.mean(Lee_intervals[:, 1] - Lee_intervals[:, 0]),
+                          np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]),
+                          power_sel/float((beta != 0).sum()),
+                          power_Lee/float((beta != 0).sum()),
+                          power_unad/float((beta != 0).sum()),
+                          power_sel_dis,
+                          power_Lee_dis,
+                          power_unad_dis,
+                          fdr_sel_dis,
+                          fdr_Lee_dis,
+                          fdr_unad_dis))
 
 
 if __name__ == "__main__":
 
     ndraw = 50
-    bias = 0.
-    risk_selMLE = 0.
-    risk_indest = 0.
-    risk_LASSO_rand = 0.
-    risk_relLASSO_rand = 0.
-
-    risk_relLASSO_nonrand = 0.
-    risk_LASSO_nonrand = 0.
-
-    coverage_selMLE = 0.
-    coverage_Lee = 0.
-    coverage_unad = 0.
-
-    length_sel = 0.
-    length_Lee = 0.
-    length_unad = 0.
-
-    power_sel = 0.
-    power_Lee = 0.
-    power_unad = 0.
-
-    power_sel_dis = 0.
-    power_Lee_dis = 0.
-    power_unad_dis = 0.
-    fdr_sel_dis = 0.
-    fdr_Lee_dis = 0.
-    fdr_unad_dis = 0.
+    output_overall = np.zeros(21)
 
-    target = "selected"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.20
+    target = "full"
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
 
     if target == "selected":
         for i in range(ndraw):
             output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
                                                         randomizer_scale=np.sqrt(0.5), target=target,
                                                         full_dispersion=True)
+            output_overall += np.squeeze(output)
 
-            risk_selMLE += output[0]
-            risk_indest += output[1]
-            risk_LASSO_rand += output[2]
-            risk_relLASSO_rand += output[3]
-            risk_relLASSO_nonrand += output[4]
-            risk_LASSO_nonrand += output[5]
-
-            coverage_selMLE += output[6]
-            coverage_Lee += output[7]
-            coverage_unad += output[8]
-
-            length_sel += output[9]
-            length_Lee += output[10]
-            length_unad += output[11]
-
-            power_sel += output[12]
-            power_Lee += output[13]
-            power_unad += output[14]
-
-            power_sel_dis += output[15]
-            power_Lee_dis += output[16]
-            power_unad_dis += output[17]
-            fdr_sel_dis += output[18]
-            fdr_Lee_dis += output[19]
-            fdr_unad_dis += output[20]
-
-            sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
-            sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
-            sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+            sys.stderr.write("overall selMLE risk " + str(output_overall[0] / float(i + 1)) + "\n")
+            sys.stderr.write("overall indep est risk " + str(output_overall[1] / float(i + 1)) + "\n")
+            sys.stderr.write("overall randomized LASSO est risk " + str(output_overall[2] / float(i + 1)) + "\n")
             sys.stderr.write(
-                "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
+                "overall relaxed rand LASSO est risk " + str(output_overall[3] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-            sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall relLASSO risk " + str(output_overall[4] / float(i + 1)) + "\n")
+            sys.stderr.write("overall LASSO risk " + str(output_overall[5] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective coverage " + str(output_overall[6] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee coverage " + str(output_overall[7] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad coverage " + str(output_overall[8] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective length " + str(output_overall[9] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee length " + str(output_overall[10] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad length " + str(output_overall[11] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective power " + str(output_overall[12] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power " + str(output_overall[13] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power " + str(output_overall[14] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective fdr " + str(fdr_sel_dis / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee fdr " + str(fdr_Lee_dis / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad fdr " + str(fdr_unad_dis / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective fdr " + str(output_overall[18] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee fdr " + str(output_overall[19] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad fdr " + str(output_overall[20] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective power post BH " + str(power_sel_dis / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee power post BH  " + str(power_Lee_dis / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad power post BH " + str(power_unad_dis / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective power post BH " + str(output_overall[15] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power post BH  " + str(output_overall[16] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power post BH " + str(output_overall[17] / float(i + 1)) + "\n" + "\n")
 
             sys.stderr.write("iteration completed " + str(i + 1) + "\n")
 
@@ -559,60 +508,35 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             output = comparison_risk_inference_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
                                                     randomizer_scale=np.sqrt(0.25), target=target,
                                                     full_dispersion=full_dispersion)
+            output_overall += np.squeeze(output)
 
-            risk_selMLE += output[0]
-            risk_indest += output[1]
-            risk_LASSO_rand += output[2]
-            risk_relLASSO_rand += output[3]
-            risk_relLASSO_nonrand += output[4]
-            risk_LASSO_nonrand += output[5]
-
-            coverage_selMLE += output[6]
-            coverage_Lee += output[7]
-            coverage_unad += output[8]
-
-            length_sel += output[9]
-            length_Lee += output[10]
-            length_unad += output[11]
-
-            power_sel += output[12]
-            power_Lee += output[13]
-            power_unad += output[14]
-
-            power_sel_dis += output[15]
-            power_Lee_dis += output[16]
-            power_unad_dis += output[17]
-            fdr_sel_dis += output[18]
-            fdr_Lee_dis += output[19]
-            fdr_unad_dis += output[20]
-
-            sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
-            sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
-            sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
+            sys.stderr.write("overall selMLE risk " + str(output_overall[0] / float(i + 1)) + "\n")
+            sys.stderr.write("overall indep est risk " + str(output_overall[1] / float(i + 1)) + "\n")
+            sys.stderr.write("overall randomized LASSO est risk " + str(output_overall[2] / float(i + 1)) + "\n")
             sys.stderr.write(
-                "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
+                "overall relaxed rand LASSO est risk " + str(output_overall[3] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-            sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall relLASSO risk " + str(output_overall[4] / float(i + 1)) + "\n")
+            sys.stderr.write("overall LASSO risk " + str(output_overall[5] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective coverage " + str(output_overall[6] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee coverage " + str(output_overall[7] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad coverage " + str(output_overall[8] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective length " + str(output_overall[9] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee length " + str(output_overall[10] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad length " + str(output_overall[11] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective power " + str(output_overall[12] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power " + str(output_overall[13] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power " + str(output_overall[14] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective fdr " + str(fdr_sel_dis / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee fdr " + str(fdr_Lee_dis / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad fdr " + str(fdr_unad_dis / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective fdr " + str(output_overall[18] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee fdr " + str(output_overall[19] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad fdr " + str(output_overall[20] / float(i + 1)) + "\n" + "\n")
 
-            sys.stderr.write("overall selective power post BH " + str(power_sel_dis / float(i + 1)) + "\n")
-            sys.stderr.write("overall Lee power post BH  " + str(power_Lee_dis / float(i + 1)) + "\n")
-            sys.stderr.write("overall unad power post BH " + str(power_unad_dis / float(i + 1)) + "\n" + "\n")
+            sys.stderr.write("overall selective power post BH " + str(output_overall[15] / float(i + 1)) + "\n")
+            sys.stderr.write("overall Lee power post BH  " + str(output_overall[16] / float(i + 1)) + "\n")
+            sys.stderr.write("overall unad power post BH " + str(output_overall[17] / float(i + 1)) + "\n" + "\n")
 
             sys.stderr.write("iteration completed " + str(i + 1) + "\n")

From 0adb83ff4632b1045c00ef44cf3812210239a787 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Wed, 11 Apr 2018 23:29:16 -0700
Subject: [PATCH 573/617] some more reorganization

---
 .../tests/test_inferential_metrics.py         | 260 +++++++++---------
 1 file changed, 133 insertions(+), 127 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index a78584dbf..5aca16561 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -221,90 +221,91 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
         sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
 
         if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
-            # Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-            # Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
-            # Lee_pval = np.asarray(Lee['pval'])
-            Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_tuned_lasso, sigma_, Type=0, alpha=0.1)
-
-            sel_MLE = np.zeros(p)
-            estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
-                                                                                                         dispersion=dispersion)
-            sel_MLE[nonzero] = estimate
-            ind_estimator = np.zeros(p)
-            ind_estimator[nonzero] = ind_unbiased_estimator
-
             beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
             beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
             beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
 
-            post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
-            unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
-            unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-                                        post_LASSO_OLS + 1.65 * unad_sd]).T
-            unad_pval = ndist.cdf(post_LASSO_OLS / unad_sd)
-
-            true_signals = np.zeros(p, np.bool)
-            true_signals[beta != 0] = 1
-            true_set = np.asarray([u for u in range(p) if true_signals[u]])
-            active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
-            active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
-            active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
-
-            active_rand_bool = np.zeros(nonzero.sum(), np.bool)
-            for x in range(nonzero.sum()):
-                active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
-            active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
-            for w in range(nactive_nonrand):
-                active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
-            active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
-            for z in range(nactive_LASSO):
-                active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
-
-            cov_sel, _ = coverage(sel_intervals, sel_pval, beta_target_rand)
-            print("check shapes", Lee_pval.shape, beta_target_nonrand_py.shape, Lee_pval)
-            cov_Lee, _ = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
-            cov_unad, _ = coverage(unad_intervals, unad_pval, beta_target_nonrand)
-
-            power_sel = ((active_rand_bool)*(np.logical_or((0. < sel_intervals[:, 0]),(0. > sel_intervals[:,1])))).sum()
-            power_Lee = ((active_LASSO_bool)*(np.logical_or((0. < Lee_intervals[:, 0]),(0. > Lee_intervals[:,1])))).sum()
-            power_unad = ((active_nonrand_bool)*(np.logical_or((0. < unad_intervals[:, 0]),(0. > unad_intervals[:,1])))).sum()
-
-            sel_discoveries = BHfilter(sel_pval, q=0.1)
-            Lee_discoveries = BHfilter(Lee_pval, q=0.1)
-            unad_discoveries = BHfilter(unad_pval, q=0.1)
-
-            power_sel_dis = (sel_discoveries * active_rand_bool).sum()/float((beta != 0).sum())
-            power_Lee_dis = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum())
-            power_unad_dis = (unad_discoveries * active_nonrand_bool).sum() / float((beta != 0).sum())
-
-            fdr_sel_dis = (sel_discoveries * ~active_rand_bool).sum() /float(max(sel_discoveries.sum(), 1.))
-            fdr_Lee_dis = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.))
-            fdr_unad_dis = (unad_discoveries * ~active_nonrand_bool).sum() / float(max(unad_discoveries.sum(), 1.))
-            break
+            Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_tuned_lasso, sigma_, Type=0, alpha=0.1)
+
+            if (Lee_pval.shape[0] == beta_target_nonrand_py.shape[0]):
+                sel_MLE = np.zeros(p)
+                estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(
+                    target=target,
+                    dispersion=dispersion)
+                sel_MLE[nonzero] = estimate
+                ind_estimator = np.zeros(p)
+                ind_estimator[nonzero] = ind_unbiased_estimator
+
+                post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+                unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
+                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                            post_LASSO_OLS + 1.65 * unad_sd]).T
+                unad_pval = ndist.cdf(post_LASSO_OLS / unad_sd)
+
+                true_signals = np.zeros(p, np.bool)
+                true_signals[beta != 0] = 1
+                true_set = np.asarray([u for u in range(p) if true_signals[u]])
+                active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
+                active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
+                active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
+
+                active_rand_bool = np.zeros(nonzero.sum(), np.bool)
+                for x in range(nonzero.sum()):
+                    active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
+                active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
+                for w in range(nactive_nonrand):
+                    active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
+                active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
+                for z in range(nactive_LASSO):
+                    active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
+
+                cov_sel, _ = coverage(sel_intervals, sel_pval, beta_target_rand)
+                # print("check shapes", Lee_pval.shape, beta_target_nonrand_py.shape, Lee_pval)
+                cov_Lee, _ = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
+                cov_unad, _ = coverage(unad_intervals, unad_pval, beta_target_nonrand)
+
+                power_sel = (
+                (active_rand_bool) * (np.logical_or((0. < sel_intervals[:, 0]), (0. > sel_intervals[:, 1])))).sum()
+                power_Lee = (
+                (active_LASSO_bool) * (np.logical_or((0. < Lee_intervals[:, 0]), (0. > Lee_intervals[:, 1])))).sum()
+                power_unad = (
+                (active_nonrand_bool) * (np.logical_or((0. < unad_intervals[:, 0]), (0. > unad_intervals[:, 1])))).sum()
+
+                sel_discoveries = BHfilter(sel_pval, q=0.1)
+                Lee_discoveries = BHfilter(Lee_pval, q=0.1)
+                unad_discoveries = BHfilter(unad_pval, q=0.1)
+
+                power_sel_dis = (sel_discoveries * active_rand_bool).sum() / float((beta != 0).sum())
+                power_Lee_dis = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum())
+                power_unad_dis = (unad_discoveries * active_nonrand_bool).sum() / float((beta != 0).sum())
+
+                fdr_sel_dis = (sel_discoveries * ~active_rand_bool).sum() / float(max(sel_discoveries.sum(), 1.))
+                fdr_Lee_dis = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.))
+                fdr_unad_dis = (unad_discoveries * ~active_nonrand_bool).sum() / float(max(unad_discoveries.sum(), 1.))
+                break
 
     if True:
-        return relative_risk(sel_MLE, beta, Sigma), \
-               relative_risk(ind_estimator, beta, Sigma), \
-               relative_risk(randomized_lasso.initial_soln, beta, Sigma), \
-               relative_risk(randomized_lasso._beta_full, beta, Sigma), \
-               relative_risk(rel_LASSO, beta, Sigma), \
-               relative_risk(est_LASSO, beta, Sigma), \
-               cov_sel,\
-               cov_Lee,\
-               cov_unad,\
-               np.mean(sel_intervals[:, 1] - sel_intervals[:, 0]), \
-               np.mean(Lee_intervals[:, 1] - Lee_intervals[:, 0]), \
-               np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]), \
-               power_sel/float((beta != 0).sum()), \
-               power_Lee/float((beta != 0).sum()), \
-               power_unad/float((beta != 0).sum()), \
-               power_sel_dis, \
-               power_Lee_dis, \
-               power_unad_dis, \
-               fdr_sel_dis, \
-               fdr_Lee_dis, \
-               fdr_unad_dis
-
+        return np.vstack((relative_risk(sel_MLE, beta, Sigma),
+                          relative_risk(ind_estimator, beta, Sigma),
+                          relative_risk(randomized_lasso.initial_soln , beta, Sigma),
+                          relative_risk(randomized_lasso._beta_full, beta, Sigma),
+                          relative_risk(rel_LASSO, beta, Sigma),
+                          relative_risk(est_LASSO, beta, Sigma),
+                          cov_sel,
+                          cov_Lee,
+                          cov_unad,
+                          np.mean(sel_intervals[:, 1] - sel_intervals[:, 0]),
+                          np.mean(Lee_intervals[:, 1] - Lee_intervals[:, 0]),
+                          np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]),
+                          power_sel/float((beta != 0).sum()),
+                          power_Lee/float((beta != 0).sum()),
+                          power_unad/float((beta != 0).sum()),
+                          power_sel_dis,
+                          power_Lee_dis,
+                          power_unad_dis,
+                          fdr_sel_dis,
+                          fdr_Lee_dis,
+                          fdr_unad_dis))
 
 def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2,
                                    snr=0.2, randomizer_scale=0.5, target = "full",
@@ -375,59 +376,64 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
         sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
 
         if nonzero.sum()>0 and nactive_nonrand>0 and nonzero.sum()<50:
-            Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_tuned_lasso, sigma_, Type=1, alpha=0.1)
-
-            sel_MLE = np.zeros(p)
-            estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
-                                                                                                         dispersion=dispersion)
-            sel_MLE[nonzero] = estimate
-            ind_estimator = np.zeros(p)
-            ind_estimator[nonzero] = ind_unbiased_estimator 
-
             beta_target_rand = beta[nonzero]
             beta_target_nonrand_py = beta[active_LASSO]
             beta_target_nonrand = beta[active_nonrand]
 
-            post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
-            unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
-
-            unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-                                        post_LASSO_OLS + 1.65 * unad_sd]).T
-            unad_pval = ndist.cdf(post_LASSO_OLS/unad_sd)
-
-            true_signals = np.zeros(p, np.bool)
-            true_signals[beta != 0] = 1
-            true_set = np.asarray([u for u in range(p) if true_signals[u]])
-            active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
-            active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
-            active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
-
-            active_rand_bool = np.zeros(nonzero.sum(), np.bool)
-            for x in range(nonzero.sum()):
-                active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
-            active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
-            for w in range(nactive_nonrand):
-                active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
-            active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
-            for z in range(nactive_LASSO):
-                active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
-
-            cov_sel, power_sel = coverage(sel_intervals, sel_pval, beta_target_rand)
-            cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
-            cov_unad, power_unad = coverage(unad_intervals, unad_pval, beta_target_nonrand)
-
-            sel_discoveries = BHfilter(sel_pval, q=0.1)
-            Lee_discoveries = BHfilter(Lee_pval, q=0.1)
-            unad_discoveries = BHfilter(unad_pval, q=0.1)
-
-            power_sel_dis = (sel_discoveries * active_rand_bool).sum() / float((beta != 0).sum())
-            power_Lee_dis = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum())
-            power_unad_dis = (unad_discoveries * active_nonrand_bool).sum() / float((beta != 0).sum())
-
-            fdr_sel_dis = (sel_discoveries * ~active_rand_bool).sum() / float(max(sel_discoveries.sum(), 1.))
-            fdr_Lee_dis = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.))
-            fdr_unad_dis = (unad_discoveries * ~active_nonrand_bool).sum() / float(max(unad_discoveries.sum(), 1.))
-            break
+            Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_tuned_lasso, sigma_, Type=1, alpha=0.1)
+
+            if (Lee_pval.shape[0] == beta_target_nonrand_py.shape[0]):
+                sel_MLE = np.zeros(p)
+                estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(
+                    target=target,
+                    dispersion=dispersion)
+                sel_MLE[nonzero] = estimate
+                ind_estimator = np.zeros(p)
+                ind_estimator[nonzero] = ind_unbiased_estimator
+
+                if Lee_pval.shape[0] != beta_target_nonrand_py.shape[0]:
+                    break
+
+                post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
+                unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
+
+                unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
+                                            post_LASSO_OLS + 1.65 * unad_sd]).T
+                unad_pval = ndist.cdf(post_LASSO_OLS / unad_sd)
+
+                true_signals = np.zeros(p, np.bool)
+                true_signals[beta != 0] = 1
+                true_set = np.asarray([u for u in range(p) if true_signals[u]])
+                active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
+                active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
+                active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
+
+                active_rand_bool = np.zeros(nonzero.sum(), np.bool)
+                for x in range(nonzero.sum()):
+                    active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
+                active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
+                for w in range(nactive_nonrand):
+                    active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
+                active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
+                for z in range(nactive_LASSO):
+                    active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
+
+                cov_sel, power_sel = coverage(sel_intervals, sel_pval, beta_target_rand)
+                cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
+                cov_unad, power_unad = coverage(unad_intervals, unad_pval, beta_target_nonrand)
+
+                sel_discoveries = BHfilter(sel_pval, q=0.1)
+                Lee_discoveries = BHfilter(Lee_pval, q=0.1)
+                unad_discoveries = BHfilter(unad_pval, q=0.1)
+
+                power_sel_dis = (sel_discoveries * active_rand_bool).sum() / float((beta != 0).sum())
+                power_Lee_dis = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum())
+                power_unad_dis = (unad_discoveries * active_nonrand_bool).sum() / float((beta != 0).sum())
+
+                fdr_sel_dis = (sel_discoveries * ~active_rand_bool).sum() / float(max(sel_discoveries.sum(), 1.))
+                fdr_Lee_dis = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.))
+                fdr_unad_dis = (unad_discoveries * ~active_nonrand_bool).sum() / float(max(unad_discoveries.sum(), 1.))
+                break
 
     if True:
         return np.vstack((relative_risk(sel_MLE, beta, Sigma),

From 6bf84e8e2ed3ede438e6bcd57535aaaf3e5b87b9 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 12 Apr 2018 13:15:15 -0700
Subject: [PATCH 574/617] making notion of power consistent across tests

---
 .../adjusted_MLE/tests/test_inferential_metrics.py  | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 5aca16561..39b179e69 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -418,9 +418,16 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
                 for z in range(nactive_LASSO):
                     active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
 
-                cov_sel, power_sel = coverage(sel_intervals, sel_pval, beta_target_rand)
-                cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
-                cov_unad, power_unad = coverage(unad_intervals, unad_pval, beta_target_nonrand)
+                cov_sel, _ = coverage(sel_intervals, sel_pval, beta_target_rand)
+                cov_Lee, _ = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
+                cov_unad, _ = coverage(unad_intervals, unad_pval, beta_target_nonrand)
+
+                power_sel = ((active_rand_bool) * (np.logical_or((0. < sel_intervals[:, 0]),
+                                                                 (0. > sel_intervals[:, 1])))).sum()
+                power_Lee = ((active_LASSO_bool) * (np.logical_or((0. < Lee_intervals[:, 0]),
+                                                                  (0. > Lee_intervals[:, 1])))).sum()
+                power_unad = ((active_nonrand_bool) * (np.logical_or((0. < unad_intervals[:, 0]),
+                                                                     (0. > unad_intervals[:, 1])))).sum()
 
                 sel_discoveries = BHfilter(sel_pval, q=0.1)
                 Lee_discoveries = BHfilter(Lee_pval, q=0.1)

From 4e3d32a4535a7e8deec2b4528b95ac279779a80d Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 12 Apr 2018 14:31:34 -0700
Subject: [PATCH 575/617] organizing output

---
 .../adjusted_MLE/tests/test_risk_coverage.py  | 360 +-----------------
 1 file changed, 14 insertions(+), 346 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 55f237351..76799db89 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -1,357 +1,25 @@
 import numpy as np, sys
+import pandas as pd
 from rpy2 import robjects
 import rpy2.robjects.numpy2ri
+
 rpy2.robjects.numpy2ri.activate()
 
-import pandas as pd
-import selection.randomized.lasso as L; reload(L)
+import selection.randomized.lasso as L;
+
+reload(L)
 from selection.randomized.lasso import highdim
 from selection.algorithms.lasso import lasso
 from scipy.stats import norm as ndist
-
-def glmnet_lasso(X, y, lambda_val):
-    robjects.r('''
-                glmnet_LASSO = function(X,y,lambda){
-                y = as.matrix(y)
-                X = as.matrix(X)
-                lam = as.matrix(lambda)[1,1]
-                n = nrow(X)
-                fit = glmnet(X, y, standardize=TRUE, intercept=FALSE)
-                estimate = coef(fit, s=lam)[-1]
-                return(list(estimate = estimate))
-                }''')
-
-    lambda_R = robjects.globalenv['glmnet_LASSO']
-    n, p = X.shape
-    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-    r_lam = robjects.r.matrix(lambda_val, nrow=1, ncol=1)
-    estimate = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate'))
-    return estimate
-
-def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1):
-    robjects.r('''
-    library(bestsubset)
-    sim_xy = bestsubset::sim.xy
-    ''')
-
-    r_simulate = robjects.globalenv['sim_xy']
-    sim = r_simulate(n, p, nval, rho, s, beta_type, snr)
-    X = np.array(sim.rx2('x'))
-    y = np.array(sim.rx2('y'))
-    X_val = np.array(sim.rx2('xval'))
-    y_val = np.array(sim.rx2('yval'))
-    Sigma = np.array(sim.rx2('Sigma'))
-    beta = np.array(sim.rx2('beta'))
-    sigma = np.array(sim.rx2('sigma'))
-
-    return X, y, X_val, y_val, Sigma, beta, sigma
-
-def tuned_lasso(X, y, X_val,y_val):
-    robjects.r('''
-        tuned_lasso_estimator = function(X,Y,X.val,Y.val){
-        Y = as.matrix(Y)
-        X = as.matrix(X)
-        Y.val = as.vector(Y.val)
-        X.val = as.matrix(X.val)
-        rel.LASSO = lasso(X,Y,intercept=TRUE, nrelax=10, nlam=50, standardize=TRUE)
-        LASSO = lasso(X,Y,intercept=TRUE,nlam=50, standardize=TRUE)
-        beta.hat.rellasso = as.matrix(coef(rel.LASSO))
-        beta.hat.lasso = as.matrix(coef(LASSO))
-        min.lam = min(rel.LASSO$lambda)
-        max.lam = max(rel.LASSO$lambda)
-        #print(paste("max and min values of lambda", max.lam, min.lam))
-
-        lam.seq = exp(seq(log(max.lam),log(min.lam),length=rel.LASSO$nlambda))
-        muhat.val.rellasso = as.matrix(predict(rel.LASSO, X.val))
-        muhat.val.lasso = as.matrix(predict(LASSO, X.val))
-        err.val.rellasso = colMeans((muhat.val.rellasso - Y.val)^2)
-        err.val.lasso = colMeans((muhat.val.lasso - Y.val)^2)
-
-        opt_lam = ceiling(which.min(err.val.rellasso)/10)
-        lambda.tuned.rellasso = lam.seq[opt_lam]
-        lambda.tuned.lasso = lam.seq[which.min(err.val.lasso)]
-
-        fit = glmnet(X, Y, standardize=TRUE, intercept=TRUE)
-        estimate.tuned = coef(fit, s=lambda.tuned.lasso)[-1]
-
-        #print(paste("compare estimates", max(abs(estimate.tuned-(beta.hat.lasso[,which.min(err.val.lasso)])[-1])),
-        #length(which(estimate.tuned!=0)), length(which((beta.hat.lasso[,which.min(err.val.lasso)])[-1]!=0))))
-
-        return(list(beta.hat.rellasso = (beta.hat.rellasso[,which.min(err.val.rellasso)])[-1],
-        beta.hat.lasso = (beta.hat.lasso[,which.min(err.val.lasso)])[-1],
-        lambda.tuned.rellasso = lambda.tuned.rellasso, lambda.tuned.lasso= lambda.tuned.lasso,
-        lambda.seq = lam.seq))
-        }''')
-
-    r_lasso = robjects.globalenv['tuned_lasso_estimator']
-
-    n, p = X.shape
-    nval, _ = X_val.shape
-    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
-    r_y = robjects.r.matrix(y, nrow=n, ncol=1)
-    r_X_val = robjects.r.matrix(X_val, nrow=nval, ncol=p)
-    r_y_val = robjects.r.matrix(y_val, nrow=nval, ncol=1)
-
-    tuned_est = r_lasso(r_X, r_y, r_X_val, r_y_val)
-    estimator_rellasso = np.array(tuned_est.rx2('beta.hat.rellasso'))
-    estimator_lasso = np.array(tuned_est.rx2('beta.hat.lasso'))
-    lam_tuned_rellasso = np.array(tuned_est.rx2('lambda.tuned.rellasso'))
-    lam_tuned_lasso = np.array(tuned_est.rx2('lambda.tuned.lasso'))
-    lam_seq = np.array(tuned_est.rx2('lambda.seq'))
-    return estimator_rellasso, estimator_lasso, lam_tuned_rellasso, lam_tuned_lasso, lam_seq
-
-def relative_risk(est, truth, Sigma):
-
-    return (est-truth).T.dot(Sigma).dot(est-truth)/truth.T.dot(Sigma).dot(truth)
-
-def coverage(intervals, pval, truth):
-    if (truth!=0).sum()!=0:
-        avg_power = np.mean(pval[truth != 0])
-    else:
-        avg_power = 0.
-    return np.mean((truth > intervals[:, 0])*(truth < intervals[:, 1])), avg_power
-
-
-def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.20,
-                                       randomizer_scale=np.sqrt(0.25), target = "selected",
-                                       full_dispersion = True):
-
-    while True:
-        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
-                                                        s=s, beta_type=beta_type, snr=snr)
-        rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
-        active_nonrand = (est_LASSO != 0)
-        nactive_nonrand = active_nonrand.sum()
-        true_mean = X.dot(beta)
-
-        X -= X.mean(0)[None, :]
-        X /= (X.std(0)[None, :] * np.sqrt(n))
-        X_val -= X_val.mean(0)[None, :]
-        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
-
-        y = y - y.mean()
-        y_val = y_val - y_val.mean()
-
-        if full_dispersion:
-            dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
-
-        sigma_ = np.std(y)
-        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
-        soln = LASSO_py.fit()
-        active_LASSO = (soln != 0)
-        nactive_LASSO = active_LASSO.sum()
-        glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
-
-        const = highdim.gaussian
-        lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
-                  np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        err = np.zeros(100)
-        for k in range(100):
-            W = lam_seq[k]
-            conv = const(X,
-                         y,
-                         W * np.ones(p),
-                         randomizer_scale=randomizer_scale * sigma_)
-            signs = conv.fit()
-            nonzero = signs != 0
-            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
-
-            full_estimate = np.zeros(p)
-            full_estimate[nonzero] = estimate
-            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
-
-        lam = lam_seq[np.argmin(err)]
-
-        # sys.stderr.write("lambda from tuned relaxed LASSO " + str((sigma_**2)*lam_tuned_lasso) + "\n")
-        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
-        #lam = np.sqrt(2 * np.log(p)) * sigma_
-        randomized_lasso = const(X,
-                                 y,
-                                 lam*np.ones(p),
-                                 randomizer_scale=randomizer_scale * sigma_)
-
-        signs = randomized_lasso.fit()
-        nonzero = signs != 0
-        sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
-        sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
-        sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
-        sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
-
-        if nactive_LASSO>0 and nonzero.sum()>0 and nactive_nonrand>0:
-            Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-            Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
-            Lee_pval = np.asarray(Lee['pval'])
-
-            sel_MLE = np.zeros(p)
-            estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
-                                                                                                         dispersion=dispersion)
-            sel_MLE[nonzero] = estimate / np.sqrt(n)
-            ind_estimator = np.zeros(p)
-            ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
-
-            beta_target_rand = np.linalg.pinv(X[:, nonzero]).dot(true_mean)
-            beta_target_nonrand_py = np.linalg.pinv(X[:, active_LASSO]).dot(true_mean)
-            beta_target_nonrand = np.linalg.pinv(X[:, active_nonrand]).dot(true_mean)
-
-            post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
-            unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
-            unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-                                        post_LASSO_OLS + 1.65 * unad_sd]).T
-            unad_pval = ndist.cdf(post_LASSO_OLS / unad_sd)
-
-            true_signals = np.zeros(p, np.bool)
-            true_signals[beta != 0] = 1
-            true_set = np.asarray([u for u in range(p) if true_signals[u]])
-            active_set_rand = np.asarray([t for t in range(p) if nonzero[t]])
-            active_set_nonrand = np.asarray([q for q in range(p) if active_nonrand[q]])
-            active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]])
-
-            active_rand_bool = np.zeros(nonzero.sum(), np.bool)
-            for x in range(nonzero.sum()):
-                active_rand_bool[x] = (np.in1d(active_set_rand[x], true_set).sum() > 0)
-            active_nonrand_bool = np.zeros(nactive_nonrand, np.bool)
-            for w in range(nactive_nonrand):
-                active_nonrand_bool[w] = (np.in1d(active_set_nonrand[w], true_set).sum() > 0)
-            active_LASSO_bool = np.zeros(nactive_LASSO, np.bool)
-            for z in range(nactive_LASSO):
-                active_LASSO_bool[z] = (np.in1d(active_set_LASSO[z], true_set).sum() > 0)
-
-            cov_sel, _ = coverage(sel_intervals, sel_pval, beta_target_rand)
-            cov_Lee, _ = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
-            cov_unad, _ = coverage(unad_intervals, unad_pval, beta_target_nonrand)
-
-            power_sel = ((active_rand_bool)*(np.logical_or((0. < sel_intervals[:, 0]),(0. > sel_intervals[:,1])))).sum()
-            power_Lee = ((active_LASSO_bool)*(np.logical_or((0. < Lee_intervals[:, 0]),(0. > Lee_intervals[:,1])))).sum()
-            power_unad = ((active_nonrand_bool)*(np.logical_or((0. < unad_intervals[:, 0]),(0. > unad_intervals[:,1])))).sum()
-            break
-
-    if True:
-        return relative_risk(sel_MLE, beta, Sigma), \
-               relative_risk(ind_estimator, beta, Sigma), \
-               relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
-               relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
-               relative_risk(rel_LASSO, beta, Sigma), \
-               relative_risk(est_LASSO, beta, Sigma), \
-               cov_sel,\
-               cov_Lee,\
-               cov_unad,\
-               np.mean(sel_intervals[:, 1] - sel_intervals[:, 0]), \
-               np.mean(Lee_intervals[:, 1] - Lee_intervals[:, 0]), \
-               np.mean(unad_intervals[:, 1] - unad_intervals[:, 0]), \
-               power_sel/float((beta != 0).sum()), \
-               power_Lee/float((beta != 0).sum()), \
-               power_unad/float((beta != 0).sum())
-
-
-def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2, snr=0.2,
-                                   randomizer_scale=np.sqrt(0.25), target = "full",
-                                   full_dispersion = True):
-
-    while True:
-        X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
-                                                        s=s, beta_type=beta_type, snr=snr)
-        rel_LASSO, est_LASSO, lam_tuned_rellasso, lam_tuned_lasso, lam_seq = tuned_lasso(X, y, X_val, y_val)
-        active_nonrand = (est_LASSO != 0)
-        nactive_nonrand = active_nonrand.sum()
-
-        _std = X.std(0)
-        X -= X.mean(0)[None, :]
-        X /= (X.std(0)[None, :] * np.sqrt(n))
-        X_val -= X_val.mean(0)[None, :]
-        X_val /= (X_val.std(0)[None, :] * np.sqrt(nval))
-
-        y = y - y.mean()
-        y_val = y_val - y_val.mean()
-
-        sigma_ = np.std(y)
-        print("true and estimated sigma", sigma, sigma_)
-
-        if full_dispersion:
-            dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p)
-        else:
-            dispersion = None
-
-        LASSO_py = lasso.gaussian(X, y, np.asscalar((sigma_**2.) * lam_tuned_lasso), np.asscalar(sigma_))
-        soln = LASSO_py.fit()
-        active_LASSO = (soln != 0)
-        nactive_LASSO = active_LASSO.sum()
-        glm_LASSO = glmnet_lasso(X, y, np.asscalar(lam_tuned_lasso))
-
-        const = highdim.gaussian
-        lam_seq = sigma_* np.linspace(0.25, 2.75, num=100) * \
-                  np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        err = np.zeros(100)
-        for k in range(100):
-            W = lam_seq[k]*np.ones(p)
-            conv = const(X,
-                         y,
-                         W,
-                         randomizer_scale=randomizer_scale * sigma_)
-            signs = conv.fit()
-            nonzero = signs != 0
-            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
-
-            full_estimate = np.zeros(p)
-            full_estimate[nonzero] = estimate
-            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
-
-        lam = lam_seq[np.argmin(err)]
-        sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
-        #lam = np.sqrt(2 * np.log(p)) * sigma_
-        randomized_lasso = const(X,
-                                 y,
-                                 lam*np.ones(p),
-                                 randomizer_scale=randomizer_scale * sigma_)
-
-        signs = randomized_lasso.fit()
-        nonzero = signs != 0
-        sys.stderr.write("active variables selected by tuned LASSO " + str(nactive_nonrand) + "\n")
-        sys.stderr.write("active variables selected by LASSO in python " + str(nactive_LASSO) + "\n")
-        sys.stderr.write("recall glmnet at tuned lambda " + str((glm_LASSO != 0).sum()) + "\n")
-        sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n")
-
-        if nonzero.sum()>0 and nactive_nonrand>0 and nonzero.sum()<50:
-            # Lee = LASSO_py.summary(alternative='twosided', alpha=0.10, UMAU=False, compute_intervals=True)
-            # Lee_intervals = np.vstack([np.asarray(Lee['lower_confidence']), np.asarray(Lee['upper_confidence'])]).T
-            # Lee_pval = np.asarray(Lee['pval'])
-
-            sel_MLE = np.zeros(p)
-            estimate, _, _, sel_pval, sel_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(target=target,
-                                                                                                         dispersion=dispersion)
-            sel_MLE[nonzero] = estimate / np.sqrt(n)
-            ind_estimator = np.zeros(p)
-            ind_estimator[nonzero] = ind_unbiased_estimator / np.sqrt(n)
-
-            beta_target_rand = np.sqrt(n)* _std[nonzero] * beta[nonzero]
-            beta_target_nonrand_py = np.sqrt(n)* _std[active_LASSO] * beta[active_LASSO]
-            beta_target_nonrand = np.sqrt(n)* _std[active_nonrand] * beta[active_nonrand]
-
-            post_LASSO_OLS = np.linalg.pinv(X[:, active_nonrand]).dot(y)
-            unad_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_nonrand].T.dot(X[:, active_nonrand])))))
-
-            unad_intervals = np.vstack([post_LASSO_OLS - 1.65 * unad_sd,
-                                        post_LASSO_OLS + 1.65 * unad_sd]).T
-            unad_pval = ndist.cdf(post_LASSO_OLS/unad_sd)
-
-            cov_sel, power_sel = coverage(sel_intervals, sel_pval, beta_target_rand)
-            #cov_Lee, power_Lee = coverage(Lee_intervals, Lee_pval, beta_target_nonrand_py)
-            cov_unad, power_unad = coverage(unad_intervals, unad_pval, beta_target_nonrand)
-            break
-
-    if True:
-        return relative_risk(sel_MLE, beta, Sigma), \
-               relative_risk(ind_estimator, beta, Sigma), \
-               relative_risk(randomized_lasso.initial_soln / np.sqrt(n), beta, Sigma), \
-               relative_risk(randomized_lasso._beta_full / np.sqrt(n), beta, Sigma), \
-               relative_risk(rel_LASSO, beta, Sigma), \
-               relative_risk(est_LASSO, beta, Sigma), \
-               cov_sel,\
-               cov_unad,\
-               (sel_intervals[:, 1] - sel_intervals[:, 0]).sum() / float(nonzero.sum()), \
-               (unad_intervals[:, 1] - unad_intervals[:, 0]).sum() / float(nactive_nonrand), \
-               power_sel/float((beta != 0).sum()),  \
-               power_unad/float((beta != 0).sum())
+from selection.adjusted_MLE.tests.test_inferential_metrics import (BHfilter,
+                                                                   selInf_R,
+                                                                   glmnet_lasso,
+                                                                   sim_xy,
+                                                                   tuned_lasso,
+                                                                   relative_risk,
+                                                                   coverage,
+                                                                   comparison_risk_inference_selected,
+                                                                   comparison_risk_inference_full)
 
 if __name__ == "__main__":
 

From 6495d0bca9731c9c495e41e66767a9a0510a92d0 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 12 Apr 2018 14:52:55 -0700
Subject: [PATCH 576/617] generate outputs

---
 .../tests/test_inferential_metrics.py         |  39 +--
 .../adjusted_MLE/tests/test_risk_coverage.py  | 270 +++++++-----------
 2 files changed, 122 insertions(+), 187 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 39b179e69..72c7d632e 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -153,7 +153,7 @@ def coverage(intervals, pval, truth):
 
 def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=2, snr=0.20,
                                        randomizer_scale=np.sqrt(0.25), target = "selected",
-                                       full_dispersion = True):
+                                       tuning = "selective_MLE", full_dispersion = True):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
@@ -192,16 +192,17 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
             conv = highdim.gaussian(X,
                                     y,
                                     W,
-                                    randomizer_scale=np.sqrt(n) *
-                                                     randomizer_scale * sigma_)
+                                    randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_)
             signs = conv.fit()
             nonzero = signs != 0
-            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+            if tuning == "selective_MLE":
+                estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+                full_estimate = np.zeros(p)
+                full_estimate[nonzero] = estimate
+                err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+            elif tuning == "randomized_LASSO":
+                err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
 
-            full_estimate = np.zeros(p)
-            full_estimate[nonzero] = estimate
-            # err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
-            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
@@ -309,7 +310,7 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
 
 def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2,
                                    snr=0.2, randomizer_scale=0.5, target = "full",
-                                   full_dispersion = True):
+                                   tuning = "selective_MLE", full_dispersion = True):
 
     while True:
         X, y, X_val, y_val, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho,
@@ -352,12 +353,13 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
                                     randomizer_scale * sigma_)
             signs = conv.fit()
             nonzero = signs != 0
-            estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
-
-            full_estimate = np.zeros(p)
-            full_estimate[nonzero] = estimate
-            #err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
-            err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+            if tuning == "selective_MLE":
+                estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+                full_estimate = np.zeros(p)
+                full_estimate[nonzero] = estimate
+                err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+            elif tuning == "randomized_LASSO":
+                err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
 
         lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
@@ -471,13 +473,14 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     ndraw = 50
     output_overall = np.zeros(21)
 
-    target = "full"
+    target = "selected"
+    tuning = "selective_MLE"
     n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
 
     if target == "selected":
         for i in range(ndraw):
             output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
-                                                        randomizer_scale=np.sqrt(0.5), target=target,
+                                                        randomizer_scale=np.sqrt(0.5), target=target, tuning= tuning,
                                                         full_dispersion=True)
             output_overall += np.squeeze(output)
 
@@ -519,7 +522,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             full_dispersion = False
         for i in range(ndraw):
             output = comparison_risk_inference_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
-                                                    randomizer_scale=np.sqrt(0.25), target=target,
+                                                    randomizer_scale=np.sqrt(0.25), target=target, tuning= tuning,
                                                     full_dispersion=full_dispersion)
             output_overall += np.squeeze(output)
 
diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 76799db89..ec596d1bd 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -1,16 +1,10 @@
-import numpy as np, sys
+import numpy as np, sys, os
 import pandas as pd
 from rpy2 import robjects
 import rpy2.robjects.numpy2ri
-
 rpy2.robjects.numpy2ri.activate()
 
-import selection.randomized.lasso as L;
-
-reload(L)
-from selection.randomized.lasso import highdim
-from selection.algorithms.lasso import lasso
-from scipy.stats import norm as ndist
+import selection.randomized.lasso as L; reload(L)
 from selection.adjusted_MLE.tests.test_inferential_metrics import (BHfilter,
                                                                    selInf_R,
                                                                    glmnet_lasso,
@@ -21,137 +15,26 @@
                                                                    comparison_risk_inference_selected,
                                                                    comparison_risk_inference_full)
 
-if __name__ == "__main__":
+
+def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selected", tuning = "selective_MLE",
+                randomizing_scale= np.sqrt(0.25), ndraw = 50):
 
     df_master = pd.DataFrame()
     df_risk = pd.DataFrame()
 
-    target = "selected"
-    snr_values = np.array([0.10, 0.15, 0.20, 0.25, 0.30, 0.42, 0.71, 1.22])
-
+    snr_values = np.array([0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.42, 0.71, 1.22, 2.07])
+    #snr_values = np.array([0.05, 0.10])
     for snr in snr_values:
-        ndraw = 50
-        bias = 0.
-        risk_selMLE = 0.
-        risk_indest = 0.
-        risk_LASSO_rand = 0.
-        risk_relLASSO_rand = 0.
-
-        risk_relLASSO_nonrand = 0.
-        risk_LASSO_nonrand = 0.
 
-        coverage_selMLE = 0.
-        coverage_Lee = 0.
-        coverage_unad = 0.
-
-        length_sel = 0.
-        length_Lee = 0.
-        length_unad = 0.
-
-        power_sel = 0.
-        power_Lee = 0.
-        power_unad = 0.
-        n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, snr
+        output_overall = np.zeros(21)
 
         if target == "selected":
             for i in range(ndraw):
                 output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type,
-                                                            snr=snr,
-                                                            randomizer_scale=np.sqrt(0.25), target=target,
+                                                            snr=snr,randomizer_scale=randomizing_scale,
+                                                            target=target, tuning=tuning,
                                                             full_dispersion=True)
-
-                risk_selMLE += output[0]
-                risk_indest += output[1]
-                risk_LASSO_rand += output[2]
-                risk_relLASSO_rand += output[3]
-                risk_relLASSO_nonrand += output[4]
-                risk_LASSO_nonrand += output[5]
-
-                coverage_selMLE += output[6]
-                coverage_Lee += output[7]
-                coverage_unad += output[8]
-
-                length_sel += output[9]
-                length_Lee += output[10]
-                length_unad += output[11]
-
-                power_sel += output[12]
-                power_Lee += output[13]
-                power_unad += output[14]
-
-                sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
-                sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
-                sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
-                sys.stderr.write(
-                    "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-                sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
-                sys.stderr.write("overall Lee coverage " + str(coverage_Lee / float(i + 1)) + "\n")
-                sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
-                sys.stderr.write("overall Lee length " + str(length_Lee / float(i + 1)) + "\n")
-                sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
-                sys.stderr.write("overall Lee power " + str(power_Lee / float(i + 1)) + "\n")
-                sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("iteration completed " + str(i + 1) + "\n")
-
-                # metrics = pd.DataFrame()
-                metrics_selective = pd.DataFrame({"sample_size": n,
-                                                  "regression_dim": p,
-                                                  "correlation": rho,
-                                                  "SNR": snr,
-                                                  "signal_type": beta_type,
-                                                  "risk": output[0],
-                                                  "coverage": output[6],
-                                                  "length": output[9],
-                                                  "power": output[12],
-                                                  "method": "Selective"}, index=[0])
-
-                metrics_Lee = pd.DataFrame({"sample_size": n,
-                                            "regression_dim": p,
-                                            "correlation": rho,
-                                            "SNR": snr,
-                                            "signal_type": beta_type,
-                                            "risk": output[5],
-                                            "coverage": output[7],
-                                            "length": output[10],
-                                            "power": output[13],
-                                            "method": "Lee"}, index=[0])
-
-                metrics_unad = pd.DataFrame({"sample_size": n,
-                                             "regression_dim": p,
-                                             "correlation": rho,
-                                             "SNR": snr,
-                                             "signal_type": beta_type,
-                                             "risk": output[5],
-                                             "coverage": output[8],
-                                             "length": output[11],
-                                             "power": output[14],
-                                             "method": "Naive"}, index=[0])
-
-                metrics = pd.DataFrame({"sample_size": n,
-                                        "regression_dim": p,
-                                        "correlation": rho,
-                                        "SNR": snr,
-                                        "signal_type": beta_type,
-                                        "Risk_selMLE": output[0],
-                                        "Risk_indest": output[1],
-                                        "Risk_LASSO_rand": output[2],
-                                        "Risk_relLASSO_rand": output[3],
-                                        "Risk_relLASSO_nonrand": output[4],
-                                        "Risk_LASSO_nonrand": output[5]}, index=[0])
-
-                df_master = df_master.append(metrics_selective, ignore_index=True)
-                df_master = df_master.append(metrics_Lee, ignore_index=True)
-                df_master = df_master.append(metrics_unad, ignore_index=True)
-                df_risk = df_risk.append(metrics, ignore_index=True)
+                output_overall += np.squeeze(output)
 
         elif target == "full":
             if n > p:
@@ -159,45 +42,94 @@
             else:
                 full_dispersion = False
             for i in range(ndraw):
-                output = comparison_risk_inference_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
-                                                        randomizer_scale=np.sqrt(0.25), target=target,
+                output = comparison_risk_inference_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type,
+                                                        snr=snr,
+                                                        randomizer_scale=randomizing_scale,
+                                                        target=target, tuning=tuning,
                                                         full_dispersion=full_dispersion)
-
-                risk_selMLE += output[0]
-                risk_indest += output[1]
-                risk_LASSO_rand += output[2]
-                risk_relLASSO_rand += output[3]
-                risk_relLASSO_nonrand += output[4]
-                risk_LASSO_nonrand += output[5]
-
-                coverage_selMLE += output[6]
-                coverage_unad += output[7]
-
-                length_sel += output[8]
-                length_unad += output[9]
-
-                power_sel += output[10]
-                power_unad += output[11]
-
-                sys.stderr.write("overall selMLE risk " + str(risk_selMLE / float(i + 1)) + "\n")
-                sys.stderr.write("overall indep est risk " + str(risk_indest / float(i + 1)) + "\n")
-                sys.stderr.write("overall randomized LASSO est risk " + str(risk_LASSO_rand / float(i + 1)) + "\n")
-                sys.stderr.write(
-                    "overall relaxed rand LASSO est risk " + str(risk_relLASSO_rand / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("overall relLASSO risk " + str(risk_relLASSO_nonrand / float(i + 1)) + "\n")
-                sys.stderr.write("overall LASSO risk " + str(risk_LASSO_nonrand / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("overall selective coverage " + str(coverage_selMLE / float(i + 1)) + "\n")
-                sys.stderr.write("overall unad coverage " + str(coverage_unad / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("overall selective length " + str(length_sel / float(i + 1)) + "\n")
-                sys.stderr.write("overall unad length " + str(length_unad / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("overall selective power " + str(power_sel / float(i + 1)) + "\n")
-                sys.stderr.write("overall unad power " + str(power_unad / float(i + 1)) + "\n" + "\n")
-
-                sys.stderr.write("iteration completed " + str(i + 1) + "\n")
-
-    df_master.to_csv("/Users/snigdhapanigrahi/adjusted_MLE/results/metrics_selected_target_medium.csv", index=False)
-    df_risk.to_csv("/Users/snigdhapanigrahi/adjusted_MLE/results/risk_selected_target_medium.csv", index=False)
\ No newline at end of file
+                output_overall += np.squeeze(output)
+
+        output_overall /= ndraw
+        metrics_selective_MLE = pd.DataFrame({"sample_size": n,
+                                              "regression_dim": p,
+                                              "correlation": rho,
+                                              "SNR": snr,
+                                              "signal_type": beta_type,
+                                              "risk": output_overall[0],
+                                              "coverage": output_overall[6],
+                                              "length": output_overall[9],
+                                              "power": output_overall[12],
+                                              "fdr": output_overall[18],
+                                              "power_post_BH": output_overall[15],
+                                              "method": "Selective MLE",
+                                              "tuning": tuning}, index=[0])
+
+        metrics_randomized_LASSO = pd.DataFrame({"sample_size": n,
+                                                 "regression_dim": p,
+                                                 "correlation": rho,
+                                                 "SNR": snr,
+                                                 "signal_type": beta_type,
+                                                 "risk": output_overall[2],
+                                                 "coverage": 0.,
+                                                 "length": 0.,
+                                                 "power": 0.,
+                                                 "fdr": 0.,
+                                                 "power_post_BH": 0.,
+                                                 "method": "Randomized LASSO",
+                                                 "tuning": tuning}, index=[0])
+
+
+        metrics_Lee = pd.DataFrame({"sample_size": n,
+                                    "regression_dim": p,
+                                    "correlation": rho,
+                                    "SNR": snr,
+                                    "signal_type": beta_type,
+                                    "risk": output_overall[5],
+                                    "coverage": output_overall[7],
+                                    "length": output_overall[10],
+                                    "power": output_overall[13],
+                                    "fdr": output_overall[19],
+                                    "power_post_BH": output_overall[16],
+                                    "method": "Lee",
+                                    "tuning": tuning}, index=[0])
+
+        metrics_unad = pd.DataFrame({"sample_size": n,
+                                     "regression_dim": p,
+                                     "correlation": rho,
+                                     "SNR": snr,
+                                     "signal_type": beta_type,
+                                     "risk": output_overall[5],
+                                     "coverage": output_overall[8],
+                                     "length": output_overall[11],
+                                     "power": output_overall[14],
+                                     "fdr": output_overall[20],
+                                     "power_post_BH": output_overall[17],
+                                     "method": "Naive",
+                                     "tuning": tuning}, index=[0])
+
+        metrics = pd.DataFrame({"sample_size": n,
+                                "regression_dim": p,
+                                "correlation": rho,
+                                "SNR": snr,
+                                "signal_type": beta_type,
+                                "Risk_selMLE": output_overall[0],
+                                "Risk_indest": output_overall[1],
+                                "Risk_LASSO_rand": output_overall[2],
+                                "Risk_relLASSO_rand": output_overall[3],
+                                "Risk_relLASSO_nonrand": output_overall[4],
+                                "Risk_LASSO_nonrand": output_overall[5],
+                                "tuning": tuning}, index=[0])
+
+        df_master = df_master.append(metrics_selective_MLE, ignore_index=True)
+        df_master = df_master.append(metrics_randomized_LASSO, ignore_index=True)
+        df_master = df_master.append(metrics_Lee, ignore_index=True)
+        df_master = df_master.append(metrics_unad, ignore_index=True)
+        df_risk = df_risk.append(metrics, ignore_index=True)
+
+    outfile_metrics = os.path.join(outpath, "metrics_beta_type"+ str(beta_type)+"_"+target+".csv")
+    outfile_risk = os.path.join(outpath, "risk_beta_type" + str(beta_type) + "_" + target + ".csv")
+    df_master.to_csv(outfile_metrics, index=False)
+    df_risk.to_csv(outfile_risk, index=False)
+
+write_ouput("/Users/snigdhapanigrahi/adjusted_MLE/results", n=500, p=100, rho=0.35, s=5, beta_type=1,
+            target="selected", tuning = "selective_MLE", randomizing_scale= np.sqrt(0.25), ndraw = 50)

From 2b293cc76e3ab8461f6e0b84d2192b8b28bee3db Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 12 Apr 2018 15:07:07 -0700
Subject: [PATCH 577/617] adding average selected size and discoveries

---
 .../adjusted_MLE/tests/test_inferential_metrics.py     |  8 +++++++-
 selection/adjusted_MLE/tests/test_risk_coverage.py     | 10 +++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 72c7d632e..e28de2afe 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -465,7 +465,13 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
                           power_unad_dis,
                           fdr_sel_dis,
                           fdr_Lee_dis,
-                          fdr_unad_dis))
+                          fdr_unad_dis,
+                          nonzero.sum(),
+                          nactive_LASSO,
+                          nactive_nonrand,
+                          sel_discoveries.sum(),
+                          Lee_discoveries.sum(),
+                          unad_discoveries.sum()))
 
 
 if __name__ == "__main__":
diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index ec596d1bd..518d1ace2 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -49,7 +49,7 @@ def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selec
                                                         full_dispersion=full_dispersion)
                 output_overall += np.squeeze(output)
 
-        output_overall /= ndraw
+        output_overall /= float(ndraw)
         metrics_selective_MLE = pd.DataFrame({"sample_size": n,
                                               "regression_dim": p,
                                               "correlation": rho,
@@ -61,6 +61,8 @@ def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selec
                                               "power": output_overall[12],
                                               "fdr": output_overall[18],
                                               "power_post_BH": output_overall[15],
+                                              "nactive": output_overall[21],
+                                              "ndiscoveries": output_overall[24],
                                               "method": "Selective MLE",
                                               "tuning": tuning}, index=[0])
 
@@ -75,6 +77,8 @@ def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selec
                                                  "power": 0.,
                                                  "fdr": 0.,
                                                  "power_post_BH": 0.,
+                                                 "nactive": output_overall[21],
+                                                 "ndiscoveries": 0.,
                                                  "method": "Randomized LASSO",
                                                  "tuning": tuning}, index=[0])
 
@@ -90,6 +94,8 @@ def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selec
                                     "power": output_overall[13],
                                     "fdr": output_overall[19],
                                     "power_post_BH": output_overall[16],
+                                    "nactive": output_overall[22],
+                                    "ndiscoveries": output_overall[25],
                                     "method": "Lee",
                                     "tuning": tuning}, index=[0])
 
@@ -104,6 +110,8 @@ def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selec
                                      "power": output_overall[14],
                                      "fdr": output_overall[20],
                                      "power_post_BH": output_overall[17],
+                                     "nactive": output_overall[23],
+                                     "ndiscoveries": output_overall[26],
                                      "method": "Naive",
                                      "tuning": tuning}, index=[0])
 

From ffddcaa23b12c5a297cb7713d6d02ac3ca638d76 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 12 Apr 2018 16:11:02 -0700
Subject: [PATCH 578/617] run test for a grid of snr values

---
 .../tests/test_inferential_metrics.py         | 28 +++++++++++++++++--
 .../adjusted_MLE/tests/test_risk_coverage.py  |  2 +-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index e28de2afe..10863ec67 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -306,7 +306,13 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
                           power_unad_dis,
                           fdr_sel_dis,
                           fdr_Lee_dis,
-                          fdr_unad_dis))
+                          fdr_unad_dis,
+                          nonzero.sum(),
+                          nactive_LASSO,
+                          nactive_nonrand,
+                          sel_discoveries.sum(),
+                          Lee_discoveries.sum(),
+                          unad_discoveries.sum()))
 
 def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_type=2,
                                    snr=0.2, randomizer_scale=0.5, target = "full",
@@ -477,7 +483,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 if __name__ == "__main__":
 
     ndraw = 50
-    output_overall = np.zeros(21)
+    output_overall = np.zeros(27)
 
     target = "selected"
     tuning = "selective_MLE"
@@ -488,6 +494,8 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
                                                         randomizer_scale=np.sqrt(0.5), target=target, tuning= tuning,
                                                         full_dispersion=True)
+
+            print("output", output)
             output_overall += np.squeeze(output)
 
             sys.stderr.write("overall selMLE risk " + str(output_overall[0] / float(i + 1)) + "\n")
@@ -519,6 +527,14 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             sys.stderr.write("overall Lee power post BH  " + str(output_overall[16] / float(i + 1)) + "\n")
             sys.stderr.write("overall unad power post BH " + str(output_overall[17] / float(i + 1)) + "\n" + "\n")
 
+            sys.stderr.write("average selective nactive " + str(output_overall[21] / float(i + 1)) + "\n")
+            sys.stderr.write("average Lee nactive  " + str(output_overall[22] / float(i + 1)) + "\n")
+            sys.stderr.write("average tuned LASSO nactive " + str(output_overall[23] / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("average selective discoveries " + str(output_overall[24] / float(i + 1)) + "\n")
+            sys.stderr.write("average Lee discoveries " + str(output_overall[25] / float(i + 1)) + "\n")
+            sys.stderr.write("average tuned LASSO discoveries " + str(output_overall[26] / float(i + 1)) + "\n" + "\n")
+
             sys.stderr.write("iteration completed " + str(i + 1) + "\n")
 
     elif target == "full":
@@ -561,4 +577,12 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
             sys.stderr.write("overall Lee power post BH  " + str(output_overall[16] / float(i + 1)) + "\n")
             sys.stderr.write("overall unad power post BH " + str(output_overall[17] / float(i + 1)) + "\n" + "\n")
 
+            sys.stderr.write("average selective nactive " + str(output_overall[21] / float(i + 1)) + "\n")
+            sys.stderr.write("average Lee nactive  " + str(output_overall[22] / float(i + 1)) + "\n")
+            sys.stderr.write("average tuned LASSO nactive " + str(output_overall[23] / float(i + 1)) + "\n" + "\n")
+
+            sys.stderr.write("average selective discoveries " + str(output_overall[24] / float(i + 1)) + "\n")
+            sys.stderr.write("average Lee discoveries " + str(output_overall[25] / float(i + 1)) + "\n")
+            sys.stderr.write("average tuned LASSO discoveries " + str(output_overall[26] / float(i + 1)) + "\n" + "\n")
+
             sys.stderr.write("iteration completed " + str(i + 1) + "\n")
diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index 518d1ace2..baac01118 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -26,7 +26,7 @@ def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selec
     #snr_values = np.array([0.05, 0.10])
     for snr in snr_values:
 
-        output_overall = np.zeros(21)
+        output_overall = np.zeros(27)
 
         if target == "selected":
             for i in range(ndraw):

From 95ee767855b54a0411afaf509aa182a46d66abf7 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 12 Apr 2018 23:04:07 -0700
Subject: [PATCH 579/617] add output files

---
 .../metrics_beta_type1_full_rho_0.35.csv      | 41 +++++++++++++++++++
 .../metrics_beta_type1_full_rho_0.7.csv       | 41 +++++++++++++++++++
 .../output/metrics_beta_type1_full_rho_0.csv  | 41 +++++++++++++++++++
 .../metrics_beta_type1_selected_rho_0.35.csv  | 41 +++++++++++++++++++
 .../metrics_beta_type1_selected_rho_0.7.csv   | 41 +++++++++++++++++++
 .../metrics_beta_type1_selected_rho_0.csv     | 41 +++++++++++++++++++
 .../output/risk_beta_type1_full_rho_0.35.csv  | 11 +++++
 .../output/risk_beta_type1_full_rho_0.7.csv   | 11 +++++
 .../output/risk_beta_type1_full_rho_0.csv     | 11 +++++
 .../risk_beta_type1_selected_rho_0.35.csv     | 11 +++++
 .../risk_beta_type1_selected_rho_0.7.csv      | 11 +++++
 .../output/risk_beta_type1_selected_rho_0.csv | 11 +++++
 12 files changed, 312 insertions(+)
 create mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.35.csv
 create mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.7.csv
 create mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.csv
 create mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.35.csv
 create mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.7.csv
 create mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.csv
 create mode 100644 selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.35.csv
 create mode 100644 selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.7.csv
 create mode 100644 selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.csv
 create mode 100644 selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.35.csv
 create mode 100644 selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.7.csv
 create mode 100644 selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.csv

diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.35.csv b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.35.csv
new file mode 100644
index 000000000..709cab5b5
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.35.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0.35,0.931461038961,0.1,2.41544824566,Selective MLE,6.16,0.58,0.152,0.088,100,1.19947480531,500,1,selective_MLE
+0.05,0.35,0.0,0.0,0.0,Randomized LASSO,6.16,0.0,0.0,0.0,100,0.838899806485,500,1,selective_MLE
+0.05,0.35,0.884926599127,0.05425,inf,Lee,17.12,0.78,0.188,0.072,100,0.724816854623,500,1,selective_MLE
+0.05,0.35,0.65701749871,0.38,1.55605689956,Naive,19.96,0.88,0.552,0.0,100,0.724816854623,500,1,selective_MLE
+0.1,0.35,0.923951051872,0.0723333333333,1.60286679569,Selective MLE,6.68,2.02,0.464,0.372,100,0.617980303537,500,1,selective_MLE
+0.1,0.35,0.0,0.0,0.0,Randomized LASSO,6.68,0.0,0.0,0.0,100,0.623250677108,500,1,selective_MLE
+0.1,0.35,0.829751327499,0.136298427063,inf,Lee,19.16,2.74,0.488,0.316,100,0.385265083675,500,1,selective_MLE
+0.1,0.35,0.674292607555,0.32,1.0957955719,Naive,22.72,0.62,0.848,0.0,100,0.385265083675,500,1,selective_MLE
+0.15,0.35,0.919792596293,0.0436666666667,1.24993790514,Selective MLE,7.34,3.34,0.704,0.64,100,0.375999447603,500,1,selective_MLE
+0.15,0.35,0.0,0.0,0.0,Randomized LASSO,7.34,0.0,0.0,0.0,100,0.542201834918,500,1,selective_MLE
+0.15,0.35,0.860987230522,0.0820341880342,inf,Lee,20.62,3.34,0.656,0.54,100,0.270390483342,500,1,selective_MLE
+0.15,0.35,0.652876573256,0.34,0.893430986125,Naive,23.84,0.68,0.952,0.0,100,0.270390483342,500,1,selective_MLE
+0.2,0.35,0.926208791209,0.013,1.07711888638,Selective MLE,7.52,4.06,0.86,0.8,100,0.222436708189,500,1,selective_MLE
+0.2,0.35,0.0,0.0,0.0,Randomized LASSO,7.52,0.0,0.0,0.0,100,0.446913741016,500,1,selective_MLE
+0.2,0.35,0.832607143904,0.0939413919414,inf,Lee,21.2,3.5,0.648,0.572,100,0.217031859955,500,1,selective_MLE
+0.2,0.35,0.656193739552,0.34,0.778513197816,Naive,23.04,0.58,0.984,0.0,100,0.217031859955,500,1,selective_MLE
+0.25,0.35,0.896191475191,0.028,0.954438262285,Selective MLE,7.96,4.76,0.92,0.92,100,0.136180132365,500,1,selective_MLE
+0.25,0.35,0.0,0.0,0.0,Randomized LASSO,7.96,0.0,0.0,0.0,100,0.369746575113,500,1,selective_MLE
+0.25,0.35,0.867119718639,0.126863636364,inf,Lee,22.62,3.7,0.692,0.528,100,0.183191135704,500,1,selective_MLE
+0.25,0.35,0.673454163252,0.36,0.70260871614,Naive,24.7,0.72,0.996,0.0,100,0.183191135704,500,1,selective_MLE
+0.3,0.35,0.922422355422,0.018,0.846864516823,Selective MLE,7.12,4.78,0.944,0.936,100,0.124306493466,500,1,selective_MLE
+0.3,0.35,0.0,0.0,0.0,Randomized LASSO,7.12,0.0,0.0,0.0,100,0.370077049834,500,1,selective_MLE
+0.3,0.35,0.900479439176,0.059,inf,Lee,22.32,3.32,0.668,0.604,100,0.139899752608,500,1,selective_MLE
+0.3,0.35,0.653521031881,0.44,0.639842749189,Naive,25.14,0.96,1.0,0.0,100,0.139899752608,500,1,selective_MLE
+0.42,0.35,0.89451037851,0.0233333333333,0.695195505914,Selective MLE,6.82,5.12,0.996,0.996,100,0.067374298508,500,1,selective_MLE
+0.42,0.35,0.0,0.0,0.0,Randomized LASSO,6.82,0.0,0.0,0.0,100,0.310468898242,500,1,selective_MLE
+0.42,0.35,0.866246270431,0.131911255411,inf,Lee,21.74,4.34,0.776,0.704,100,0.101985001419,500,1,selective_MLE
+0.42,0.35,0.645621038488,0.32,0.535115175216,Naive,23.98,0.68,1.0,0.0,100,0.101985001419,500,1,selective_MLE
+0.71,0.35,0.915206349206,0.00666666666667,0.517475359883,Selective MLE,6.68,5.04,1.0,1.0,100,0.0317729502039,500,1,selective_MLE
+0.71,0.35,0.0,0.0,0.0,Randomized LASSO,6.68,0.0,0.0,0.0,100,0.218910141131,500,1,selective_MLE
+0.71,0.35,0.841226328389,0.153599439776,inf,Lee,22.34,5.2,0.844,0.716,100,0.0569139003612,500,1,selective_MLE
+0.71,0.35,0.662128719316,0.46,0.411939807863,Naive,25.74,0.88,1.0,0.0,100,0.0569139003612,500,1,selective_MLE
+1.22,0.35,0.896861111111,0.00333333333333,0.399786803636,Selective MLE,6.52,5.02,1.0,1.0,100,0.0176700251849,500,1,selective_MLE
+1.22,0.35,0.0,0.0,0.0,Randomized LASSO,6.52,0.0,0.0,0.0,100,0.182617145112,500,1,selective_MLE
+1.22,0.35,0.877158606178,0.072,inf,Lee,22.22,4.54,0.868,0.812,100,0.0329382817335,500,1,selective_MLE
+1.22,0.35,0.683593512131,0.26,0.321334855624,Naive,25.94,0.7,1.0,0.0,100,0.0329382817335,500,1,selective_MLE
+2.07,0.35,0.883165223665,0.0157142857143,0.301333150726,Selective MLE,6.24,5.1,1.0,1.0,100,0.0116313177681,500,1,selective_MLE
+2.07,0.35,0.0,0.0,0.0,Randomized LASSO,6.24,0.0,0.0,0.0,100,0.100893025098,500,1,selective_MLE
+2.07,0.35,0.881958794089,0.101575091575,inf,Lee,19.76,5.28,0.932,0.9,100,0.0207267202668,500,1,selective_MLE
+2.07,0.35,0.626224030054,0.42,0.242265511428,Naive,23.18,1.08,1.0,0.0,100,0.0207267202668,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.7.csv b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.7.csv
new file mode 100644
index 000000000..b9ea473ce
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.7.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0.7,0.905238095238,0.04,2.95333681974,Selective MLE,3.72,0.3,0.112,0.048,100,1.11864047232,500,1,selective_MLE
+0.05,0.7,0.0,0.0,0.0,Randomized LASSO,3.72,0.0,0.0,0.0,100,0.826745258299,500,1,selective_MLE
+0.05,0.7,0.896339366858,0.0583333333333,inf,Lee,16.06,0.48,0.16,0.064,100,0.606481746444,500,1,selective_MLE
+0.05,0.7,0.718009953293,0.36,1.80323034055,Naive,18.22,0.78,0.38,0.0,100,0.606481746444,500,1,selective_MLE
+0.1,0.7,0.897138167388,0.0666666666667,2.03805744419,Selective MLE,5.58,1.24,0.328,0.228,100,0.812188963578,500,1,selective_MLE
+0.1,0.7,0.0,0.0,0.0,Randomized LASSO,5.58,0.0,0.0,0.0,100,0.700295664431,500,1,selective_MLE
+0.1,0.7,0.834135047629,0.109545454545,inf,Lee,19.6,1.5,0.356,0.18,100,0.398650296901,500,1,selective_MLE
+0.1,0.7,0.724421219274,0.34,1.2924447882,Naive,21.66,0.4,0.652,0.0,100,0.398650296901,500,1,selective_MLE
+0.15,0.7,0.869679172679,0.0613333333333,1.64987078154,Selective MLE,7.48,1.82,0.48,0.332,100,0.591789402777,500,1,selective_MLE
+0.15,0.7,0.0,0.0,0.0,Randomized LASSO,7.48,0.0,0.0,0.0,100,0.586732001573,500,1,selective_MLE
+0.15,0.7,0.871529817256,0.113658730159,inf,Lee,21.84,2.18,0.452,0.308,100,0.266817960717,500,1,selective_MLE
+0.15,0.7,0.735953965022,0.32,1.08356718193,Naive,23.92,0.56,0.76,0.0,100,0.266817960717,500,1,selective_MLE
+0.2,0.7,0.851695443445,0.061380952381,1.39842783719,Selective MLE,7.5,2.96,0.624,0.544,100,0.40776192466,500,1,selective_MLE
+0.2,0.7,0.0,0.0,0.0,Randomized LASSO,7.5,0.0,0.0,0.0,100,0.487626752228,500,1,selective_MLE
+0.2,0.7,0.867552980668,0.0930555555556,inf,Lee,20.28,2.18,0.552,0.336,100,0.207599545724,500,1,selective_MLE
+0.2,0.7,0.692427739069,0.34,0.925488873517,Naive,22.3,0.5,0.82,0.0,100,0.207599545724,500,1,selective_MLE
+0.25,0.7,0.895587313014,0.0733333333333,1.27619828265,Selective MLE,8.16,3.38,0.728,0.62,100,0.300554430254,500,1,selective_MLE
+0.25,0.7,0.0,0.0,0.0,Randomized LASSO,8.16,0.0,0.0,0.0,100,0.451547708341,500,1,selective_MLE
+0.25,0.7,0.875634221242,0.115936507937,inf,Lee,21.28,3.02,0.576,0.428,100,0.178457205606,500,1,selective_MLE
+0.25,0.7,0.726470926607,0.38,0.841723670385,Naive,23.24,0.7,0.9,0.0,100,0.178457205606,500,1,selective_MLE
+0.3,0.7,0.88966045066,0.0506666666667,1.12991162944,Selective MLE,7.08,4.08,0.808,0.768,100,0.239662294933,500,1,selective_MLE
+0.3,0.7,0.0,0.0,0.0,Randomized LASSO,7.08,0.0,0.0,0.0,100,0.417466476111,500,1,selective_MLE
+0.3,0.7,0.898605992125,0.118976190476,inf,Lee,20.38,3.12,0.628,0.528,100,0.142653661284,500,1,selective_MLE
+0.3,0.7,0.714628649891,0.46,0.754701079716,Naive,22.32,0.64,0.96,0.0,100,0.142653661284,500,1,selective_MLE
+0.42,0.7,0.898163780664,0.02,0.952282599856,Selective MLE,7.3,4.78,0.948,0.932,100,0.135011251127,500,1,selective_MLE
+0.42,0.7,0.0,0.0,0.0,Randomized LASSO,7.3,0.0,0.0,0.0,100,0.343633849642,500,1,selective_MLE
+0.42,0.7,0.862383839929,0.113658730159,inf,Lee,22.16,3.32,0.692,0.54,100,0.100564129182,500,1,selective_MLE
+0.42,0.7,0.728642923069,0.42,0.645102579648,Naive,24.06,0.68,0.98,0.0,100,0.100564129182,500,1,selective_MLE
+0.71,0.7,0.905436507937,0.022380952381,0.725954560251,Selective MLE,6.62,5.08,0.988,0.988,100,0.0660453156033,500,1,selective_MLE
+0.71,0.7,0.0,0.0,0.0,Randomized LASSO,6.62,0.0,0.0,0.0,100,0.325589733329,500,1,selective_MLE
+0.71,0.7,0.879464321309,0.0939285714286,inf,Lee,20.46,4.26,0.816,0.744,100,0.0622398248064,500,1,selective_MLE
+0.71,0.7,0.706791161013,0.38,0.498224619244,Naive,23.26,0.8,1.0,0.0,100,0.0622398248064,500,1,selective_MLE
+1.22,0.7,0.897117604618,0.0233333333333,0.553150093591,Selective MLE,6.66,5.14,1.0,1.0,100,0.0314691475029,500,1,selective_MLE
+1.22,0.7,0.0,0.0,0.0,Randomized LASSO,6.66,0.0,0.0,0.0,100,0.20922378322,500,1,selective_MLE
+1.22,0.7,0.84938062082,0.11780952381,inf,Lee,22.3,4.78,0.832,0.764,100,0.034510480008,500,1,selective_MLE
+1.22,0.7,0.734174716546,0.38,0.384944868613,Naive,25.12,0.64,1.0,0.0,100,0.034510480008,500,1,selective_MLE
+2.07,0.7,0.895259018759,0.0233333333333,0.41944806981,Selective MLE,6.62,5.14,1.0,1.0,100,0.0178486248352,500,1,selective_MLE
+2.07,0.7,0.0,0.0,0.0,Randomized LASSO,6.62,0.0,0.0,0.0,100,0.115974002994,500,1,selective_MLE
+2.07,0.7,0.853498348449,0.117346153846,inf,Lee,22.68,4.68,0.82,0.772,100,0.0205041933808,500,1,selective_MLE
+2.07,0.7,0.753284561051,0.34,0.296225025241,Naive,24.9,0.66,1.0,0.0,100,0.0205041933808,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.csv b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.csv
new file mode 100644
index 000000000..8bbf349b8
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0,0.937457042957,0.02,2.43870954381,Selective MLE,4.72,0.56,0.192,0.1,100,1.02796717205,500,1,selective_MLE
+0.05,0,0.0,0.0,0.0,Randomized LASSO,4.72,0.0,0.0,0.0,100,0.820946505923,500,1,selective_MLE
+0.05,0,0.902203680618,0.035,inf,Lee,15.54,0.94,0.276,0.132,100,0.652411550711,500,1,selective_MLE
+0.05,0,0.565987015067,0.5,1.50601151103,Naive,18.58,1.34,0.7,0.0,100,0.652411550711,500,1,selective_MLE
+0.1,0,0.926107992008,0.0416666666667,1.60063013697,Selective MLE,8.68,2.0,0.48,0.38,100,0.682772681521,500,1,selective_MLE
+0.1,0,0.0,0.0,0.0,Randomized LASSO,8.68,0.0,0.0,0.0,100,0.615859220351,500,1,selective_MLE
+0.1,0,0.788245175539,0.166719169719,inf,Lee,20.36,3.2,0.556,0.372,100,0.418810019872,500,1,selective_MLE
+0.1,0,0.593770391156,0.48,1.06776996874,Naive,24.36,1.22,0.912,0.0,100,0.418810019872,500,1,selective_MLE
+0.15,0,0.938626762127,0.004,1.22111486797,Selective MLE,7.28,3.22,0.708,0.64,100,0.325984583304,500,1,selective_MLE
+0.15,0,0.0,0.0,0.0,Randomized LASSO,7.28,0.0,0.0,0.0,100,0.5151162648,500,1,selective_MLE
+0.15,0,0.873978371044,0.0903992673993,inf,Lee,21.74,2.86,0.644,0.464,100,0.280431627709,500,1,selective_MLE
+0.15,0,0.631333350474,0.38,0.873398104552,Naive,25.08,0.92,0.964,0.0,100,0.280431627709,500,1,selective_MLE
+0.2,0,0.891768897769,0.0206666666667,1.0338155556,Selective MLE,8.28,4.32,0.872,0.844,100,0.215462021939,500,1,selective_MLE
+0.2,0,0.0,0.0,0.0,Randomized LASSO,8.28,0.0,0.0,0.0,100,0.401905491611,500,1,selective_MLE
+0.2,0,0.861183444566,0.0970952380952,inf,Lee,23.46,3.48,0.74,0.536,100,0.214846497925,500,1,selective_MLE
+0.2,0,0.630855949609,0.34,0.759580774553,Naive,26.48,0.86,0.992,0.0,100,0.214846497925,500,1,selective_MLE
+0.25,0,0.905975468975,0.024,0.899819168512,Selective MLE,7.42,4.54,0.9,0.88,100,0.174473785317,500,1,selective_MLE
+0.25,0,0.0,0.0,0.0,Randomized LASSO,7.42,0.0,0.0,0.0,100,0.421809411384,500,1,selective_MLE
+0.25,0,0.864400247066,0.125833333333,inf,Lee,21.38,4.28,0.764,0.668,100,0.182037721298,500,1,selective_MLE
+0.25,0,0.608578806998,0.48,0.676868448936,Naive,24.06,1.3,0.996,0.0,100,0.182037721298,500,1,selective_MLE
+0.3,0,0.906860805861,0.0197142857143,0.791999074151,Selective MLE,7.0,4.94,0.964,0.964,100,0.118313600765,500,1,selective_MLE
+0.3,0,0.0,0.0,0.0,Randomized LASSO,7.0,0.0,0.0,0.0,100,0.333848112123,500,1,selective_MLE
+0.3,0,0.883543995909,0.0591904761905,inf,Lee,20.82,3.72,0.736,0.656,100,0.150299675758,500,1,selective_MLE
+0.3,0,0.615124498408,0.34,0.616692047402,Naive,24.16,0.8,1.0,0.0,100,0.150299675758,500,1,selective_MLE
+0.42,0,0.895063492063,0.022380952381,0.656207992641,Selective MLE,7.32,5.1,0.996,0.992,100,0.0685267959665,500,1,selective_MLE
+0.42,0,0.0,0.0,0.0,Randomized LASSO,7.32,0.0,0.0,0.0,100,0.278841228658,500,1,selective_MLE
+0.42,0,0.853230856303,0.144404761905,inf,Lee,21.96,4.54,0.8,0.72,100,0.122385160693,500,1,selective_MLE
+0.42,0,0.597283994482,0.44,0.52081007883,Naive,25.86,1.3,1.0,0.0,100,0.122385160693,500,1,selective_MLE
+0.71,0,0.895963768116,0.01,0.489990645513,Selective MLE,6.5,5.06,1.0,1.0,100,0.0302118943543,500,1,selective_MLE
+0.71,0,0.0,0.0,0.0,Randomized LASSO,6.5,0.0,0.0,0.0,100,0.200842080649,500,1,selective_MLE
+0.71,0,0.840865259701,0.129703463203,inf,Lee,21.3,4.52,0.78,0.736,100,0.064742081091,500,1,selective_MLE
+0.71,0,0.605603797089,0.44,0.404439089414,Naive,24.74,0.98,1.0,0.0,100,0.064742081091,500,1,selective_MLE
+1.22,0,0.878015151515,0.0,0.368012101716,Selective MLE,6.48,5.0,1.0,1.0,100,0.0178112548381,500,1,selective_MLE
+1.22,0,0.0,0.0,0.0,Randomized LASSO,6.48,0.0,0.0,0.0,100,0.153741474347,500,1,selective_MLE
+1.22,0,0.887908101558,0.0727619047619,inf,Lee,22.08,5.1,0.916,0.872,100,0.0355829221315,500,1,selective_MLE
+1.22,0,0.600077278822,0.44,0.305925814842,Naive,24.76,1.22,1.0,0.0,100,0.0355829221315,500,1,selective_MLE
+2.07,0,0.884706349206,0.0114285714286,0.27689442939,Selective MLE,6.18,5.08,1.0,1.0,100,0.0105093060895,500,1,selective_MLE
+2.07,0,0.0,0.0,0.0,Randomized LASSO,6.18,0.0,0.0,0.0,100,0.0905511133875,500,1,selective_MLE
+2.07,0,0.856255336237,0.12569047619,inf,Lee,21.82,5.48,0.94,0.904,100,0.0192982775325,500,1,selective_MLE
+2.07,0,0.611937525472,0.42,0.234382449577,Naive,25.3,0.84,1.0,0.0,100,0.0192982775325,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.35.csv b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.35.csv
new file mode 100644
index 000000000..37717b576
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.35.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0.35,0.953446391446,0.01,2.62255933497,Selective MLE,6.74,0.3,0.128,0.056,100,1.37873397223,500,1,selective_MLE
+0.05,0.35,0.0,0.0,0.0,Randomized LASSO,6.74,0.0,0.0,0.0,100,0.85718568517,500,1,selective_MLE
+0.05,0.35,0.91206634392,0.0466666666667,inf,Lee,14.82,0.64,0.18,0.088,100,0.697798250784,500,1,selective_MLE
+0.05,0.35,0.623911071893,0.54,1.53350350149,Naive,17.72,1.18,0.6,0.0,100,0.697798250784,500,1,selective_MLE
+0.1,0.35,0.94304956155,0.038,1.71400822216,Selective MLE,7.9,1.6,0.416,0.304,100,0.72190312741,500,1,selective_MLE
+0.1,0.35,0.0,0.0,0.0,Randomized LASSO,7.9,0.0,0.0,0.0,100,0.636428859402,500,1,selective_MLE
+0.1,0.35,0.831318293013,0.107522536287,inf,Lee,19.74,2.22,0.5,0.252,100,0.419309318668,500,1,selective_MLE
+0.1,0.35,0.647898230764,0.3,1.09488163635,Naive,23.06,0.64,0.868,0.0,100,0.419309318668,500,1,selective_MLE
+0.15,0.35,0.893418470418,0.0477142857143,1.33303417535,Selective MLE,8.8,3.24,0.656,0.608,100,0.527093447425,500,1,selective_MLE
+0.15,0.35,0.0,0.0,0.0,Randomized LASSO,8.8,0.0,0.0,0.0,100,0.532820557278,500,1,selective_MLE
+0.15,0.35,0.883129892952,0.0510303030303,inf,Lee,22.82,2.46,0.532,0.392,100,0.30931592898,500,1,selective_MLE
+0.15,0.35,0.656039279891,0.4,0.904728692949,Naive,25.4,1.12,0.94,0.0,100,0.30931592898,500,1,selective_MLE
+0.2,0.35,0.904584804085,0.0482142857143,1.09913086753,Selective MLE,9.22,3.96,0.772,0.744,100,0.323355132192,500,1,selective_MLE
+0.2,0.35,0.0,0.0,0.0,Randomized LASSO,9.22,0.0,0.0,0.0,100,0.444429877595,500,1,selective_MLE
+0.2,0.35,0.881195349887,0.0685714285714,inf,Lee,21.24,3.4,0.692,0.588,100,0.246305559448,500,1,selective_MLE
+0.2,0.35,0.642143598466,0.36,0.771359441676,Naive,23.94,0.84,0.988,0.0,100,0.246305559448,500,1,selective_MLE
+0.25,0.35,0.888728485567,0.0173333333333,0.937853190268,Selective MLE,8.18,4.76,0.94,0.932,100,0.18706333101,500,1,selective_MLE
+0.25,0.35,0.0,0.0,0.0,Randomized LASSO,8.18,0.0,0.0,0.0,100,0.360765235691,500,1,selective_MLE
+0.25,0.35,0.864023356123,0.0857748917749,inf,Lee,21.66,3.42,0.704,0.584,100,0.174246008689,500,1,selective_MLE
+0.25,0.35,0.645451554632,0.38,0.699039380918,Naive,23.56,0.74,0.996,0.0,100,0.174246008689,500,1,selective_MLE
+0.3,0.35,0.900941284206,0.0166666666667,0.81640366547,Selective MLE,7.18,4.96,0.98,0.972,100,0.11590795158,500,1,selective_MLE
+0.3,0.35,0.0,0.0,0.0,Randomized LASSO,7.18,0.0,0.0,0.0,100,0.336916782573,500,1,selective_MLE
+0.3,0.35,0.910495466961,0.0765,inf,Lee,20.5,3.78,0.78,0.676,100,0.134503703797,500,1,selective_MLE
+0.3,0.35,0.651415225722,0.32,0.635206913155,Naive,23.18,0.82,1.0,0.0,100,0.134503703797,500,1,selective_MLE
+0.42,0.35,0.930399240856,0.00333333333333,0.639483506134,Selective MLE,6.84,5.02,1.0,1.0,100,0.0500593814501,500,1,selective_MLE
+0.42,0.35,0.0,0.0,0.0,Randomized LASSO,6.84,0.0,0.0,0.0,100,0.256875358635,500,1,selective_MLE
+0.42,0.35,0.832160402818,0.127043015808,inf,Lee,21.72,4.44,0.808,0.688,100,0.101018740148,500,1,selective_MLE
+0.42,0.35,0.686047173525,0.22,0.537081992933,Naive,24.7,0.68,1.0,0.0,100,0.101018740148,500,1,selective_MLE
+0.71,0.35,0.876014430014,0.01,0.480635758239,Selective MLE,6.94,5.06,1.0,1.0,100,0.0354428715806,500,1,selective_MLE
+0.71,0.35,0.0,0.0,0.0,Randomized LASSO,6.94,0.0,0.0,0.0,100,0.177950947921,500,1,selective_MLE
+0.71,0.35,0.811317398691,0.147659340659,inf,Lee,20.72,4.9,0.82,0.768,100,0.0588696020544,500,1,selective_MLE
+0.71,0.35,0.656579716621,0.38,0.412422762436,Naive,23.82,0.64,1.0,0.0,100,0.0588696020544,500,1,selective_MLE
+1.22,0.35,0.862783846872,0.00666666666667,0.357782078979,Selective MLE,6.88,5.04,1.0,1.0,100,0.0196990246932,500,1,selective_MLE
+1.22,0.35,0.0,0.0,0.0,Randomized LASSO,6.88,0.0,0.0,0.0,100,0.131259024663,500,1,selective_MLE
+1.22,0.35,0.907285507789,0.062880952381,inf,Lee,21.6,4.62,0.876,0.852,100,0.0361438615056,500,1,selective_MLE
+1.22,0.35,0.616838530693,0.42,0.312798676849,Naive,24.38,1.18,1.0,0.0,100,0.0361438615056,500,1,selective_MLE
+2.07,0.35,0.87792979243,0.00666666666667,0.263935686642,Selective MLE,6.2,5.04,1.0,1.0,100,0.0111903101344,500,1,selective_MLE
+2.07,0.35,0.0,0.0,0.0,Randomized LASSO,6.2,0.0,0.0,0.0,100,0.103825117154,500,1,selective_MLE
+2.07,0.35,0.870705509603,0.0995,inf,Lee,21.42,4.76,0.888,0.824,100,0.0227142973009,500,1,selective_MLE
+2.07,0.35,0.638173272898,0.38,0.241994303429,Naive,24.26,0.92,1.0,0.0,100,0.0227142973009,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.7.csv b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.7.csv
new file mode 100644
index 000000000..b9f7b5d3d
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.7.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0.7,0.941353246753,0.0466666666667,3.12847862728,Selective MLE,8.08,0.34,0.12,0.052,100,1.66586374221,500,1,selective_MLE
+0.05,0.7,0.0,0.0,0.0,Randomized LASSO,8.08,0.0,0.0,0.0,100,0.801798637534,500,1,selective_MLE
+0.05,0.7,0.84664048404,0.106333333333,inf,Lee,18.28,1.28,0.204,0.092,100,0.661064182407,500,1,selective_MLE
+0.05,0.7,0.759772511809,0.32,1.81220998005,Naive,20.7,0.6,0.388,0.0,100,0.661064182407,500,1,selective_MLE
+0.1,0.7,0.923905114493,0.065,2.04022769938,Selective MLE,8.38,1.48,0.384,0.26,100,0.799341484436,500,1,selective_MLE
+0.1,0.7,0.0,0.0,0.0,Randomized LASSO,8.38,0.0,0.0,0.0,100,0.562341962093,500,1,selective_MLE
+0.1,0.7,0.934468458444,0.0416666666667,inf,Lee,18.56,0.82,0.328,0.148,100,0.362204790134,500,1,selective_MLE
+0.1,0.7,0.772552814909,0.36,1.30374672061,Naive,20.16,0.62,0.6,0.0,100,0.362204790134,500,1,selective_MLE
+0.15,0.7,0.909303241203,0.0996666666667,1.61825315428,Selective MLE,9.32,2.62,0.576,0.464,100,0.480043897059,500,1,selective_MLE
+0.15,0.7,0.0,0.0,0.0,Randomized LASSO,9.32,0.0,0.0,0.0,100,0.46261866559,500,1,selective_MLE
+0.15,0.7,0.857411817184,0.0915555555556,inf,Lee,20.02,2.04,0.484,0.3,100,0.246989970283,500,1,selective_MLE
+0.15,0.7,0.746438916071,0.38,1.06442385769,Naive,22.18,0.64,0.784,0.0,100,0.246989970283,500,1,selective_MLE
+0.2,0.7,0.893055028305,0.0746666666667,1.34162708639,Selective MLE,9.2,3.46,0.7,0.632,100,0.350465323309,500,1,selective_MLE
+0.2,0.7,0.0,0.0,0.0,Randomized LASSO,9.2,0.0,0.0,0.0,100,0.399987898639,500,1,selective_MLE
+0.2,0.7,0.899794766829,0.0613333333333,inf,Lee,20.04,2.3,0.544,0.4,100,0.202248144831,500,1,selective_MLE
+0.2,0.7,0.723670204707,0.36,0.936604099722,Naive,22.14,0.66,0.828,0.0,100,0.202248144831,500,1,selective_MLE
+0.25,0.7,0.901028776779,0.0600476190476,1.10528070685,Selective MLE,7.96,4.3,0.824,0.796,100,0.231265018526,500,1,selective_MLE
+0.25,0.7,0.0,0.0,0.0,Randomized LASSO,7.96,0.0,0.0,0.0,100,0.391931305213,500,1,selective_MLE
+0.25,0.7,0.869938608551,0.058,inf,Lee,19.4,2.36,0.584,0.408,100,0.172239159064,500,1,selective_MLE
+0.25,0.7,0.734517071822,0.3,0.825343778303,Naive,20.96,0.52,0.9,0.0,100,0.172239159064,500,1,selective_MLE
+0.3,0.7,0.903070593622,0.0580952380952,1.07247799185,Selective MLE,9.46,4.5,0.868,0.836,100,0.207613886764,500,1,selective_MLE
+0.3,0.7,0.0,0.0,0.0,Randomized LASSO,9.46,0.0,0.0,0.0,100,0.365459757906,500,1,selective_MLE
+0.3,0.7,0.837387555884,0.131878787879,inf,Lee,20.3,3.48,0.66,0.536,100,0.137834199808,500,1,selective_MLE
+0.3,0.7,0.725759395522,0.32,0.76482979869,Naive,22.32,0.46,0.944,0.0,100,0.137834199808,500,1,selective_MLE
+0.42,0.7,0.916862914863,0.0423333333333,0.792847708267,Selective MLE,7.96,4.94,0.952,0.944,100,0.103537820619,500,1,selective_MLE
+0.42,0.7,0.0,0.0,0.0,Randomized LASSO,7.96,0.0,0.0,0.0,100,0.321212638744,500,1,selective_MLE
+0.42,0.7,0.876272476718,0.082,inf,Lee,22.58,3.2,0.672,0.552,100,0.101927117901,500,1,selective_MLE
+0.42,0.7,0.745566797024,0.32,0.651727263064,Naive,24.72,0.64,0.988,0.0,100,0.101927117901,500,1,selective_MLE
+0.71,0.7,0.911663780664,0.00666666666667,0.574890188171,Selective MLE,7.18,5.02,1.0,0.996,100,0.0397673470199,500,1,selective_MLE
+0.71,0.7,0.0,0.0,0.0,Randomized LASSO,7.18,0.0,0.0,0.0,100,0.250400422185,500,1,selective_MLE
+0.71,0.7,0.868175712041,0.105714285714,inf,Lee,20.78,4.44,0.828,0.74,100,0.0603137823088,500,1,selective_MLE
+0.71,0.7,0.737111160385,0.5,0.493834490485,Naive,23.82,0.86,1.0,0.0,100,0.0603137823088,500,1,selective_MLE
+1.22,0.7,0.893790598291,0.0238095238095,0.421277992252,Selective MLE,7.06,5.16,1.0,1.0,100,0.0197899774304,500,1,selective_MLE
+1.22,0.7,0.0,0.0,0.0,Randomized LASSO,7.06,0.0,0.0,0.0,100,0.171959642058,500,1,selective_MLE
+1.22,0.7,0.85568554212,0.0900952380952,inf,Lee,20.96,4.4,0.836,0.768,100,0.0331405157854,500,1,selective_MLE
+1.22,0.7,0.69805206367,0.5,0.376074177624,Naive,23.7,1.1,1.0,0.0,100,0.0331405157854,500,1,selective_MLE
+2.07,0.7,0.918686094951,0.0166666666667,0.31458774565,Selective MLE,6.98,5.1,1.0,1.0,100,0.0132487406717,500,1,selective_MLE
+2.07,0.7,0.0,0.0,0.0,Randomized LASSO,6.98,0.0,0.0,0.0,100,0.105343758224,500,1,selective_MLE
+2.07,0.7,0.896404172114,0.0737619047619,inf,Lee,18.86,4.74,0.876,0.872,100,0.0196362653582,500,1,selective_MLE
+2.07,0.7,0.745607621443,0.4,0.284394427217,Naive,21.04,0.68,1.0,0.0,100,0.0196362653582,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.csv b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.csv
new file mode 100644
index 000000000..f07d7949e
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0,0.932472356019,0.03,2.6324941767,Selective MLE,5.98,0.34,0.144,0.06,100,1.32630280485,500,1,selective_MLE
+0.05,0,0.0,0.0,0.0,Randomized LASSO,5.98,0.0,0.0,0.0,100,0.850486099629,500,1,selective_MLE
+0.05,0,0.860325496125,0.0786666666667,inf,Lee,15.4,1.16,0.248,0.128,100,0.743435422189,500,1,selective_MLE
+0.05,0,0.58166636283,0.36,1.50526063476,Naive,18.64,0.72,0.624,0.0,100,0.743435422189,500,1,selective_MLE
+0.1,0,0.918493841556,0.0636666666667,1.60463392779,Selective MLE,7.62,2.08,0.452,0.368,100,0.790410383997,500,1,selective_MLE
+0.1,0,0.0,0.0,0.0,Randomized LASSO,7.62,0.0,0.0,0.0,100,0.633263657991,500,1,selective_MLE
+0.1,0,0.772588728079,0.143692918193,inf,Lee,18.2,3.18,0.592,0.384,100,0.402338513706,500,1,selective_MLE
+0.1,0,0.638335673122,0.48,1.06272306187,Naive,22.58,1.14,0.9,0.0,100,0.402338513706,500,1,selective_MLE
+0.15,0,0.930189535954,0.0426666666667,1.26801056055,Selective MLE,8.9,3.36,0.72,0.64,100,0.441970517896,500,1,selective_MLE
+0.15,0,0.0,0.0,0.0,Randomized LASSO,8.9,0.0,0.0,0.0,100,0.500278735638,500,1,selective_MLE
+0.15,0,0.861172095308,0.0819047619048,inf,Lee,23.32,2.64,0.584,0.412,100,0.311910915364,500,1,selective_MLE
+0.15,0,0.631503502131,0.4,0.87618977193,Naive,26.48,0.78,0.976,0.0,100,0.311910915364,500,1,selective_MLE
+0.2,0,0.891537668214,0.045380952381,1.06823603924,Selective MLE,9.58,4.38,0.88,0.828,100,0.295231118235,500,1,selective_MLE
+0.2,0,0.0,0.0,0.0,Randomized LASSO,9.58,0.0,0.0,0.0,100,0.41184090871,500,1,selective_MLE
+0.2,0,0.873406617318,0.0773709273183,inf,Lee,22.54,3.38,0.676,0.552,100,0.225929760535,500,1,selective_MLE
+0.2,0,0.615013356706,0.26,0.754970800244,Naive,26.28,0.58,0.992,0.0,100,0.225929760535,500,1,selective_MLE
+0.25,0,0.89275951826,0.0173333333333,0.88119704876,Selective MLE,8.18,4.64,0.924,0.908,100,0.182150423954,500,1,selective_MLE
+0.25,0,0.0,0.0,0.0,Randomized LASSO,8.18,0.0,0.0,0.0,100,0.329875217599,500,1,selective_MLE
+0.25,0,0.862133418685,0.0904706959707,inf,Lee,23.42,4.14,0.756,0.64,100,0.178438719613,500,1,selective_MLE
+0.25,0,0.611743771144,0.48,0.674957724008,Naive,26.86,1.42,1.0,0.0,100,0.178438719613,500,1,selective_MLE
+0.3,0,0.916427925016,0.0285714285714,0.79173975785,Selective MLE,7.5,5.02,0.976,0.968,100,0.111715425255,500,1,selective_MLE
+0.3,0,0.0,0.0,0.0,Randomized LASSO,7.5,0.0,0.0,0.0,100,0.298821814837,500,1,selective_MLE
+0.3,0,0.911144418584,0.0596168831169,inf,Lee,22.0,4.12,0.84,0.736,100,0.137883197407,500,1,selective_MLE
+0.3,0,0.623022913068,0.3,0.616177690356,Naive,25.68,0.82,1.0,0.0,100,0.137883197407,500,1,selective_MLE
+0.42,0,0.902132034632,0.0157142857143,0.635633387241,Selective MLE,7.18,5.06,0.992,0.992,100,0.0713444446047,500,1,selective_MLE
+0.42,0,0.0,0.0,0.0,Randomized LASSO,7.18,0.0,0.0,0.0,100,0.243721553208,500,1,selective_MLE
+0.42,0,0.892962359305,0.056380952381,inf,Lee,22.28,3.96,0.748,0.688,100,0.0969747510687,500,1,selective_MLE
+0.42,0,0.601893799756,0.38,0.519658907133,Naive,26.24,1.18,1.0,0.0,100,0.0969747510687,500,1,selective_MLE
+0.71,0,0.913706349206,0.00666666666667,0.458282318816,Selective MLE,6.28,5.0,0.992,0.992,100,0.0321801187824,500,1,selective_MLE
+0.71,0,0.0,0.0,0.0,Randomized LASSO,6.28,0.0,0.0,0.0,100,0.218274335294,500,1,selective_MLE
+0.71,0,0.907448196543,0.0443846153846,inf,Lee,22.62,4.5,0.872,0.832,100,0.0601112928232,500,1,selective_MLE
+0.71,0,0.645894221103,0.32,0.400115092722,Naive,26.46,0.94,1.0,0.0,100,0.0601112928232,500,1,selective_MLE
+1.22,0,0.89423981574,0.0190476190476,0.36355554238,Selective MLE,6.96,5.12,1.0,1.0,100,0.024659280186,500,1,selective_MLE
+1.22,0,0.0,0.0,0.0,Randomized LASSO,6.96,0.0,0.0,0.0,100,0.110645464006,500,1,selective_MLE
+1.22,0,0.843731225696,0.129650793651,inf,Lee,21.5,4.9,0.844,0.78,100,0.0361396721766,500,1,selective_MLE
+1.22,0,0.573358425381,0.36,0.304981895518,Naive,24.02,0.88,1.0,0.0,100,0.0361396721766,500,1,selective_MLE
+2.07,0,0.903992063492,0.00666666666667,0.267634909387,Selective MLE,6.66,5.04,1.0,1.0,100,0.00916534444897,500,1,selective_MLE
+2.07,0,0.0,0.0,0.0,Randomized LASSO,6.66,0.0,0.0,0.0,100,0.0798053674236,500,1,selective_MLE
+2.07,0,0.864089754713,0.109571428571,inf,Lee,22.6,4.92,0.88,0.808,100,0.0217887602061,500,1,selective_MLE
+2.07,0,0.63382150953,0.44,0.234850586616,Naive,25.6,0.84,1.0,0.0,100,0.0217887602061,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.35.csv b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.35.csv
new file mode 100644
index 000000000..a6ec55380
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.35.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+0.724816854623,0.838899806485,1.62965821078,0.724224013793,1.33106465713,1.19947480531,0.05,0.35,100,500,1,selective_MLE
+0.385265083675,0.623250677108,0.895897013543,0.361045191295,0.60611889663,0.617980303537,0.1,0.35,100,500,1,selective_MLE
+0.270390483342,0.542201834918,0.542516757338,0.194962371313,0.372711293725,0.375999447603,0.15,0.35,100,500,1,selective_MLE
+0.217031859955,0.446913741016,0.380461749893,0.127195036097,0.227063885605,0.222436708189,0.2,0.35,100,500,1,selective_MLE
+0.183191135704,0.369746575113,0.287851483974,0.0701930323035,0.132418997893,0.136180132365,0.25,0.35,100,500,1,selective_MLE
+0.139899752608,0.370077049834,0.229602473852,0.0696566148775,0.129604816339,0.124306493466,0.3,0.35,100,500,1,selective_MLE
+0.101985001419,0.310468898242,0.155101021839,0.0285528565579,0.0690563735948,0.067374298508,0.42,0.35,100,500,1,selective_MLE
+0.0569139003612,0.218910141131,0.0741056132107,0.0148122885092,0.0328322740991,0.0317729502039,0.71,0.35,100,500,1,selective_MLE
+0.0329382817335,0.182617145112,0.045243085294,0.00958924135652,0.0198175219444,0.0176700251849,1.22,0.35,100,500,1,selective_MLE
+0.0207267202668,0.100893025098,0.026965625387,0.00498697963158,0.0111318165399,0.0116313177681,2.07,0.35,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.7.csv b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.7.csv
new file mode 100644
index 000000000..bb1ea0979
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.7.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+0.606481746444,0.826745258299,1.33305308527,0.62271913104,0.980415841111,1.11864047232,0.05,0.7,100,500,1,selective_MLE
+0.398650296901,0.700295664431,1.02004385461,0.36712331116,0.630021857222,0.812188963578,0.1,0.7,100,500,1,selective_MLE
+0.266817960717,0.586732001573,0.716854128753,0.222798693376,0.413654992164,0.591789402777,0.15,0.7,100,500,1,selective_MLE
+0.207599545724,0.487626752228,0.492860811183,0.130128412475,0.245453395708,0.40776192466,0.2,0.7,100,500,1,selective_MLE
+0.178457205606,0.451547708341,0.41839803002,0.101150720899,0.191089891637,0.300554430254,0.25,0.7,100,500,1,selective_MLE
+0.142653661284,0.417466476111,0.29398318169,0.0763905428181,0.159325062914,0.239662294933,0.3,0.7,100,500,1,selective_MLE
+0.100564129182,0.343633849642,0.202650571086,0.0360311178731,0.0746274086812,0.135011251127,0.42,0.7,100,500,1,selective_MLE
+0.0622398248064,0.325589733329,0.0951241582053,0.0188866395806,0.0358910916596,0.0660453156033,0.71,0.7,100,500,1,selective_MLE
+0.034510480008,0.20922378322,0.0489181354491,0.012197026661,0.018067922928,0.0314691475029,1.22,0.7,100,500,1,selective_MLE
+0.0205041933808,0.115974002994,0.0320890511388,0.00618113465831,0.0109080617738,0.0178486248352,2.07,0.7,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.csv b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.csv
new file mode 100644
index 000000000..9c1ca727a
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+0.652411550711,0.820946505923,1.4070040248,0.661438798105,1.16213302331,1.02796717205,0.05,0,100,500,1,selective_MLE
+0.418810019872,0.615859220351,1.08859877204,0.396310997244,0.730827245437,0.682772681521,0.1,0,100,500,1,selective_MLE
+0.280431627709,0.5151162648,0.53810847739,0.202537367658,0.362203372763,0.325984583304,0.15,0,100,500,1,selective_MLE
+0.214846497925,0.401905491611,0.42362790596,0.11670955253,0.22108750486,0.215462021939,0.2,0,100,500,1,selective_MLE
+0.182037721298,0.421809411384,0.319733900683,0.0912351556428,0.201887706538,0.174473785317,0.25,0,100,500,1,selective_MLE
+0.150299675758,0.333848112123,0.217944505315,0.0590215304306,0.127539754074,0.118313600765,0.3,0,100,500,1,selective_MLE
+0.122385160693,0.278841228658,0.159635815479,0.0357065622719,0.0846994005377,0.0685267959665,0.42,0,100,500,1,selective_MLE
+0.064742081091,0.200842080649,0.075943258678,0.0175017280137,0.0352320848703,0.0302118943543,0.71,0,100,500,1,selective_MLE
+0.0355829221315,0.153741474347,0.055041462649,0.0120802822177,0.019930314589,0.0178112548381,1.22,0,100,500,1,selective_MLE
+0.0192982775325,0.0905511133875,0.0321402100347,0.00550207449333,0.0116545903161,0.0105093060895,2.07,0,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.35.csv b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.35.csv
new file mode 100644
index 000000000..3b4b877b0
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.35.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+0.697798250784,0.85718568517,2.21896878479,0.699571498105,1.33274011885,1.37873397223,0.05,0.35,100,500,1,selective_MLE
+0.419309318668,0.636428859402,1.1987352918,0.380829530637,0.646123024361,0.72190312741,0.1,0.35,100,500,1,selective_MLE
+0.30931592898,0.532820557278,1.02217246606,0.249026330394,0.431733783231,0.527093447425,0.15,0.35,100,500,1,selective_MLE
+0.246305559448,0.444429877595,0.673491149536,0.154679163925,0.320423659938,0.323355132192,0.2,0.35,100,500,1,selective_MLE
+0.174246008689,0.360765235691,0.467873778027,0.0760494000571,0.164079376842,0.18706333101,0.25,0.35,100,500,1,selective_MLE
+0.134503703797,0.336916782573,0.345490972051,0.0459261611936,0.0935937159224,0.11590795158,0.3,0.35,100,500,1,selective_MLE
+0.101018740148,0.256875358635,0.221607861887,0.0257195421617,0.0553450654339,0.0500593814501,0.42,0.35,100,500,1,selective_MLE
+0.0588696020544,0.177950947921,0.132963527587,0.0201241127366,0.0424956636144,0.0354428715806,0.71,0.35,100,500,1,selective_MLE
+0.0361438615056,0.131259024663,0.0838490306946,0.0122029950952,0.0242627335914,0.0196990246932,1.22,0.35,100,500,1,selective_MLE
+0.0227142973009,0.103825117154,0.039772197288,0.00664066401051,0.0118976464415,0.0111903101344,2.07,0.35,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.7.csv b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.7.csv
new file mode 100644
index 000000000..b0a461397
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.7.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+0.661064182407,0.801798637534,3.75841271437,0.66794182998,1.30489419765,1.66586374221,0.05,0.7,100,500,1,selective_MLE
+0.362204790134,0.562341962093,1.88335993038,0.335434741644,0.565653950118,0.799341484436,0.1,0.7,100,500,1,selective_MLE
+0.246989970283,0.46261866559,1.19598629058,0.192855215933,0.34910692817,0.480043897059,0.15,0.7,100,500,1,selective_MLE
+0.202248144831,0.399987898639,0.910333623448,0.119039329576,0.230405329048,0.350465323309,0.2,0.7,100,500,1,selective_MLE
+0.172239159064,0.391931305213,0.792634324635,0.107346196542,0.168426306761,0.231265018526,0.25,0.7,100,500,1,selective_MLE
+0.137834199808,0.365459757906,0.643725343517,0.0769725923295,0.148819449516,0.207613886764,0.3,0.7,100,500,1,selective_MLE
+0.101927117901,0.321212638744,0.386211423156,0.0429049071332,0.0843358069426,0.103537820619,0.42,0.7,100,500,1,selective_MLE
+0.0603137823088,0.250400422185,0.199884223847,0.0197333709389,0.0342016623851,0.0397673470199,0.71,0.7,100,500,1,selective_MLE
+0.0331405157854,0.171959642058,0.111838231528,0.0111907083798,0.0183320601807,0.0197899774304,1.22,0.7,100,500,1,selective_MLE
+0.0196362653582,0.105343758224,0.0683338359143,0.00567750470076,0.0108766113923,0.0132487406717,2.07,0.7,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.csv b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.csv
new file mode 100644
index 000000000..be23c3507
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+0.743435422189,0.850486099629,2.02596730455,0.725569100518,1.31529845576,1.32630280485,0.05,0,100,500,1,selective_MLE
+0.402338513706,0.633263657991,1.20378586671,0.350213321137,0.656021851188,0.790410383997,0.1,0,100,500,1,selective_MLE
+0.311910915364,0.500278735638,0.826297999063,0.210857868418,0.420782103491,0.441970517896,0.15,0,100,500,1,selective_MLE
+0.225929760535,0.41184090871,0.569616166985,0.125815448077,0.270196807028,0.295231118235,0.2,0,100,500,1,selective_MLE
+0.178438719613,0.329875217599,0.440095415652,0.0917532172973,0.189823026931,0.182150423954,0.25,0,100,500,1,selective_MLE
+0.137883197407,0.298821814837,0.313436366994,0.0402924350131,0.117190963254,0.111715425255,0.3,0,100,500,1,selective_MLE
+0.0969747510687,0.243721553208,0.176178413144,0.0278034606202,0.0711334925696,0.0713444446047,0.42,0,100,500,1,selective_MLE
+0.0601112928232,0.218274335294,0.113176600439,0.018583278581,0.0382532254237,0.0321801187824,0.71,0,100,500,1,selective_MLE
+0.0361396721766,0.110645464006,0.062664606523,0.0104018131365,0.0245477860903,0.024659280186,1.22,0,100,500,1,selective_MLE
+0.0217887602061,0.0798053674236,0.0332560523286,0.00578911789716,0.0131973279945,0.00916534444897,2.07,0,100,500,1,selective_MLE

From d106ee45848c4d7505e114795ba8d73966f3eca5 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 13 Apr 2018 14:19:32 -0700
Subject: [PATCH 580/617] added SLOPE to current working branch

---
 selection/SLOPE/__init__.py             |   0
 selection/SLOPE/slope.py                | 300 ++++++++++++++++++++++++
 selection/SLOPE/tests/__init__.py       |   0
 selection/SLOPE/tests/slope_run_test.py | 114 +++++++++
 4 files changed, 414 insertions(+)
 create mode 100644 selection/SLOPE/__init__.py
 create mode 100644 selection/SLOPE/slope.py
 create mode 100644 selection/SLOPE/tests/__init__.py
 create mode 100644 selection/SLOPE/tests/slope_run_test.py

diff --git a/selection/SLOPE/__init__.py b/selection/SLOPE/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
new file mode 100644
index 000000000..ab56bb88a
--- /dev/null
+++ b/selection/SLOPE/slope.py
@@ -0,0 +1,300 @@
+"""
+Implementation of the SLOPE proximal operator of
+https://statweb.stanford.edu/~candes/papers/SLOPE.pdf
+"""
+from copy import copy
+import numpy as np
+import regreg.api as rr
+from scipy import sparse
+
+have_isotonic = False
+try:
+    from sklearn.isotonic import IsotonicRegression
+
+    have_isotonic = True
+except ImportError:
+    raise ValueError('unable to import isotonic regression from sklearn')
+
+
+from regreg.atoms.seminorms import seminorm
+
+from regreg.atoms import _work_out_conjugate
+from regreg.objdoctemplates import objective_doc_templater
+from regreg.doctemplates import (doc_template_user, doc_template_provider)
+
+
+@objective_doc_templater()
+class slope(seminorm):
+    """
+    The SLOPE penalty: weighted sum of the absolute values sorted in decreasing order.
+    """
+
+    objective_template = r"""\sum_j \lambda_j |%(var)s_{(j)}|"""
+
+    def __init__(self, weights, lagrange=None, bound=None,
+                 offset=None,
+                 quadratic=None,
+                 initial=None):
+        # weights must be positive and non-increasing; other arguments go to seminorm.__init__
+        weights = np.array(weights, float)  # np.float alias was removed in NumPy 1.24
+        if not np.allclose(-weights, np.sort(-weights)):
+            raise ValueError('weights should be non-increasing')
+        if not np.all(weights > 0):
+            raise ValueError('weights must be positive')
+
+        self.weights = weights
+        self._dummy = np.arange(self.weights.shape[0])
+
+        seminorm.__init__(self, self.weights.shape,
+                          lagrange=lagrange,
+                          bound=bound,
+                          quadratic=quadratic,
+                          initial=initial,
+                          offset=offset)
+
+    def seminorm(self, x, lagrange=None, check_feasibility=False):
+        lagrange = seminorm.seminorm(self, x,
+                                     check_feasibility=check_feasibility,
+                                     lagrange=lagrange)
+        xsort = np.sort(np.fabs(x))[::-1]
+        return lagrange * np.fabs(xsort * self.weights).sum()
+
+    @doc_template_user
+    def constraint(self, x, bound=None):
+        bound = seminorm.constraint(self, x, bound=bound)
+        inbox = self.seminorm(x, lagrange=1,
+                              check_feasibility=True) <= bound * (1 + self.tol)
+        if inbox:
+            return 0
+        else:
+            return np.inf
+
+    @doc_template_user
+    def lagrange_prox(self, x, lipschitz=1, lagrange=None):
+        lagrange = seminorm.lagrange_prox(self, x, lipschitz, lagrange)
+        return _basic_proximal_map(x, self.weights * lagrange / lipschitz)
+
+    @doc_template_user
+    def bound_prox(self, x, bound=None):
+        raise NotImplementedError
+
+    def __copy__(self):
+        return self.__class__(self.weights.copy(),
+                              quadratic=self.quadratic,
+                              initial=self.coefs,
+                              bound=copy(self.bound),
+                              lagrange=copy(self.lagrange),
+                              offset=copy(self.offset))
+
+    def __repr__(self):
+        if self.lagrange is not None:
+            if self.quadratic.iszero:
+                return "%s(%s, lagrange=%f, offset=%s)" % \
+                       (self.__class__.__name__,
+                        str(self.weights),
+                        self.lagrange,
+                        str(self.offset))
+            else:
+                return "%s(%s, lagrange=%f, offset=%s, quadratic=%s)" % \
+                       (self.__class__.__name__,
+                        str(self.weights),
+                        self.lagrange,
+                        str(self.offset),
+                        self.quadratic)
+        else:
+            if self.quadratic.iszero:
+                return "%s(%s, bound=%f, offset=%s)" % \
+                       (self.__class__.__name__,
+                        str(self.weights),
+                        self.bound,
+                        str(self.offset))
+            else:
+                return "%s(%s, bound=%f, offset=%s, quadratic=%s)" % \
+                       (self.__class__.__name__,
+                        str(self.weights),
+                        self.bound,
+                        str(self.offset),
+                        self.quadratic)
+
+    def get_conjugate(self):
+        # conjugate atom: lagrange and bound forms are swapped between the paired classes
+        if self.quadratic.coef == 0:
+            offset, outq = _work_out_conjugate(self.offset, self.quadratic)
+            cls = conjugate_slope_pairs[self.__class__]
+            if self.bound is None:
+                atom = cls(self.weights,
+                           bound=self.lagrange,
+                           lagrange=None,
+                           offset=offset,
+                           quadratic=outq)
+            else:
+                atom = cls(self.weights,
+                           lagrange=self.bound,
+                           bound=None,
+                           offset=offset,
+                           quadratic=outq)
+        else:
+            # smooth_conjugate was used without an import (NameError); path per regreg -- confirm
+            from regreg.problems.composite import smooth_conjugate
+            atom = smooth_conjugate(self)
+
+        self._conjugate = atom
+        self._conjugate._conjugate = self
+        return self._conjugate
+
+    conjugate = property(get_conjugate)
+
+
+@objective_doc_templater()
+class slope_conjugate(slope):
+    r"""
+    Atom paired with `slope` as its conjugate; its seminorm is lagrange * max_j |x_(j)| / weights_j.
+    """
+
+    objective_template = r"""P^*(%(var)s)"""
+
+    @doc_template_user
+    def seminorm(self, x, lagrange=None, check_feasibility=False):
+        lagrange = seminorm.seminorm(self, x,
+                                     check_feasibility=check_feasibility,
+                                     lagrange=lagrange)
+        xsort = np.sort(np.fabs(x))[::-1]
+        return lagrange * np.fabs(xsort / self.weights).max()
+
+    @doc_template_user
+    def constraint(self, x, bound=None):
+        bound = seminorm.constraint(self, x, bound=bound)
+        inbox = self.seminorm(x, lagrange=1,
+                              check_feasibility=True) <= bound * (1 + self.tol)
+        if inbox:
+            return 0
+        else:
+            return np.inf
+
+    @doc_template_user
+    def lagrange_prox(self, x, lipschitz=1, lagrange=None):
+        raise NotImplementedError
+
+    @doc_template_user
+    def bound_prox(self, x, bound=None):
+        bound = seminorm.bound_prox(self, x, bound)
+
+        # the proximal map is evaluated
+        # by working out the SLOPE proximal
+        # map and computing the residual
+
+        # might be better to just find the correct cython function instead
+        # of always constructing IsotonicRegression
+
+        _slope_prox = _basic_proximal_map(x, self.weights * bound)
+        return x - _slope_prox
+
+
+def _basic_proximal_map(center, weights):
+    """
+    Proximal algorithm described (2.3) of SLOPE
+    though sklearn isotonic has ordering reversed.
+    """
+
+    # the proximal map sorts the absolute values,
+    # runs isotonic regression with an offset
+    # reassigns the signs
+
+    # might be better to just find the correct cython function instead
+    # of always constructing IsotonicRegression
+
+    ir = IsotonicRegression()
+
+    _dummy = np.arange(center.shape[0])
+    _arg = np.argsort(np.fabs(center))
+    shifted_center = np.fabs(center)[_arg] - weights[::-1]
+    _prox_val = np.clip(ir.fit_transform(_dummy, shifted_center), 0, np.inf)
+    _return_val = np.zeros_like(_prox_val)
+    _return_val[_arg] = _prox_val
+    _return_val *= np.sign(center)
+    return _return_val
+
+
+def _projection_onto_selected_subgradients(prox_arg,
+                                           weights,
+                                           ordering,
+                                           cluster_sizes,
+                                           active_signs,
+                                           last_value_zero=True):
+    """
+    Compute the projection of a point onto the set of
+    subgradients of the SLOPE penalty with a given
+    clustering of the solution and signs of the variables.
+    This is a projection onto a lower dimensional set. The dimension
+    of this set is p -- the dimensions of the `prox_arg` minus
+    the number of unique values in `ordered_clustering` + 1 if the
+    last value of the solution was zero (i.e. solution was sparse).
+    Parameters
+    ----------
+    prox_arg : np.ndarray(p, np.float)
+        Point to project
+    weights : np.ndarray(p, np.float)
+        Weights of the SLOPE penalty.
+    ordering : np.ndarray(p, np.int)
+        Order of original argument to SLOPE prox.
+        First entry corresponds to largest argument of SLOPE prox.
+    cluster_sizes : sequence
+        Sizes of clusters, starting with
+        largest in absolute value.
+    active_signs : np.ndarray(p, np.int)
+         Signs of non-zero coefficients.
+    last_value_zero : bool
+        Is the last solution value equal to 0?
+    """
+
+    result = np.zeros_like(prox_arg)
+
+    ordered_clustering = []
+    cur_idx = 0
+    for cluster_size in cluster_sizes:
+        ordered_clustering.append([ordering[j + cur_idx] for j in range(cluster_size)])
+        cur_idx += cluster_size
+
+    # Now, run appropriate SLOPE prox on each cluster
+    cur_idx = 0
+    for i, cluster in enumerate(ordered_clustering):
+        prox_subarg = np.array([prox_arg[j] for j in cluster])
+
+        # If the value of the soln to the prox was non-zero
+        # then we solve a SLOPE of size 1 smaller than the cluster
+
+        # If the cluster size is 1, the value is just
+        # the corresponding signed weight
+
+        if i < len(ordered_clustering) - 1 or not last_value_zero:
+            if len(cluster) == 1:
+                result[cluster[0]] = weights[cur_idx] * active_signs[cluster[0]]
+            else:
+                indices = [j + cur_idx for j in range(len(cluster))]
+                cluster_weights = weights[indices]
+
+                ir = IsotonicRegression()
+                _ir_result = ir.fit_transform(np.arange(len(cluster)), cluster_weights[::-1])[::-1]
+                result[indices] = -np.multiply(active_signs[indices], _ir_result/2.)
+
+        else:
+            indices = np.array([j + cur_idx for j in range(len(cluster))])
+            cluster_weights = weights[indices]
+
+            pen = slope(cluster_weights, lagrange=1.)
+            loss = rr.squared_error(np.identity(len(cluster)), prox_subarg)
+            slope_problem = rr.simple_problem(loss, pen)
+            result[indices] = prox_subarg - slope_problem.solve()
+
+        cur_idx += len(cluster)
+
+    return result
+
+"""
+For a cluster of size bigger than 1, we solve
+"""
+
+conjugate_slope_pairs = {}
+for n1, n2 in [(slope, slope_conjugate)]:
+    conjugate_slope_pairs[n1] = n2
+    conjugate_slope_pairs[n2] = n1
\ No newline at end of file
diff --git a/selection/SLOPE/tests/__init__.py b/selection/SLOPE/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
new file mode 100644
index 000000000..2673220df
--- /dev/null
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -0,0 +1,114 @@
+from rpy2.robjects.packages import importr
+from rpy2 import robjects
+
+SLOPE = importr('SLOPE')
+
+import rpy2.robjects.numpy2ri
+rpy2.robjects.numpy2ri.activate()
+
+import numpy as np
+import sys
+
+from regreg.atoms.slope import slope
+
+import regreg.api as rr
+
+
+def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian"):
+    robjects.r('''
+    slope = function(X, Y, W=NA, normalize, choice_weights, fdr = NA, sigma = 1){
+      if(is.na(sigma)){
+      sigma = NULL}
+      if(is.na(fdr)){
+      fdr = 0.1 }
+      if(normalize=="TRUE"){
+       normalize = TRUE} else{
+       normalize = FALSE}
+      if(is.na(W))
+      {
+        if(choice_weights == "gaussian"){
+        lambda = "gaussian"} else{
+        lambda = "bhq"}
+        result = SLOPE(X, Y, fdr = fdr, lambda = lambda, sigma = sigma, normalize = normalize)
+       } else{
+        result = SLOPE(X, Y, fdr = fdr, lambda = W, sigma = sigma, normalize = normalize)
+      }
+      return(list(beta = result$beta, E = result$selected, lambda_seq = result$lambda, sigma = result$sigma))
+    }''')
+
+    r_slope = robjects.globalenv['slope']
+
+    n, p = X.shape
+    r_X = robjects.r.matrix(X, nrow=n, ncol=p)
+    r_Y = robjects.r.matrix(Y, nrow=n, ncol=1)
+
+    if normalize is True:
+        r_normalize = robjects.StrVector('True')
+    else:
+        r_normalize = robjects.StrVector('False')
+
+    if W is None:
+        r_W = robjects.NA_Logical
+        if choice_weights is "gaussian":
+            r_choice_weights  = robjects.StrVector('gaussian')
+        elif choice_weights is "bhq":
+            r_choice_weights = robjects.StrVector('bhq')
+
+    else:
+        r_W = robjects.r.matrix(W, nrow=p, ncol=1)
+
+    result = r_slope(r_X, r_Y, r_W, r_normalize, r_choice_weights)
+
+    return result[0], result[1], result[2], result[3]
+
+def compare_outputs_prechosen_weights():
+
+    n, p = 500, 50
+
+    X = np.random.standard_normal((n, p))
+    Y = np.random.standard_normal(n)
+    W = np.linspace(3, 3.5, p)[::-1]
+
+    output_R = test_slope_R(X, Y, W)
+    r_beta = output_R[0]
+    print("output of est coefs R", r_beta)
+
+    pen = slope(W, lagrange=1.)
+    loss = rr.squared_error(X, Y)
+    problem = rr.simple_problem(loss, pen)
+    soln = problem.solve()
+    print("output of est coefs python", soln)
+
+    print("difference in solns", soln-r_beta)
+
+#compare_outputs_prechosen_weights()
+
+def compare_outputs_SLOPE_weights():
+
+    n, p = 500, 50
+
+    X = np.random.standard_normal((n, p))
+    #Y = np.random.standard_normal(n)
+    X -= X.mean(0)[None, :]
+    X /= (X.std(0)[None, :] * np.sqrt(n))
+    beta = np.zeros(p)
+    beta[:5] = 5.
+
+    Y = X.dot(beta) + np.random.standard_normal(n)
+
+    output_R = test_slope_R(X, Y, W = None, normalize = True, choice_weights = "bhq")
+    r_beta = output_R[0]
+    r_lambda_seq = output_R[2]
+    print("output of est coefs R", r_beta)
+
+    W = r_lambda_seq
+    pen = slope(W, lagrange=1.)
+
+    loss = rr.squared_error(X, Y)
+    problem = rr.simple_problem(loss, pen)
+    soln = problem.solve()
+    print("output of est coefs python", soln)
+
+    print("difference in solns", soln-r_beta)
+
+compare_outputs_SLOPE_weights()
\ No newline at end of file

From 21814f7d5f1d69835e9b6b4117f3d4229826589e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 13 Apr 2018 16:13:23 -0700
Subject: [PATCH 581/617] test for SLOPE

---
 selection/SLOPE/tests/slope_run_test.py       | 90 ++++++++++++-------
 .../adjusted_MLE/tests/test_risk_coverage.py  |  8 +-
 2 files changed, 61 insertions(+), 37 deletions(-)

diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index 2673220df..5832cff21 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -6,19 +6,15 @@
 import rpy2.robjects.numpy2ri
 rpy2.robjects.numpy2ri.activate()
 
-import numpy as np
-import sys
-
-from regreg.atoms.slope import slope
+from selection.tests.instance import gaussian_instance
 
+import numpy as np
+from selection.SLOPE.slope import slope
 import regreg.api as rr
 
-
 def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian"):
     robjects.r('''
-    slope = function(X, Y, W=NA, normalize, choice_weights, fdr = NA, sigma = 1){
-      if(is.na(sigma)){
-      sigma = NULL}
+    slope = function(X, Y, W=NA, normalize, choice_weights, fdr = NA){
       if(is.na(fdr)){
       fdr = 0.1 }
       if(normalize=="TRUE"){
@@ -29,10 +25,11 @@ def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian"):
         if(choice_weights == "gaussian"){
         lambda = "gaussian"} else{
         lambda = "bhq"}
-        result = SLOPE(X, Y, fdr = fdr, lambda = lambda, sigma = sigma, normalize = normalize)
+        result = SLOPE(X, Y, fdr = fdr, lambda = lambda, normalize = normalize)
        } else{
-        result = SLOPE(X, Y, fdr = fdr, lambda = W, sigma = sigma, normalize = normalize)
+        result = SLOPE(X, Y, fdr = fdr, lambda = W, normalize = normalize)
       }
+      print(paste("estimated sigma", class(result$sigma)))
       return(list(beta = result$beta, E = result$selected, lambda_seq = result$lambda, sigma = result$sigma))
     }''')
 
@@ -59,7 +56,8 @@ def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian"):
 
     result = r_slope(r_X, r_Y, r_W, r_normalize, r_choice_weights)
 
-    return result[0], result[1], result[2], result[3]
+    return np.asarray(result.rx2('beta')), np.asarray(result.rx2('E')), \
+           np.asarray(result.rx2('lambda_seq')), np.asscalar(np.array(result.rx2('sigma')))
 
 def compare_outputs_prechosen_weights():
 
@@ -70,8 +68,9 @@ def compare_outputs_prechosen_weights():
     W = np.linspace(3, 3.5, p)[::-1]
 
     output_R = test_slope_R(X, Y, W)
-    r_beta = output_R[0]
-    print("output of est coefs R", r_beta)
+    print("output R", output_R)
+    beta_R = output_R[0]
+    print("output of est coefs R", beta_R)
 
     pen = slope(W, lagrange=1.)
     loss = rr.squared_error(X, Y)
@@ -79,36 +78,61 @@ def compare_outputs_prechosen_weights():
     soln = problem.solve()
     print("output of est coefs python", soln)
 
-    print("difference in solns", soln-r_beta)
+    print("relative difference in solns", np.linalg.norm(soln-beta_R)/np.linalg.norm(beta_R))
 
 #compare_outputs_prechosen_weights()
 
-def compare_outputs_SLOPE_weights():
-
-    n, p = 500, 50
-
-    X = np.random.standard_normal((n, p))
-    #Y = np.random.standard_normal(n)
-    X -= X.mean(0)[None, :]
-    X /= (X.std(0)[None, :] * np.sqrt(n))
-    beta = np.zeros(p)
-    beta[:5] = 5.
-
-    Y = X.dot(beta) + np.random.standard_normal(n)
-
-    output_R = test_slope_R(X, Y, W = None, normalize = True, choice_weights = "bhq")
-    r_beta = output_R[0]
-    r_lambda_seq = output_R[2]
+# def compare_outputs_SLOPE_weights():
+#
+#     n, p = 500, 50
+#
+#     X = np.random.standard_normal((n, p))
+#     X -= X.mean(0)[None, :]
+#     X /= (X.std(0)[None, :] * np.sqrt(n))
+#     beta = np.zeros(p)
+#     beta[:5] = 5.
+#
+#     Y = X.dot(beta) + np.random.standard_normal(n)
+#
+#     output_R = test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian")
+#     r_beta = output_R[0]
+#     r_lambda_seq = output_R[2]
+#     print("output of est coefs R", r_beta)
+#
+#     W = r_lambda_seq
+#     pen = slope(W, lagrange=1.)
+#
+#     loss = rr.squared_error(X, Y)
+#     problem = rr.simple_problem(loss, pen)
+#     soln = problem.solve()
+#     print("output of est coefs python", soln)
+#
+#     print("relative difference in solns", np.linalg.norm(soln-r_beta)/np.linalg.norm(r_beta))
+
+def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1.1, s=5, sigma=3., rho=0.):
+
+    inst = gaussian_instance
+    signal = np.sqrt(signal_fac * 2. * np.log(p))
+    X, Y, beta = inst(n=n,
+                      p=p,
+                      signal=signal,
+                      s=s,
+                      equicorrelated=False,
+                      rho=rho,
+                      sigma=sigma,
+                      random_signs=True)[:3]
+
+    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian")
+    print("estimated sigma", r_sigma)
     print("output of est coefs R", r_beta)
 
-    W = r_lambda_seq
-    pen = slope(W, lagrange=1.)
+    pen = slope(r_sigma* r_lambda_seq, lagrange=1.)
 
     loss = rr.squared_error(X, Y)
     problem = rr.simple_problem(loss, pen)
     soln = problem.solve()
     print("output of est coefs python", soln)
 
-    print("difference in solns", soln-r_beta)
+    print("relative difference in solns", np.linalg.norm(soln-r_beta)/np.linalg.norm(r_beta))
 
 compare_outputs_SLOPE_weights()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index baac01118..ed2b84c5d 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -134,10 +134,10 @@ def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selec
         df_master = df_master.append(metrics_unad, ignore_index=True)
         df_risk = df_risk.append(metrics, ignore_index=True)
 
-    outfile_metrics = os.path.join(outpath, "metrics_beta_type"+ str(beta_type)+"_"+target+".csv")
-    outfile_risk = os.path.join(outpath, "risk_beta_type" + str(beta_type) + "_" + target + ".csv")
+    outfile_metrics = os.path.join(outpath, "metrics_high_beta_type"+ str(beta_type) + "_"+ target + "_rho_"+ str(rho) +".csv")
+    outfile_risk = os.path.join(outpath, "risk_high_beta_type" + str(beta_type) + "_" + target +"_rho_"+ str(rho) + ".csv")
     df_master.to_csv(outfile_metrics, index=False)
     df_risk.to_csv(outfile_risk, index=False)
 
-write_ouput("/Users/snigdhapanigrahi/adjusted_MLE/results", n=500, p=100, rho=0.35, s=5, beta_type=1,
-            target="selected", tuning = "selective_MLE", randomizing_scale= np.sqrt(0.25), ndraw = 50)
+write_ouput("/Users/snigdhapanigrahi/adjusted_MLE/results", n=200, p=1000, rho=0, s=10, beta_type=1,
+            target="full", tuning = "randomized_LASSO", randomizing_scale= np.sqrt(0.25), ndraw = 50)

From 7ed837d185555b616c5a110eda9375d52c778073 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 13 Apr 2018 16:33:48 -0700
Subject: [PATCH 582/617] cleaned up test for SLOPE

---
 selection/SLOPE/tests/slope_run_test.py | 82 +++++++------------------
 1 file changed, 21 insertions(+), 61 deletions(-)

diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index 5832cff21..c66c9d334 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -12,9 +12,12 @@
 from selection.SLOPE.slope import slope
 import regreg.api as rr
 
-def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian"):
+def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma = None):
     robjects.r('''
-    slope = function(X, Y, W=NA, normalize, choice_weights, fdr = NA){
+    slope = function(X, Y, W , normalize, choice_weights, sigma, fdr = NA){
+      if(is.na(sigma)){
+      sigma=NULL} else{
+      sigma = as.matrix(sigma)[1,1]}
       if(is.na(fdr)){
       fdr = 0.1 }
       if(normalize=="TRUE"){
@@ -25,11 +28,10 @@ def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian"):
         if(choice_weights == "gaussian"){
         lambda = "gaussian"} else{
         lambda = "bhq"}
-        result = SLOPE(X, Y, fdr = fdr, lambda = lambda, normalize = normalize)
+        result = SLOPE(X, Y, fdr = fdr, lambda = lambda, normalize = normalize, sigma = sigma)
        } else{
-        result = SLOPE(X, Y, fdr = fdr, lambda = W, normalize = normalize)
+        result = SLOPE(X, Y, fdr = fdr, lambda = W, normalize = normalize, sigma = sigma)
       }
-      print(paste("estimated sigma", class(result$sigma)))
       return(list(beta = result$beta, E = result$selected, lambda_seq = result$lambda, sigma = result$sigma))
     }''')
 
@@ -50,66 +52,20 @@ def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian"):
             r_choice_weights  = robjects.StrVector('gaussian')
         elif choice_weights is "bhq":
             r_choice_weights = robjects.StrVector('bhq')
-
     else:
         r_W = robjects.r.matrix(W, nrow=p, ncol=1)
 
-    result = r_slope(r_X, r_Y, r_W, r_normalize, r_choice_weights)
+    if sigma is None:
+        r_sigma = robjects.NA_Logical
+    else:
+        r_sigma = robjects.r.matrix(sigma, nrow=1, ncol=1)
+
+    result = r_slope(r_X, r_Y, r_W, r_normalize, r_choice_weights, r_sigma)
 
     return np.asarray(result.rx2('beta')), np.asarray(result.rx2('E')), \
            np.asarray(result.rx2('lambda_seq')), np.asscalar(np.array(result.rx2('sigma')))
 
-def compare_outputs_prechosen_weights():
-
-    n, p = 500, 50
-
-    X = np.random.standard_normal((n, p))
-    Y = np.random.standard_normal(n)
-    W = np.linspace(3, 3.5, p)[::-1]
-
-    output_R = test_slope_R(X, Y, W)
-    print("output R", output_R)
-    beta_R = output_R[0]
-    print("output of est coefs R", beta_R)
-
-    pen = slope(W, lagrange=1.)
-    loss = rr.squared_error(X, Y)
-    problem = rr.simple_problem(loss, pen)
-    soln = problem.solve()
-    print("output of est coefs python", soln)
-
-    print("relative difference in solns", np.linalg.norm(soln-beta_R)/np.linalg.norm(beta_R))
-
-#compare_outputs_prechosen_weights()
-
-# def compare_outputs_SLOPE_weights():
-#
-#     n, p = 500, 50
-#
-#     X = np.random.standard_normal((n, p))
-#     X -= X.mean(0)[None, :]
-#     X /= (X.std(0)[None, :] * np.sqrt(n))
-#     beta = np.zeros(p)
-#     beta[:5] = 5.
-#
-#     Y = X.dot(beta) + np.random.standard_normal(n)
-#
-#     output_R = test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian")
-#     r_beta = output_R[0]
-#     r_lambda_seq = output_R[2]
-#     print("output of est coefs R", r_beta)
-#
-#     W = r_lambda_seq
-#     pen = slope(W, lagrange=1.)
-#
-#     loss = rr.squared_error(X, Y)
-#     problem = rr.simple_problem(loss, pen)
-#     soln = problem.solve()
-#     print("output of est coefs python", soln)
-#
-#     print("relative difference in solns", np.linalg.norm(soln-r_beta)/np.linalg.norm(r_beta))
-
-def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1.1, s=5, sigma=3., rho=0.):
+def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.):
 
     inst = gaussian_instance
     signal = np.sqrt(signal_fac * 2. * np.log(p))
@@ -122,11 +78,15 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1.1, s=5, sigma=3., r
                       sigma=sigma,
                       random_signs=True)[:3]
 
-    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian")
-    print("estimated sigma", r_sigma)
+    sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))
+    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X, Y, W = None,
+                                                      normalize = True,
+                                                      choice_weights = "gaussian",
+                                                      sigma = sigma_)
+    print("estimated sigma", sigma_, r_sigma)
     print("output of est coefs R", r_beta)
 
-    pen = slope(r_sigma* r_lambda_seq, lagrange=1.)
+    pen = slope(r_sigma * r_lambda_seq, lagrange=1.)
 
     loss = rr.squared_error(X, Y)
     problem = rr.simple_problem(loss, pen)

From 8f4473c6eeaa2e5a17207dedead117b30d255b3e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 14 Apr 2018 22:48:31 -0700
Subject: [PATCH 583/617] check soln of randomized SLOPE

---
 selection/SLOPE/tests/slope_run_test.py | 63 +++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index c66c9d334..0f14cba74 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -65,7 +65,7 @@ def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian",
     return np.asarray(result.rx2('beta')), np.asarray(result.rx2('E')), \
            np.asarray(result.rx2('lambda_seq')), np.asscalar(np.array(result.rx2('sigma')))
 
-def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.):
+def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35):
 
     inst = gaussian_instance
     signal = np.sqrt(signal_fac * 2. * np.log(p))
@@ -79,11 +79,14 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rh
                       random_signs=True)[:3]
 
     sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))
-    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X, Y, W = None,
+    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X,
+                                                      Y,
+                                                      W = None,
                                                       normalize = True,
                                                       choice_weights = "gaussian",
                                                       sigma = sigma_)
     print("estimated sigma", sigma_, r_sigma)
+    print("weights output by R", r_lambda_seq)
     print("output of est coefs R", r_beta)
 
     pen = slope(r_sigma * r_lambda_seq, lagrange=1.)
@@ -95,4 +98,58 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rh
 
     print("relative difference in solns", np.linalg.norm(soln-r_beta)/np.linalg.norm(r_beta))
 
-compare_outputs_SLOPE_weights()
\ No newline at end of file
+#compare_outputs_SLOPE_weights()
+
+def randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
+                     randomizer_scale= np.sqrt(0.25),
+                     solve_args={'tol':1.e-12, 'min_its':50}):
+
+    inst = gaussian_instance
+    signal = np.sqrt(signal_fac * 2. * np.log(p))
+    X, Y, beta = inst(n=n,
+                      p=p,
+                      signal=signal,
+                      s=s,
+                      equicorrelated=False,
+                      rho=rho,
+                      sigma=sigma,
+                      random_signs=True)[:3]
+
+    sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))
+    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X,
+                                                      Y,
+                                                      W=None,
+                                                      normalize=True,
+                                                      choice_weights="gaussian",
+                                                      sigma=sigma_)
+
+    pen = slope(r_sigma * r_lambda_seq, lagrange=1.)
+
+    loglike = rr.glm.gaussian(X, Y, coef=1., quadratic=None)
+    _initial_omega = randomizer_scale * sigma_* np.random.standard_normal(p)
+    quad = rr.identity_quadratic(0, 0, -_initial_omega, 0)
+    problem = rr.simple_problem(loglike, pen)
+    initial_soln = problem.solve(quad, **solve_args)
+
+    print("initial_soln", initial_soln)
+
+    initial_subgrad = -(loglike.smooth_objective(initial_soln, 'grad') + quad.objective(initial_soln, 'grad'))
+    #print("weights returned by R", r_lambda_seq)
+    print("initial subgrad", np.abs(initial_subgrad))
+
+    indices = np.argsort(-np.abs(initial_soln))
+    print("sorted soln", initial_soln[indices], np.abs(initial_subgrad[indices]))
+    sorted_soln = initial_soln[indices]
+
+    cur_indx_array = []
+    cur_indx_array .append(0)
+    cur_indx = 0
+    for j in range(p-1):
+        if np.abs(sorted_soln[j+1]) != np.abs(sorted_soln[cur_indx]):
+            cur_indx_array.append(j+1)
+            cur_indx = j+1
+            if sorted_soln[j+1]== 0:
+                break
+
+    print("start indices of clusters", cur_indx_array)
+randomized_slope()
\ No newline at end of file

From 0cfecd5c0eb86b8a38db1e4b7f87c89c394a1523 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sat, 14 Apr 2018 23:37:03 -0700
Subject: [PATCH 584/617] detect clusters and rearrange subgradient in
 decreasing order

---
 selection/SLOPE/tests/slope_run_test.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index 0f14cba74..0d63a9164 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -140,16 +140,26 @@ def randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
     indices = np.argsort(-np.abs(initial_soln))
     print("sorted soln", initial_soln[indices], np.abs(initial_subgrad[indices]))
     sorted_soln = initial_soln[indices]
+    sorted_subgrad = initial_subgrad[indices]
 
     cur_indx_array = []
     cur_indx_array .append(0)
     cur_indx = 0
+    pointer = 0
+    subgrad_cluster_indices = np.zeros(p, np.int)
     for j in range(p-1):
         if np.abs(sorted_soln[j+1]) != np.abs(sorted_soln[cur_indx]):
             cur_indx_array.append(j+1)
             cur_indx = j+1
+            subgrad_cluster_indices[cur_indx_array[pointer]:(j+1)] = (np.argsort(-np.abs(sorted_subgrad
+                                                                                         [cur_indx_array[pointer]:(j+1)]))
+                                                                      + cur_indx_array[pointer])
+            pointer = pointer + 1
             if sorted_soln[j+1]== 0:
+                subgrad_cluster_indices[(j+1):] = (np.argsort(-np.abs(sorted_subgrad[j+1:]))+(j+1))
                 break
 
     print("start indices of clusters", cur_indx_array)
+    print("sorted indices of inactive cluster", subgrad_cluster_indices,
+          np.abs(sorted_subgrad[subgrad_cluster_indices]))
 randomized_slope()
\ No newline at end of file

From cfb29ead1b5239c11579c84d34546f62f5ecf0e9 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sun, 15 Apr 2018 20:00:45 -0700
Subject: [PATCH 585/617] created a temporary class for SLOPE for now

---
 selection/SLOPE/slope.py                      | 569 +++++++++---------
 selection/SLOPE/tests/slope_run_test.py       |  68 ++-
 .../tests/test_inferential_metrics.py         |   4 +-
 3 files changed, 330 insertions(+), 311 deletions(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index ab56bb88a..4a52629c3 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -1,300 +1,289 @@
-"""
-Implementation of the SLOPE proximal operator of
-https://statweb.stanford.edu/~candes/papers/SLOPE.pdf
-"""
-from copy import copy
+from __future__ import print_function
+import functools
 import numpy as np
+from regreg.atoms.slope import slope
+from selection.randomized.randomization import randomization
 import regreg.api as rr
-from scipy import sparse
-
-have_isotonic = False
-try:
-    from sklearn.isotonic import IsotonicRegression
-
-    have_isotonic = True
-except ImportError:
-    raise ValueError('unable to import isotonic regression from sklearn')
-
-
-from regreg.atoms.seminorms import seminorm
-
-from regreg.atoms import _work_out_conjugate
-from regreg.objdoctemplates import objective_doc_templater
-from regreg.doctemplates import (doc_template_user, doc_template_provider)
-
-
-@objective_doc_templater()
-class slope(seminorm):
-    """
-    The SLOPE penalty
-    """
-
-    objective_template = r"""\sum_j \lambda_j |(var)s_{(j)}|"""
-
-    def __init__(self, weights, lagrange=None, bound=None,
-                 offset=None,
-                 quadratic=None,
-                 initial=None):
-
-        weights = np.array(weights, np.float)
-        if not np.allclose(-weights, np.sort(-weights)):
-            raise ValueError('weights should be non-increasing')
-        if not np.all(weights > 0):
-            raise ValueError('weights must be positive')
-
-        self.weights = weights
-        self._dummy = np.arange(self.weights.shape[0])
-
-        seminorm.__init__(self, self.weights.shape,
-                          lagrange=lagrange,
-                          bound=bound,
-                          quadratic=quadratic,
-                          initial=initial,
-                          offset=offset)
-
-    def seminorm(self, x, lagrange=None, check_feasibility=False):
-        lagrange = seminorm.seminorm(self, x,
-                                     check_feasibility=check_feasibility,
-                                     lagrange=lagrange)
-        xsort = np.sort(np.fabs(x))[::-1]
-        return lagrange * np.fabs(xsort * self.weights).sum()
-
-    @doc_template_user
-    def constraint(self, x, bound=None):
-        bound = seminorm.constraint(self, x, bound=bound)
-        inbox = self.seminorm(x, lagrange=1,
-                              check_feasibility=True) <= bound * (1 + self.tol)
-        if inbox:
-            return 0
-        else:
-            return np.inf
-
-    @doc_template_user
-    def lagrange_prox(self, x, lipschitz=1, lagrange=None):
-        lagrange = seminorm.lagrange_prox(self, x, lipschitz, lagrange)
-        return _basic_proximal_map(x, self.weights * lagrange / lipschitz)
-
-    @doc_template_user
-    def bound_prox(self, x, bound=None):
-        raise NotImplementedError
-
-    def __copy__(self):
-        return self.__class__(self.weights.copy(),
-                              quadratic=self.quadratic,
-                              initial=self.coefs,
-                              bound=copy(self.bound),
-                              lagrange=copy(self.lagrange),
-                              offset=copy(self.offset))
-
-    def __repr__(self):
-        if self.lagrange is not None:
-            if not self.quadratic.iszero:
-                return "%s(%s, lagrange=%f, offset=%s)" % \
-                       (self.__class__.__name__,
-                        str(self.weights),
-                        self.lagrange,
-                        str(self.offset))
-            else:
-                return "%s(%s, lagrange=%f, offset=%s, quadratic=%s)" % \
-                       (self.__class__.__name__,
-                        str(self.weights),
-                        self.lagrange,
-                        str(self.offset),
-                        self.quadratic)
-        else:
-            if not self.quadratic.iszero:
-                return "%s(%s, bound=%f, offset=%s)" % \
-                       (self.__class__.__name__,
-                        str(self.weights),
-                        self.bound,
-                        str(self.offset))
-            else:
-                return "%s(%s, bound=%f, offset=%s, quadratic=%s)" % \
-                       (self.__class__.__name__,
-                        str(self.weights),
-                        self.bound,
-                        str(self.offset),
-                        self.quadratic)
-
-    def get_conjugate(self):
-        if self.quadratic.coef == 0:
-
-            offset, outq = _work_out_conjugate(self.offset, self.quadratic)
-
-            if self.bound is None:
-                cls = conjugate_slope_pairs[self.__class__]
-                atom = cls(self.weights,
-                           bound=self.lagrange,
-                           lagrange=None,
-                           offset=offset,
-                           quadratic=outq)
-            else:
-                cls = conjugate_slope_pairs[self.__class__]
-                atom = cls(self.weights,
-                           lagrange=self.bound,
-                           bound=None,
-                           offset=offset,
-                           quadratic=outq)
-        else:
-            atom = smooth_conjugate(self)
-
-        self._conjugate = atom
-        self._conjugate._conjugate = self
-        return self._conjugate
-
-    conjugate = property(get_conjugate)
-
-
-@objective_doc_templater()
-class slope_conjugate(slope):
-    r"""
-    The dual of the slope penalty:math:`\ell_{\infty}` norm
-    """
-
-    objective_template = r"""P^*(%(var)s)"""
-
-    @doc_template_user
-    def seminorm(self, x, lagrange=None, check_feasibility=False):
-        lagrange = seminorm.seminorm(self, x,
-                                     check_feasibility=check_feasibility,
-                                     lagrange=lagrange)
-        xsort = np.sort(np.fabs(x))[::-1]
-        return lagrange * np.fabs(xsort / self.weights).max()
-
-    @doc_template_user
-    def constraint(self, x, bound=None):
-        bound = seminorm.constraint(self, x, bound=bound)
-        inbox = self.seminorm(x, lagrange=1,
-                              check_feasibility=True) <= bound * (1 + self.tol)
-        if inbox:
-            return 0
-        else:
-            return np.inf
-
-    @doc_template_user
-    def lagrange_prox(self, x, lipschitz=1, lagrange=None):
-        raise NotImplementedError
-
-    @doc_template_user
-    def bound_prox(self, x, bound=None):
-        bound = seminorm.bound_prox(self, x, bound)
-
-        # the proximal map is evaluated
-        # by working out the SLOPE proximal
-        # map and computing the residual
-
-        # might be better to just find the correct cython function instead
-        # of always constructing IsotonicRegression
-
-        _slope_prox = _basic_proximal_map(x, self.weights * bound)
-        return x - _slope_prox
-
-
-def _basic_proximal_map(center, weights):
-    """
-    Proximal algorithm described (2.3) of SLOPE
-    though sklearn isotonic has ordering reversed.
-    """
-
-    # the proximal map sorts the absolute values,
-    # runs isotonic regression with an offset
-    # reassigns the signs
-
-    # might be better to just find the correct cython function instead
-    # of always constructing IsotonicRegression
-
-    ir = IsotonicRegression()
-
-    _dummy = np.arange(center.shape[0])
-    _arg = np.argsort(np.fabs(center))
-    shifted_center = np.fabs(center)[_arg] - weights[::-1]
-    _prox_val = np.clip(ir.fit_transform(_dummy, shifted_center), 0, np.inf)
-    _return_val = np.zeros_like(_prox_val)
-    _return_val[_arg] = _prox_val
-    _return_val *= np.sign(center)
-    return _return_val
-
-
-def _projection_onto_selected_subgradients(prox_arg,
-                                           weights,
-                                           ordering,
-                                           cluster_sizes,
-                                           active_signs,
-                                           last_value_zero=True):
-    """
-    Compute the projection of a point onto the set of
-    subgradients of the SLOPE penalty with a given
-    clustering of the solution and signs of the variables.
-    This is a projection onto a lower dimensional set. The dimension
-    of this set is p -- the dimensions of the `prox_arg` minus
-    the number of unique values in `ordered_clustering` + 1 if the
-    last value of the solution was zero (i.e. solution was sparse).
-    Parameters
-    ----------
-    prox_arg : np.ndarray(p, np.float)
-        Point to project
-    weights : np.ndarray(p, np.float)
-        Weights of the SLOPE penalty.
-    ordering : np.ndarray(p, np.int)
-        Order of original argument to SLOPE prox.
-        First entry corresponds to largest argument of SLOPE prox.
-    cluster_sizes : sequence
-        Sizes of clusters, starting with
-        largest in absolute value.
-    active_signs : np.ndarray(p, np.int)
-         Signs of non-zero coefficients.
-    last_value_zero : bool
-        Is the last solution value equal to 0?
-    """
-
-    result = np.zeros_like(prox_arg)
-
-    ordered_clustering = []
-    cur_idx = 0
-    for cluster_size in cluster_sizes:
-        ordered_clustering.append([ordering[j + cur_idx] for j in range(cluster_size)])
-        cur_idx += cluster_size
-
-    # Now, run appropriate SLOPE prox on each cluster
-    cur_idx = 0
-    for i, cluster in enumerate(ordered_clustering):
-        prox_subarg = np.array([prox_arg[j] for j in cluster])
-
-        # If the value of the soln to the prox was non-zero
-        # then we solve a SLOPE of size 1 smaller than the cluster
-
-        # If the cluster size is 1, the value is just
-        # the corresponding signed weight
-
-        if i < len(ordered_clustering) - 1 or not last_value_zero:
-            if len(cluster) == 1:
-                result[cluster[0]] = weights[cur_idx] * active_signs[cluster[0]]
+from selection.randomized.base import restricted_estimator
+from selection.constraints.affine import constraints
+from selection.randomized.query import (query,
+                                        multiple_queries,
+                                        langevin_sampler,
+                                        affine_gaussian_sampler)
+
+class randomized_slope():
+
+    def __init__(self,
+                 loglike,
+                 feature_weights,
+                 ridge_term,
+                 randomizer_scale,
+                 perturb=None):
+        r"""
+        Create a new post-selection object for the SLOPE problem
+        Parameters
+        ----------
+        loglike : `regreg.smooth.glm.glm`
+            A (negative) log-likelihood as implemented in `regreg`.
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is broadcast to all features.
+        ridge_term : float
+            How big a ridge term to add?
+        randomizer_scale : float
+            Scale for IID components of randomization.
+        perturb : np.ndarray
+            Random perturbation subtracted as a linear
+            term in the objective function.
+        """
+
+        self.loglike = loglike
+        self.nfeature = p = self.loglike.shape[0]
+
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(loglike.shape) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+
+        self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
+        self.ridge_term = ridge_term
+        self.penalty = slope(feature_weights, lagrange=1.)
+        self._initial_omega = perturb  # random perturbation
+
+    def fit(self,
+            solve_args={'tol': 1.e-12, 'min_its': 50},
+            perturb=None):
+
+        p = self.nfeature
+
+        # take a new perturbation if supplied
+        if perturb is not None:
+            self._initial_omega = perturb
+        if self._initial_omega is None:
+            self._initial_omega = self.randomizer.sample()
+
+        quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0)
+        problem = rr.simple_problem(self.loglike, self.penalty)
+        self.initial_soln = problem.solve(quad, **solve_args)
+
+        active_signs = np.sign(self.initial_soln)
+        active = self._active = active_signs != 0
+        self._unpenalized = np.zeros(p, np.bool)
+
+        self._overall = overall = active> 0
+        self._inactive = inactive = ~self._overall
+
+        _active_signs = active_signs.copy()
+        self.selection_variable = {'sign': _active_signs,
+                                   'variables': self._overall}
+
+        initial_subgrad = -(self.loglike.smooth_objective(self.initial_soln, 'grad') +
+                            quad.objective(self.initial_soln, 'grad'))
+        self.initial_subgrad = initial_subgrad
+
+        indices = np.argsort(-np.fabs(self.initial_soln))
+        sorted_soln = self.initial_soln[indices]
+        initial_scalings = np.sort(np.fabs(np.unique(self.initial_soln[active])))[::-1]
+        self.observed_opt_state = initial_scalings
+
+        _beta_unpenalized = restricted_estimator(self.loglike, self._overall, solve_args=solve_args)
+
+        beta_bar = np.zeros(p)
+        beta_bar[overall] = _beta_unpenalized
+        self._beta_full = beta_bar
+
+        self.num_opt_var = self.observed_opt_state.shape[0]
+
+        _opt_linear_term = np.zeros((p, self.num_opt_var))
+        _score_linear_term = np.zeros((p, self.num_opt_var))
+
+        X, y = self.loglike.data
+        W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar))
+        _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
+        _score_linear_term = _hessian_active
+        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
+
+        self.observed_score_state = _score_linear_term.dot(_beta_unpenalized)
+        self.observed_score_state[inactive] += self.loglike.smooth_objective(beta_bar, 'grad')[inactive]
+
+        cur_indx_array = []
+        cur_indx_array.append(0)
+        cur_indx = 0
+        pointer = 0
+        signs_cluster = []
+        for j in range(p - 1):
+            if np.abs(sorted_soln[j + 1]) != np.abs(sorted_soln[cur_indx]):
+                cur_indx_array.append(j + 1)
+                cur_indx = j + 1
+                sign_vec = np.zeros(p)
+                sign_vec[np.arange(j + 1 - cur_indx_array[pointer]) + cur_indx_array[pointer]] = \
+                    np.sign(self.initial_soln[indices[np.arange(j + 1 - cur_indx_array[pointer]) + cur_indx_array[pointer]]])
+                signs_cluster.append(sign_vec)
+                pointer = pointer + 1
+                if sorted_soln[j + 1] == 0:
+                    break
+
+        signs_cluster = np.asarray(signs_cluster).T
+        X_clustered = X[:, indices].dot(signs_cluster)
+        _opt_linear_term = -X.T.dot(X_clustered)
+        self.opt_transform = (_opt_linear_term, self.initial_subgrad)
+
+        cov, prec = self.randomizer.cov_prec
+        opt_linear, opt_offset = self.opt_transform
+
+        cond_precision = opt_linear.T.dot(opt_linear) * prec
+        cond_cov = np.linalg.inv(cond_precision)
+        logdens_linear = cond_cov.dot(opt_linear.T) * prec
+        cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
+
+        def log_density(logdens_linear, offset, cond_prec, score, opt):
+            if score.ndim == 1:
+                mean_term = logdens_linear.dot(score.T + offset).T
             else:
-                indices = [j + cur_idx for j in range(len(cluster))]
-                cluster_weights = weights[indices]
-
-                ir = IsotonicRegression()
-                _ir_result = ir.fit_transform(np.arange(len(cluster)), cluster_weights[::-1])[::-1]
-                result[indices] = -np.multiply(active_signs[indices], _ir_result/2.)
+                mean_term = logdens_linear.dot(score.T + offset[:, None]).T
+            arg = opt + mean_term
+            return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+
+        log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
+
+        # now make the constraints
+
+        A_scaling = -np.identity(self.num_opt_var)
+        b_scaling = np.zeros(self.num_opt_var)
+
+        affine_con = constraints(A_scaling,
+                                 b_scaling,
+                                 mean=cond_mean,
+                                 covariance=cond_cov)
+
+        logdens_transform = (logdens_linear, opt_offset)
+
+        self.sampler = affine_gaussian_sampler(affine_con,
+                                               self.observed_opt_state,
+                                               self.observed_score_state,
+                                               log_density,
+                                               logdens_transform,
+                                               selection_info=self.selection_variable)
+        return active_signs
+
+    def selective_MLE(self,
+                      target="selected",
+                      features=None,
+                      parameter=None,
+                      level=0.9,
+                      compute_intervals=False,
+                      dispersion=None,
+                      solve_args={'tol': 1.e-12}):
+        """
+        Parameters
+        ----------
+        target : one of ['selected', 'full']
+        features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
+        level : float
+            Confidence level.
+        ndraw : int (optional)
+            Defaults to 1000.
+        burnin : int (optional)
+            Defaults to 1000.
+        compute_intervals : bool
+            Compute confidence intervals?
+        dispersion : float (optional)
+            Use a known value for dispersion, or Pearson's X^2?
+        """
+
+        if parameter is None:
+            parameter = np.zeros(self.loglike.shape[0])
+
+        if target == 'selected':
+            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
+                                                                                                dispersion=dispersion)
+        # elif target == 'full':
+        #     X, y = self.loglike.data
+        #     n, p = X.shape
+        #     if n > p:
+        #         observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features,
+        #                                                                                         dispersion=dispersion)
+        #     else:
+        #         observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features,
+        #                                                                                             dispersion=dispersion)
+
+        # working out conditional law of opt variables given
+        # target after decomposing score wrt target
+
+        return self.sampler.selective_MLE(observed_target,
+                                          cov_target,
+                                          cov_target_score,
+                                          self.observed_opt_state,
+                                          solve_args=solve_args)
+
+    # Targets of inference
+    # and covariance with score representation
+
+    def selected_targets(self, features=None, dispersion=None):
+
+        X, y = self.loglike.data
+        n, p = X.shape
+
+        if features is None:
+            active = self._active
+            unpenalized = self._unpenalized
+            noverall = active.sum() + unpenalized.sum()
+            overall = active + unpenalized
+
+            score_linear = self.score_transform[0]
+            Q = -score_linear[overall]
+            cov_target = np.linalg.inv(Q)
+            observed_target = self._beta_full[overall]
+            crosscov_target_score = score_linear.dot(cov_target)
+            Xfeat = X[:, overall]
+            alternatives = [{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][active]] \
+                           + ['twosided'] * unpenalized.sum()
 
         else:
-            indices = np.array([j + cur_idx for j in range(len(cluster))])
-            cluster_weights = weights[indices]
 
-            pen = slope(cluster_weights, lagrange=1.)
-            loss = rr.squared_error(np.identity(len(cluster)), prox_subarg)
-            slope_problem = rr.simple_problem(loss, pen)
-            result[indices] = prox_subarg - slope_problem.solve()
+            features_b = np.zeros_like(self._overall)
+            features_b[features] = True
+            features = features_b
+
+            Xfeat = X[:, features]
+            Qfeat = Xfeat.T.dot(self._W[:, None] * Xfeat)
+            Gfeat = self.loglike.smooth_objective(self.initial_soln, 'grad')[features]
+            Qfeat_inv = np.linalg.inv(Qfeat)
+            one_step = self.initial_soln[features] - Qfeat_inv.dot(Gfeat)
+            cov_target = Qfeat_inv
+            _score_linear = -Xfeat.T.dot(self._W[:, None] * X).T
+            crosscov_target_score = _score_linear.dot(cov_target)
+            observed_target = one_step
+            alternatives = ['twosided'] * features.sum()
+
+        if dispersion is None:  # use Pearson's X^2
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(
+                Xfeat.dot(observed_target))) ** 2 / self._W).sum() / (n - Xfeat.shape[1])
+
+        print(dispersion, 'dispersion')
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+
+    @staticmethod
+    def gaussian(X,
+                 Y,
+                 feature_weights,
+                 sigma=1.,
+                 quadratic=None,
+                 ridge_term=0.,
+                 randomizer_scale=None):
+
+        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic)
+        n, p = X.shape
 
-        cur_idx += len(cluster)
+        mean_diag = np.mean((X ** 2).sum(0))
+        if ridge_term is None:
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
 
-    return result
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-"""
-For a cluster of size bigger than 1, we solve
-"""
+        return randomized_slope(loglike, np.asarray(feature_weights) / sigma ** 2, ridge_term, randomizer_scale)
 
-conjugate_slope_pairs = {}
-for n1, n2 in [(slope, slope_conjugate)]:
-    conjugate_slope_pairs[n1] = n2
-    conjugate_slope_pairs[n2] = n1
\ No newline at end of file
diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index 0d63a9164..dda6458df 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -9,9 +9,11 @@
 from selection.tests.instance import gaussian_instance
 
 import numpy as np
-from selection.SLOPE.slope import slope
+from regreg.atoms.slope import slope
 import regreg.api as rr
 
+from selection.SLOPE.slope import randomized_slope
+
 def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma = None):
     robjects.r('''
     slope = function(X, Y, W , normalize, choice_weights, sigma, fdr = NA){
@@ -100,7 +102,7 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rh
 
 #compare_outputs_SLOPE_weights()
 
-def randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
+def test0_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
                      randomizer_scale= np.sqrt(0.25),
                      solve_args={'tol':1.e-12, 'min_its':50}):
 
@@ -130,36 +132,64 @@ def randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
     quad = rr.identity_quadratic(0, 0, -_initial_omega, 0)
     problem = rr.simple_problem(loglike, pen)
     initial_soln = problem.solve(quad, **solve_args)
-
-    print("initial_soln", initial_soln)
-
     initial_subgrad = -(loglike.smooth_objective(initial_soln, 'grad') + quad.objective(initial_soln, 'grad'))
-    #print("weights returned by R", r_lambda_seq)
-    print("initial subgrad", np.abs(initial_subgrad))
 
     indices = np.argsort(-np.abs(initial_soln))
-    print("sorted soln", initial_soln[indices], np.abs(initial_subgrad[indices]))
     sorted_soln = initial_soln[indices]
-    sorted_subgrad = initial_subgrad[indices]
 
     cur_indx_array = []
-    cur_indx_array .append(0)
+    cur_indx_array.append(0)
     cur_indx = 0
     pointer = 0
-    subgrad_cluster_indices = np.zeros(p, np.int)
+    signs_cluster = []
     for j in range(p-1):
         if np.abs(sorted_soln[j+1]) != np.abs(sorted_soln[cur_indx]):
             cur_indx_array.append(j+1)
             cur_indx = j+1
-            subgrad_cluster_indices[cur_indx_array[pointer]:(j+1)] = (np.argsort(-np.abs(sorted_subgrad
-                                                                                         [cur_indx_array[pointer]:(j+1)]))
-                                                                      + cur_indx_array[pointer])
+            sign_vec = np.zeros(p)
+            sign_vec[np.arange(j+1-cur_indx_array[pointer]) + cur_indx_array[pointer]] = \
+                np.sign(initial_soln[indices[np.arange(j+1-cur_indx_array[pointer]) + cur_indx_array[pointer]]])
+            signs_cluster.append(sign_vec)
             pointer = pointer + 1
             if sorted_soln[j+1]== 0:
-                subgrad_cluster_indices[(j+1):] = (np.argsort(-np.abs(sorted_subgrad[j+1:]))+(j+1))
                 break
 
-    print("start indices of clusters", cur_indx_array)
-    print("sorted indices of inactive cluster", subgrad_cluster_indices,
-          np.abs(sorted_subgrad[subgrad_cluster_indices]))
-randomized_slope()
\ No newline at end of file
+    signs_cluster = np.asarray(signs_cluster).T
+    X_clustered = X[:, indices].dot(signs_cluster)
+    print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
+
+def test_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35, randomizer_scale= np.sqrt(0.25)):
+
+    inst = gaussian_instance
+    signal = np.sqrt(signal_fac * 2. * np.log(p))
+    X, Y, beta = inst(n=n,
+                      p=p,
+                      signal=signal,
+                      s=s,
+                      equicorrelated=False,
+                      rho=rho,
+                      sigma=sigma,
+                      random_signs=True)[:3]
+
+    sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))
+    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X,
+                                                      Y,
+                                                      W=None,
+                                                      normalize=True,
+                                                      choice_weights="gaussian",
+                                                      sigma=sigma_)
+
+    conv = randomized_slope.gaussian(X,
+                                     Y,
+                                     r_sigma * r_lambda_seq,
+                                     randomizer_scale=randomizer_scale * sigma_)
+
+    signs = conv.fit()
+    nonzero = signs != 0
+    print("dimensions", n, p, nonzero.sum())
+
+    estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=sigma_)
+
+    print("estimate", estimate)
+
+test_randomized_slope()
\ No newline at end of file
diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 10863ec67..58749a3e7 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -482,12 +482,12 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
 if __name__ == "__main__":
 
-    ndraw = 50
+    ndraw = 1
     output_overall = np.zeros(27)
 
     target = "selected"
     tuning = "selective_MLE"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.10
+    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.30
 
     if target == "selected":
         for i in range(ndraw):

From d3ffb322a9cb57b242c2d1d02d5ec4f7088d21e9 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Sun, 15 Apr 2018 23:07:49 -0700
Subject: [PATCH 586/617] removed bug in fixing feasible point

---
 selection/SLOPE/slope.py                |  4 ++-
 selection/SLOPE/tests/slope_run_test.py | 37 +++++++++++++++++++++++--
 selection/randomized/query.py           |  3 ++
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index 4a52629c3..04a5ec8b5 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -82,8 +82,9 @@ def fit(self,
 
         indices = np.argsort(-np.fabs(self.initial_soln))
         sorted_soln = self.initial_soln[indices]
-        initial_scalings = np.sort(np.fabs(np.unique(self.initial_soln[active])))[::-1]
+        initial_scalings = np.sort(np.unique(np.fabs(self.initial_soln[active])))[::-1]
         self.observed_opt_state = initial_scalings
+        #print("observed opt state", self.observed_opt_state)
 
         _beta_unpenalized = restricted_estimator(self.loglike, self._overall, solve_args=solve_args)
 
@@ -134,6 +135,7 @@ def fit(self,
         cond_cov = np.linalg.inv(cond_precision)
         logdens_linear = cond_cov.dot(opt_linear.T) * prec
         cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
+        #print("shapes", cond_mean.shape, cond_precision.shape)
 
         def log_density(logdens_linear, offset, cond_prec, score, opt):
             if score.ndim == 1:
diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index dda6458df..f29b42fd6 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -13,6 +13,7 @@
 import regreg.api as rr
 
 from selection.SLOPE.slope import randomized_slope
+import matplotlib.pyplot as plt
 
 def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma = None):
     robjects.r('''
@@ -189,7 +190,39 @@ def test_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
     print("dimensions", n, p, nonzero.sum())
 
     estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=sigma_)
+    print("estimate", estimate, pval, intervals)
+
+    beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
+    coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
+    return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
+
+def main(nsim=500, full=True):
+
+    P0, PA, cover, length_int = [], [], [], []
+    from statsmodels.distributions import ECDF
+
+    for i in range(nsim):
+        p0, pA, cover_, intervals = test_randomized_slope()
+
+        cover.extend(cover_)
+        P0.extend(p0)
+        PA.extend(pA)
+        print(np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover),
+              'null pvalue + power + length')
+
+        if i % 3 == 0 and i > 0:
+            U = np.linspace(0, 1, 101)
+            plt.clf()
+            if len(P0) > 0:
+                plt.plot(U, ECDF(P0)(U))
+            if len(PA) > 0:
+                plt.plot(U, ECDF(PA)(U), 'r')
+            plt.plot([0, 1], [0, 1], 'k--')
+            plt.savefig("/Users/snigdhapanigrahi/Desktop/plot.pdf")
+    plt.show()
+
+main()
+
+
 
-    print("estimate", estimate)
 
-test_randomized_slope()
\ No newline at end of file
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index b2e65aa42..9f2f2c19d 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -514,6 +514,9 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
         quantile = ndist.ppf(1 - alpha / 2.)
         intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)),
                                final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T
+
+        print("check 0", observed_info_mean)
+        print("check 1", hess, intervals, final_estimator)
         return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator
 
 class optimization_intervals(object):

From 5d420e4435bdddc0d79ef5c764eff6064dba2528 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 16 Apr 2018 11:22:14 -0700
Subject: [PATCH 587/617] removed a sign bug from score_linear

---
 selection/SLOPE/slope.py                | 15 +++++++++------
 selection/SLOPE/tests/slope_run_test.py |  2 +-
 selection/randomized/query.py           |  3 ++-
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index 04a5ec8b5..02a3f5e98 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -67,7 +67,8 @@ def fit(self,
 
         active_signs = np.sign(self.initial_soln)
         active = self._active = active_signs != 0
-        self._unpenalized = np.zeros(p, np.bool)
+
+        print("check active terms", active.sum())
 
         self._overall = overall = active> 0
         self._inactive = inactive = ~self._overall
@@ -100,7 +101,7 @@ def fit(self,
         X, y = self.loglike.data
         W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar))
         _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
-        _score_linear_term = _hessian_active
+        _score_linear_term = -_hessian_active
         self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
 
         self.observed_score_state = _score_linear_term.dot(_beta_unpenalized)
@@ -202,6 +203,8 @@ def selective_MLE(self,
         if target == 'selected':
             observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
                                                                                                 dispersion=dispersion)
+
+            print("check covariance in MLE", cov_target)
         # elif target == 'full':
         #     X, y = self.loglike.data
         #     n, p = X.shape
@@ -231,18 +234,18 @@ def selected_targets(self, features=None, dispersion=None):
 
         if features is None:
             active = self._active
-            unpenalized = self._unpenalized
-            noverall = active.sum() + unpenalized.sum()
-            overall = active + unpenalized
+            noverall = active.sum()
+            overall = active
 
             score_linear = self.score_transform[0]
             Q = -score_linear[overall]
             cov_target = np.linalg.inv(Q)
+            print("check covariance in selected targets", cov_target)
             observed_target = self._beta_full[overall]
             crosscov_target_score = score_linear.dot(cov_target)
             Xfeat = X[:, overall]
             alternatives = [{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][active]] \
-                           + ['twosided'] * unpenalized.sum()
+                           + ['twosided']
 
         else:
 
diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index f29b42fd6..0b11a0a65 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -196,7 +196,7 @@ def test_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
     coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
     return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
 
-def main(nsim=500, full=True):
+def main(nsim=1, full=True):
 
     P0, PA, cover, length_int = [], [], [], []
     from statsmodels.distributions import ECDF
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 9f2f2c19d..5a440fe47 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -516,7 +516,8 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
                                final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T
 
         print("check 0", observed_info_mean)
-        print("check 1", hess, intervals, final_estimator)
+        print("check 1", cov_target, cov_target.dot(L.dot(target_lin)-L.dot(hess.dot(L.T))).dot(cov_target))
+        #print("check 1", hess, intervals, final_estimator)
         return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator
 
 class optimization_intervals(object):

From f1447752eb776bb2f47be57e48e443992fd9da10 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 16 Apr 2018 11:44:00 -0700
Subject: [PATCH 588/617] removed print statements, coverage is short of target

---
 selection/SLOPE/slope.py                |  3 ---
 selection/SLOPE/tests/slope_run_test.py | 29 ++++++++++++-------------
 selection/randomized/query.py           |  3 ---
 3 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index 02a3f5e98..d6dd6a8d2 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -136,7 +136,6 @@ def fit(self,
         cond_cov = np.linalg.inv(cond_precision)
         logdens_linear = cond_cov.dot(opt_linear.T) * prec
         cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
-        #print("shapes", cond_mean.shape, cond_precision.shape)
 
         def log_density(logdens_linear, offset, cond_prec, score, opt):
             if score.ndim == 1:
@@ -204,7 +203,6 @@ def selective_MLE(self,
             observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
                                                                                                 dispersion=dispersion)
 
-            print("check covariance in MLE", cov_target)
         # elif target == 'full':
         #     X, y = self.loglike.data
         #     n, p = X.shape
@@ -240,7 +238,6 @@ def selected_targets(self, features=None, dispersion=None):
             score_linear = self.score_transform[0]
             Q = -score_linear[overall]
             cov_target = np.linalg.inv(Q)
-            print("check covariance in selected targets", cov_target)
             observed_target = self._beta_full[overall]
             crosscov_target_score = score_linear.dot(cov_target)
             Xfeat = X[:, overall]
diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index 0b11a0a65..d780dce8c 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -159,7 +159,7 @@ def test0_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
     X_clustered = X[:, indices].dot(signs_cluster)
     print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
 
-def test_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35, randomizer_scale= np.sqrt(0.25)):
+def test_randomized_slope(n=500, p=100, signal_fac=1.5, s=5, sigma=3., rho=0.35, randomizer_scale= np.sqrt(0.25)):
 
     inst = gaussian_instance
     signal = np.sqrt(signal_fac * 2. * np.log(p))
@@ -196,7 +196,7 @@ def test_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
     coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
     return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
 
-def main(nsim=1, full=True):
+def main(nsim=100):
 
     P0, PA, cover, length_int = [], [], [], []
     from statsmodels.distributions import ECDF
@@ -207,19 +207,18 @@ def main(nsim=1, full=True):
         cover.extend(cover_)
         P0.extend(p0)
         PA.extend(pA)
-        print(np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover),
-              'null pvalue + power + length')
-
-        if i % 3 == 0 and i > 0:
-            U = np.linspace(0, 1, 101)
-            plt.clf()
-            if len(P0) > 0:
-                plt.plot(U, ECDF(P0)(U))
-            if len(PA) > 0:
-                plt.plot(U, ECDF(PA)(U), 'r')
-            plt.plot([0, 1], [0, 1], 'k--')
-            plt.savefig("/Users/snigdhapanigrahi/Desktop/plot.pdf")
-    plt.show()
+        print(np.mean(cover),'null pvalue + power')
+
+    #     if i % 3 == 0 and i > 0:
+    #         U = np.linspace(0, 1, 101)
+    #         plt.clf()
+    #         if len(P0) > 0:
+    #             plt.plot(U, ECDF(P0)(U))
+    #         if len(PA) > 0:
+    #             plt.plot(U, ECDF(PA)(U), 'r')
+    #         plt.plot([0, 1], [0, 1], 'k--')
+    #         plt.savefig("/Users/snigdhapanigrahi/Desktop/plot.pdf")
+    # plt.show()
 
 main()
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 5a440fe47..ff90a6e19 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -515,9 +515,6 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
         intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)),
                                final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T
 
-        print("check 0", observed_info_mean)
-        print("check 1", cov_target, cov_target.dot(L.dot(target_lin)-L.dot(hess.dot(L.T))).dot(cov_target))
-        #print("check 1", hess, intervals, final_estimator)
         return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator
 
 class optimization_intervals(object):

From e48b25c605dc9f1c909a60c29811dae2f047f954 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 16 Apr 2018 13:58:19 -0700
Subject: [PATCH 589/617] removed sign bug from opt_linear_term

---
 selection/SLOPE/slope.py      | 15 +++++++++++----
 selection/randomized/query.py |  3 ++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index d6dd6a8d2..d50ec0557 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -95,9 +95,6 @@ def fit(self,
 
         self.num_opt_var = self.observed_opt_state.shape[0]
 
-        _opt_linear_term = np.zeros((p, self.num_opt_var))
-        _score_linear_term = np.zeros((p, self.num_opt_var))
-
         X, y = self.loglike.data
         W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar))
         _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
@@ -126,12 +123,15 @@ def fit(self,
 
         signs_cluster = np.asarray(signs_cluster).T
         X_clustered = X[:, indices].dot(signs_cluster)
-        _opt_linear_term = -X.T.dot(X_clustered)
+        _opt_linear_term = X.T.dot(X_clustered)
         self.opt_transform = (_opt_linear_term, self.initial_subgrad)
 
         cov, prec = self.randomizer.cov_prec
         opt_linear, opt_offset = self.opt_transform
 
+        print("check if correct", np.allclose(-X.T.dot(y-X_clustered.dot(initial_scalings))
+                                              +initial_subgrad,self._initial_omega, rtol=1e-05, atol=1e-08))
+
         cond_precision = opt_linear.T.dot(opt_linear) * prec
         cond_cov = np.linalg.inv(cond_precision)
         logdens_linear = cond_cov.dot(opt_linear.T) * prec
@@ -149,6 +149,13 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
 
         # now make the constraints
 
+        #A_scaling_0 = -np.identity(self.num_opt_var)
+        #A_scaling_1 = -np.identity(self.num_opt_var)[:(self.num_opt_var-1), :]
+        #for k in range(A_scaling_1.shape[0]):
+        #    A_scaling_1[k,k+1]= 1
+        #A_scaling = np.vstack([A_scaling_0, A_scaling_1])
+        #b_scaling = np.zeros(2*self.num_opt_var-1)
+
         A_scaling = -np.identity(self.num_opt_var)
         b_scaling = np.zeros(self.num_opt_var)
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index ff90a6e19..3bc6472f5 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -494,7 +494,8 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
 
         conjugate_arg = prec_opt.dot(self.affine_con.mean)
 
-        init_soln = np.ones(prec_opt.shape[0])
+        #init_soln = np.ones(prec_opt.shape[0])
+        init_soln = feasible_point
         val, soln, hess = solve_barrier_nonneg(conjugate_arg,
                                                prec_opt,
                                                init_soln,

From ca75577a10f18eb420ee76cdc3c381c2899c6bd1 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 16 Apr 2018 14:20:56 -0700
Subject: [PATCH 590/617] still coverage is short of target

---
 selection/SLOPE/slope.py                | 18 +++----
 selection/SLOPE/tests/slope_run_test.py | 72 +++++++++++++------------
 2 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index d50ec0557..47a0fa40d 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -130,7 +130,7 @@ def fit(self,
         opt_linear, opt_offset = self.opt_transform
 
         print("check if correct", np.allclose(-X.T.dot(y-X_clustered.dot(initial_scalings))
-                                              +initial_subgrad,self._initial_omega, rtol=1e-05, atol=1e-08))
+                                              +self.initial_subgrad,self._initial_omega, rtol=1e-05, atol=1e-08))
 
         cond_precision = opt_linear.T.dot(opt_linear) * prec
         cond_cov = np.linalg.inv(cond_precision)
@@ -149,15 +149,15 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
 
         # now make the constraints
 
-        #A_scaling_0 = -np.identity(self.num_opt_var)
-        #A_scaling_1 = -np.identity(self.num_opt_var)[:(self.num_opt_var-1), :]
-        #for k in range(A_scaling_1.shape[0]):
-        #    A_scaling_1[k,k+1]= 1
-        #A_scaling = np.vstack([A_scaling_0, A_scaling_1])
-        #b_scaling = np.zeros(2*self.num_opt_var-1)
+        A_scaling_0 = -np.identity(self.num_opt_var)
+        A_scaling_1 = -np.identity(self.num_opt_var)[:(self.num_opt_var-1), :]
+        for k in range(A_scaling_1.shape[0]):
+           A_scaling_1[k,k+1]= 1
+        A_scaling = np.vstack([A_scaling_0, A_scaling_1])
+        b_scaling = np.zeros(2*self.num_opt_var-1)
 
-        A_scaling = -np.identity(self.num_opt_var)
-        b_scaling = np.zeros(self.num_opt_var)
+        # A_scaling = -np.identity(self.num_opt_var)
+        # b_scaling = np.zeros(self.num_opt_var)
 
         affine_con = constraints(A_scaling,
                                  b_scaling,
diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index d780dce8c..0e52738e9 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -161,40 +161,44 @@ def test0_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
 
 def test_randomized_slope(n=500, p=100, signal_fac=1.5, s=5, sigma=3., rho=0.35, randomizer_scale= np.sqrt(0.25)):
 
-    inst = gaussian_instance
-    signal = np.sqrt(signal_fac * 2. * np.log(p))
-    X, Y, beta = inst(n=n,
-                      p=p,
-                      signal=signal,
-                      s=s,
-                      equicorrelated=False,
-                      rho=rho,
-                      sigma=sigma,
-                      random_signs=True)[:3]
-
-    sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))
-    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X,
-                                                      Y,
-                                                      W=None,
-                                                      normalize=True,
-                                                      choice_weights="gaussian",
-                                                      sigma=sigma_)
-
-    conv = randomized_slope.gaussian(X,
-                                     Y,
-                                     r_sigma * r_lambda_seq,
-                                     randomizer_scale=randomizer_scale * sigma_)
-
-    signs = conv.fit()
-    nonzero = signs != 0
-    print("dimensions", n, p, nonzero.sum())
-
-    estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=sigma_)
-    print("estimate", estimate, pval, intervals)
-
-    beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
-    coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
-    return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
+    while True:
+        inst = gaussian_instance
+        signal = np.sqrt(signal_fac * 2. * np.log(p))
+        X, Y, beta = inst(n=n,
+                          p=p,
+                          signal=signal,
+                          s=s,
+                          equicorrelated=False,
+                          rho=rho,
+                          sigma=sigma,
+                          random_signs=True)[:3]
+
+        sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))
+        r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X,
+                                                          Y,
+                                                          W=None,
+                                                          normalize=True,
+                                                          choice_weights="gaussian",
+                                                          sigma=sigma_)
+
+        conv = randomized_slope.gaussian(X,
+                                         Y,
+                                         r_sigma * r_lambda_seq,
+                                         randomizer_scale=randomizer_scale * sigma_)
+
+        signs = conv.fit()
+        nonzero = signs != 0
+        print("dimensions", n, p, nonzero.sum())
+        if nonzero.sum() > 0:
+            estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=sigma_)
+            print("estimate", estimate, pval, intervals)
+
+            beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
+            coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
+            break
+
+    if True:
+        return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
 
 def main(nsim=100):
 

From 698189098e297711c286793598f475f53ef1b37e Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 16 Apr 2018 19:07:09 -0700
Subject: [PATCH 591/617] add output files

---
 ...rics_high_beta_type1_full_rho_0.35_tRL.csv | 41 +++++++++++++++++++
 ...trics_high_beta_type1_full_rho_0.7_tRL.csv | 41 +++++++++++++++++++
 ...metrics_high_beta_type1_full_rho_0_tRL.csv | 41 +++++++++++++++++++
 ...risk_high_beta_type1_full_rho_0.35_tRL.csv | 11 +++++
 .../risk_high_beta_type1_full_rho_0.7_tRL.csv | 11 +++++
 .../risk_high_beta_type1_full_rho_0_tRL.csv   | 11 +++++
 6 files changed, 156 insertions(+)
 create mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35_tRL.csv
 create mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.7_tRL.csv
 create mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0_tRL.csv
 create mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35_tRL.csv
 create mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.7_tRL.csv
 create mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0_tRL.csv

diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35_tRL.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35_tRL.csv
new file mode 100644
index 000000000..4247454e2
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35_tRL.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0.35,0.904560538674,0.27,10.1823227415,Selective MLE,13.28,0.5,0.008,0.004,1000,11.14355622,200,1,randomized_LASSO
+0.05,0.35,0.0,0.0,0.0,Randomized LASSO,13.28,0.0,0.0,0.0,1000,1.27519023435,200,1,randomized_LASSO
+0.05,0.35,0.814698714326,0.153333333333,inf,Lee,9.02,0.66,0.014,0.006,1000,1.22947708117,200,1,randomized_LASSO
+0.05,0.35,0.215435589303,0.58,3.49976162582,Naive,13.46,1.5,0.048,0.0,1000,1.22947708117,200,1,randomized_LASSO
+0.1,0.35,0.901193326107,0.13,7.2351485447,Selective MLE,12.18,0.24,0.018,0.006,1000,5.67042617943,200,1,randomized_LASSO
+0.1,0.35,0.0,0.0,0.0,Randomized LASSO,12.18,0.0,0.0,0.0,1000,1.09469903763,200,1,randomized_LASSO
+0.1,0.35,0.852926295926,0.0571428571429,inf,Lee,8.42,0.28,0.012,0.006,1000,1.03997065233,200,1,randomized_LASSO
+0.1,0.35,0.280898609117,0.68,2.53227893337,Naive,12.64,1.74,0.074,0.0,1000,1.03997065233,200,1,randomized_LASSO
+0.15,0.35,0.882759318987,0.19,5.81389463446,Selective MLE,15.78,0.4,0.054,0.018,1000,5.11745115543,200,1,randomized_LASSO
+0.15,0.35,0.0,0.0,0.0,Randomized LASSO,15.78,0.0,0.0,0.0,1000,1.0261449909,200,1,randomized_LASSO
+0.15,0.35,0.8636695845,0.108,inf,Lee,10.8,0.96,0.04,0.012,1000,0.970779284886,200,1,randomized_LASSO
+0.15,0.35,0.363206299726,0.5,2.14167127404,Naive,15.68,1.5,0.148,0.0,1000,0.970779284886,200,1,randomized_LASSO
+0.2,0.35,0.878854714053,0.136666666667,4.93075717257,Selective MLE,17.5,0.56,0.074,0.036,1000,3.94134638117,200,1,randomized_LASSO
+0.2,0.35,0.0,0.0,0.0,Randomized LASSO,17.5,0.0,0.0,0.0,1000,1.00570908043,200,1,randomized_LASSO
+0.2,0.35,0.811921267909,0.129456140351,inf,Lee,12.92,0.96,0.062,0.03,1000,0.955857160231,200,1,randomized_LASSO
+0.2,0.35,0.420908411408,0.46,1.92294662266,Naive,18.42,1.42,0.17,0.0,1000,0.955857160231,200,1,randomized_LASSO
+0.25,0.35,0.904443856452,0.103333333333,4.38141540518,Selective MLE,16.36,0.62,0.094,0.038,1000,2.96076741876,200,1,randomized_LASSO
+0.25,0.35,0.0,0.0,0.0,Randomized LASSO,16.36,0.0,0.0,0.0,1000,0.963436312334,200,1,randomized_LASSO
+0.25,0.35,0.829443531547,0.105692307692,inf,Lee,15.44,1.06,0.09,0.032,1000,0.899580794678,200,1,randomized_LASSO
+0.25,0.35,0.554338716916,0.5,1.78138367145,Naive,22.08,1.04,0.244,0.0,1000,0.899580794678,200,1,randomized_LASSO
+0.3,0.35,0.870643854672,0.124,3.48480528025,Selective MLE,22.24,1.0,0.156,0.068,1000,2.72989344456,200,1,randomized_LASSO
+0.3,0.35,0.0,0.0,0.0,Randomized LASSO,22.24,0.0,0.0,0.0,1000,0.949610403149,200,1,randomized_LASSO
+0.3,0.35,0.774245773293,0.126057971014,inf,Lee,19.16,2.16,0.132,0.064,1000,0.861327468008,200,1,randomized_LASSO
+0.3,0.35,0.62055068257,0.36,1.65643370396,Naive,28.08,0.74,0.232,0.0,1000,0.861327468008,200,1,randomized_LASSO
+0.42,0.35,0.871499391079,0.219095238095,2.92679636788,Selective MLE,23.12,2.1,0.214,0.136,1000,2.29869229231,200,1,randomized_LASSO
+0.42,0.35,0.0,0.0,0.0,Randomized LASSO,23.12,0.0,0.0,0.0,1000,0.876389275514,200,1,randomized_LASSO
+0.42,0.35,0.766220794294,0.151175438596,inf,Lee,24.14,2.18,0.17,0.058,1000,0.760023082731,200,1,randomized_LASSO
+0.42,0.35,0.723070401959,0.18,1.51698380468,Naive,33.84,0.38,0.286,0.0,1000,0.760023082731,200,1,randomized_LASSO
+0.71,0.35,0.832780761273,0.240670592973,1.91985249395,Selective MLE,32.84,5.38,0.438,0.332,1000,1.90473171699,200,1,randomized_LASSO
+0.71,0.35,0.0,0.0,0.0,Randomized LASSO,32.84,0.0,0.0,0.0,1000,0.747119128815,200,1,randomized_LASSO
+0.71,0.35,0.743799420992,0.176050664312,inf,Lee,37.26,4.12,0.25,0.096,1000,0.56797924093,200,1,randomized_LASSO
+0.71,0.35,0.899408727514,0.02,1.33828834119,Naive,51.22,0.04,0.428,0.0,1000,0.56797924093,200,1,randomized_LASSO
+1.22,0.35,0.824092627619,0.23783567413,1.40145975774,Selective MLE,31.56,8.12,0.66,0.602,1000,0.918711011887,200,1,randomized_LASSO
+1.22,0.35,0.0,0.0,0.0,Randomized LASSO,31.56,0.0,0.0,0.0,1000,0.607598814246,200,1,randomized_LASSO
+1.22,0.35,0.735296600906,0.178628554258,inf,Lee,47.24,5.24,0.302,0.124,1000,0.364022589518,200,1,randomized_LASSO
+1.22,0.35,0.952273896683,0.0,1.20644489562,Naive,58.0,0.0,0.636,0.0,1000,0.364022589518,200,1,randomized_LASSO
+2.07,0.35,0.772575484785,0.25638804377,1.00691373662,Selective MLE,34.18,11.52,0.83,0.804,1000,0.81289768376,200,1,randomized_LASSO
+2.07,0.35,0.0,0.0,0.0,Randomized LASSO,34.18,0.0,0.0,0.0,1000,0.501801832857,200,1,randomized_LASSO
+2.07,0.35,0.719978731909,0.217756312011,inf,Lee,52.66,5.64,0.306,0.11,1000,0.225363033778,200,1,randomized_LASSO
+2.07,0.35,0.979314360862,0.0,1.10471415905,Naive,61.34,0.0,0.808,0.0,1000,0.225363033778,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.7_tRL.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.7_tRL.csv
new file mode 100644
index 000000000..9a83e75fc
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.7_tRL.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0.7,0.922039239687,0.04,26.7382076911,Selective MLE,11.66,0.04,0.006,0.0,1000,66.5802029991,200,1,randomized_LASSO
+0.05,0.7,0.0,0.0,0.0,Randomized LASSO,11.66,0.0,0.0,0.0,1000,1.19241631522,200,1,randomized_LASSO
+0.05,0.7,0.831904761905,0.13580952381,inf,Lee,7.1,0.9,0.01,0.004,1000,1.09243672774,200,1,randomized_LASSO
+0.05,0.7,0.270358916792,0.7,3.68320282859,Naive,9.44,1.6,0.042,0.0,1000,1.09243672774,200,1,randomized_LASSO
+0.1,0.7,0.928723475835,0.06,16.7480169573,Selective MLE,14.62,0.1,0.014,0.002,1000,38.3852852404,200,1,randomized_LASSO
+0.1,0.7,0.0,0.0,0.0,Randomized LASSO,14.62,0.0,0.0,0.0,1000,1.08132675964,200,1,randomized_LASSO
+0.1,0.7,0.797081201567,0.163333333333,inf,Lee,8.2,0.42,0.028,0.01,1000,1.03348868058,200,1,randomized_LASSO
+0.1,0.7,0.322675568223,0.62,2.64005149869,Naive,11.44,1.56,0.074,0.0,1000,1.03348868058,200,1,randomized_LASSO
+0.15,0.7,0.915431178923,0.06,12.6546074846,Selective MLE,19.12,0.16,0.02,0.008,1000,26.7440029516,200,1,randomized_LASSO
+0.15,0.7,0.0,0.0,0.0,Randomized LASSO,19.12,0.0,0.0,0.0,1000,1.05911312813,200,1,randomized_LASSO
+0.15,0.7,0.835593582888,0.141904761905,inf,Lee,12.14,1.16,0.032,0.014,1000,0.97478395775,200,1,randomized_LASSO
+0.15,0.7,0.488651842883,0.58,2.28796404695,Naive,17.02,1.14,0.11,0.0,1000,0.97478395775,200,1,randomized_LASSO
+0.2,0.7,0.929907924884,0.0433333333333,11.0906038198,Selective MLE,17.38,0.14,0.028,0.006,1000,20.0408717049,200,1,randomized_LASSO
+0.2,0.7,0.0,0.0,0.0,Randomized LASSO,17.38,0.0,0.0,0.0,1000,0.978984630566,200,1,randomized_LASSO
+0.2,0.7,0.851145612054,0.0453787878788,inf,Lee,13.02,0.62,0.048,0.028,1000,0.91935867248,200,1,randomized_LASSO
+0.2,0.7,0.498082557816,0.42,2.00267496449,Naive,18.48,0.94,0.142,0.0,1000,0.91935867248,200,1,randomized_LASSO
+0.25,0.7,0.932610591671,0.0,9.82534260533,Selective MLE,19.14,0.04,0.036,0.004,1000,15.6008974535,200,1,randomized_LASSO
+0.25,0.7,0.0,0.0,0.0,Randomized LASSO,19.14,0.0,0.0,0.0,1000,0.969227518518,200,1,randomized_LASSO
+0.25,0.7,0.864711775957,0.0647619047619,inf,Lee,18.82,0.8,0.064,0.018,1000,0.885846251708,200,1,randomized_LASSO
+0.25,0.7,0.607958829559,0.24,1.91984322427,Naive,25.94,0.54,0.174,0.0,1000,0.885846251708,200,1,randomized_LASSO
+0.3,0.7,0.900900980781,0.03,8.33118546751,Selective MLE,23.84,0.14,0.064,0.01,1000,14.670816331,200,1,randomized_LASSO
+0.3,0.7,0.0,0.0,0.0,Randomized LASSO,23.84,0.0,0.0,0.0,1000,0.938287802512,200,1,randomized_LASSO
+0.3,0.7,0.744268267323,0.167569489334,inf,Lee,21.12,2.0,0.104,0.042,1000,0.827632432351,200,1,randomized_LASSO
+0.3,0.7,0.658147077777,0.18,1.78476753909,Naive,27.86,0.4,0.194,0.0,1000,0.827632432351,200,1,randomized_LASSO
+0.42,0.7,0.929540607176,0.0566666666667,6.80360118209,Selective MLE,27.46,0.24,0.11,0.016,1000,13.5209534407,200,1,randomized_LASSO
+0.42,0.7,0.0,0.0,0.0,Randomized LASSO,27.46,0.0,0.0,0.0,1000,0.844098099742,200,1,randomized_LASSO
+0.42,0.7,0.828304221914,0.118290598291,inf,Lee,27.26,1.14,0.116,0.03,1000,0.719350085744,200,1,randomized_LASSO
+0.42,0.7,0.782597848276,0.18,1.6578804247,Naive,36.58,0.28,0.224,0.0,1000,0.719350085744,200,1,randomized_LASSO
+0.71,0.7,0.889349872267,0.113095238095,4.67826236113,Selective MLE,32.88,0.98,0.226,0.06,1000,7.70099169377,200,1,randomized_LASSO
+0.71,0.7,0.0,0.0,0.0,Randomized LASSO,32.88,0.0,0.0,0.0,1000,0.730480536029,200,1,randomized_LASSO
+0.71,0.7,0.859988542109,0.0599251336898,inf,Lee,40.34,1.38,0.154,0.03,1000,0.520966311478,200,1,randomized_LASSO
+0.71,0.7,0.918887154994,0.0,1.46136235542,Naive,49.7,0.0,0.382,0.0,1000,0.520966311478,200,1,randomized_LASSO
+1.22,0.7,0.847615136972,0.213984126984,3.38018198745,Selective MLE,33.92,3.2,0.472,0.236,1000,4.80133134411,200,1,randomized_LASSO
+1.22,0.7,0.0,0.0,0.0,Randomized LASSO,33.92,0.0,0.0,0.0,1000,0.574001051024,200,1,randomized_LASSO
+1.22,0.7,0.825169195991,0.10756017316,inf,Lee,49.1,2.14,0.194,0.044,1000,0.322558328992,200,1,randomized_LASSO
+1.22,0.7,0.965361186761,0.0,1.36091425418,Naive,57.4,0.0,0.504,0.0,1000,0.322558328992,200,1,randomized_LASSO
+2.07,0.7,0.774512289686,0.225146242646,2.36868393184,Selective MLE,34.82,7.78,0.722,0.578,1000,2.90326565422,200,1,randomized_LASSO
+2.07,0.7,0.0,0.0,0.0,Randomized LASSO,34.82,0.0,0.0,0.0,1000,0.439628497143,200,1,randomized_LASSO
+2.07,0.7,0.747848973929,0.161774509804,inf,Lee,52.44,2.96,0.282,0.07,1000,0.189410896637,200,1,randomized_LASSO
+2.07,0.7,0.986016239696,0.0,1.23917614471,Naive,59.64,0.0,0.652,0.0,1000,0.189410896637,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0_tRL.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0_tRL.csv
new file mode 100644
index 000000000..55db39726
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0_tRL.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0,0.88897128285,0.323333333333,8.79220766982,Selective MLE,12.98,0.52,0.008,0.004,1000,8.2886621002,200,1,randomized_LASSO
+0.05,0,0.0,0.0,0.0,Randomized LASSO,12.98,0.0,0.0,0.0,1000,1.31442104866,200,1,randomized_LASSO
+0.05,0,0.753510470915,0.211666666667,inf,Lee,8.2,0.94,0.01,0.006,1000,1.17248749115,200,1,randomized_LASSO
+0.05,0,0.20697947614,0.76,3.51707544591,Naive,12.46,2.12,0.024,0.0,1000,1.17248749115,200,1,randomized_LASSO
+0.1,0,0.891872254469,0.123333333333,6.34530176112,Selective MLE,14.0,0.44,0.032,0.012,1000,5.70079582818,200,1,randomized_LASSO
+0.1,0,0.0,0.0,0.0,Randomized LASSO,14.0,0.0,0.0,0.0,1000,1.10763024692,200,1,randomized_LASSO
+0.1,0,0.803814659197,0.159271561772,inf,Lee,10.16,1.16,0.024,0.014,1000,1.05070354854,200,1,randomized_LASSO
+0.1,0,0.308253659516,0.62,2.55566050799,Naive,14.88,1.6,0.098,0.0,1000,1.05070354854,200,1,randomized_LASSO
+0.15,0,0.868768231273,0.223333333333,4.71418264616,Selective MLE,17.82,0.66,0.08,0.024,1000,3.98836268352,200,1,randomized_LASSO
+0.15,0,0.0,0.0,0.0,Randomized LASSO,17.82,0.0,0.0,0.0,1000,1.0430537927,200,1,randomized_LASSO
+0.15,0,0.835598452955,0.0742608695652,inf,Lee,13.64,0.7,0.032,0.012,1000,1.00792015423,200,1,randomized_LASSO
+0.15,0,0.403810732703,0.6,2.16109421674,Naive,20.34,1.66,0.134,0.0,1000,1.00792015423,200,1,randomized_LASSO
+0.2,0,0.868467053905,0.218095238095,4.10298653517,Selective MLE,17.46,1.0,0.096,0.056,1000,3.22973247347,200,1,randomized_LASSO
+0.2,0,0.0,0.0,0.0,Randomized LASSO,17.46,0.0,0.0,0.0,1000,1.01048679788,200,1,randomized_LASSO
+0.2,0,0.811217958999,0.117333333333,inf,Lee,13.18,1.22,0.058,0.032,1000,0.938462922739,200,1,randomized_LASSO
+0.2,0,0.499373658179,0.6,1.89997856499,Naive,19.58,1.64,0.208,0.0,1000,0.938462922739,200,1,randomized_LASSO
+0.25,0,0.883503463146,0.195,3.70622944753,Selective MLE,18.28,0.88,0.098,0.054,1000,2.47135003169,200,1,randomized_LASSO
+0.25,0,0.0,0.0,0.0,Randomized LASSO,18.28,0.0,0.0,0.0,1000,0.97688918139,200,1,randomized_LASSO
+0.25,0,0.839550741484,0.0897006327006,inf,Lee,16.44,1.24,0.078,0.042,1000,0.90117958759,200,1,randomized_LASSO
+0.25,0,0.616494448814,0.42,1.78032249483,Naive,24.46,1.16,0.236,0.0,1000,0.90117958759,200,1,randomized_LASSO
+0.3,0,0.866051921174,0.244095238095,3.13147259805,Selective MLE,19.94,1.68,0.16,0.102,1000,2.36317409857,200,1,randomized_LASSO
+0.3,0,0.0,0.0,0.0,Randomized LASSO,19.94,0.0,0.0,0.0,1000,0.939293015234,200,1,randomized_LASSO
+0.3,0,0.743928328678,0.167357376284,inf,Lee,15.62,1.82,0.14,0.05,1000,0.858982589281,200,1,randomized_LASSO
+0.3,0,0.619547597705,0.34,1.64955307026,Naive,23.2,0.8,0.266,0.0,1000,0.858982589281,200,1,randomized_LASSO
+0.42,0,0.867041781847,0.239714285714,2.50968360211,Selective MLE,24.84,2.06,0.222,0.132,1000,2.00307448702,200,1,randomized_LASSO
+0.42,0,0.0,0.0,0.0,Randomized LASSO,24.84,0.0,0.0,0.0,1000,0.865395486812,200,1,randomized_LASSO
+0.42,0,0.732482450526,0.168303817424,inf,Lee,26.4,3.44,0.224,0.08,1000,0.75939059585,200,1,randomized_LASSO
+0.42,0,0.741146303416,0.22,1.54525272229,Naive,37.58,0.66,0.336,0.0,1000,0.75939059585,200,1,randomized_LASSO
+0.71,0,0.814466485587,0.263022979436,1.6600714217,Selective MLE,30.0,5.58,0.442,0.364,1000,1.92922645517,200,1,randomized_LASSO
+0.71,0,0.0,0.0,0.0,Randomized LASSO,30.0,0.0,0.0,0.0,1000,0.770365309897,200,1,randomized_LASSO
+0.71,0,0.808583099881,0.144655122655,inf,Lee,39.18,2.4,0.202,0.058,1000,0.574733612271,200,1,randomized_LASSO
+0.71,0,0.897275350581,0.04,1.35357789306,Naive,52.5,0.08,0.472,0.0,1000,0.574733612271,200,1,randomized_LASSO
+1.22,0,0.803640115619,0.253073759574,1.22548655163,Selective MLE,31.98,8.86,0.674,0.634,1000,0.783112288547,200,1,randomized_LASSO
+1.22,0,0.0,0.0,0.0,Randomized LASSO,31.98,0.0,0.0,0.0,1000,0.609913135656,200,1,randomized_LASSO
+1.22,0,0.77612053658,0.116686190856,inf,Lee,48.2,3.44,0.304,0.086,1000,0.373728618284,200,1,randomized_LASSO
+1.22,0,0.957601878675,0.0,1.20782773316,Naive,62.32,0.0,0.624,0.0,1000,0.373728618284,200,1,randomized_LASSO
+2.07,0,0.770778679702,0.247848096348,0.857075455058,Selective MLE,32.44,12.04,0.894,0.874,1000,0.411382057681,200,1,randomized_LASSO
+2.07,0,0.0,0.0,0.0,Randomized LASSO,32.44,0.0,0.0,0.0,1000,0.468370989328,200,1,randomized_LASSO
+2.07,0,0.803080990926,0.0965080670963,inf,Lee,49.68,2.66,0.254,0.084,1000,0.208476236462,200,1,randomized_LASSO
+2.07,0,0.984105991703,0.0,1.08767214923,Naive,59.22,0.0,0.874,0.0,1000,0.208476236462,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35_tRL.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35_tRL.csv
new file mode 100644
index 000000000..86a155103
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35_tRL.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+1.22947708117,1.27519023435,29.9774110469,1.24903915215,6.04763869728,11.14355622,0.05,0.35,1000,200,1,randomized_LASSO
+1.03997065233,1.09469903763,16.293370011,1.08898303471,3.38092077039,5.67042617943,0.1,0.35,1000,200,1,randomized_LASSO
+0.970779284886,1.0261449909,13.5328791418,0.980575255112,2.69468544429,5.11745115543,0.15,0.35,1000,200,1,randomized_LASSO
+0.955857160231,1.00570908043,9.92658362282,0.972031122743,2.42305064218,3.94134638117,0.2,0.35,1000,200,1,randomized_LASSO
+0.899580794678,0.963436312334,8.07491069098,0.904449458809,1.94621905699,2.96076741876,0.25,0.35,1000,200,1,randomized_LASSO
+0.861327468008,0.949610403149,9.07751513011,0.867332470168,1.87440533665,2.72989344456,0.3,0.35,1000,200,1,randomized_LASSO
+0.760023082731,0.876389275514,6.63125065196,0.75529006061,1.48698253691,2.29869229231,0.42,0.35,1000,200,1,randomized_LASSO
+0.56797924093,0.747119128815,5.00555624788,0.525097514,1.03821222608,1.90473171699,0.71,0.35,1000,200,1,randomized_LASSO
+0.364022589518,0.607598814246,2.73622995835,0.261085084031,0.634367967642,0.918711011887,1.22,0.35,1000,200,1,randomized_LASSO
+0.225363033778,0.501801832857,1.53237148385,0.103328651514,0.377559544681,0.81289768376,2.07,0.35,1000,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.7_tRL.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.7_tRL.csv
new file mode 100644
index 000000000..bc1e08396
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.7_tRL.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+1.09243672774,1.19241631522,66.5802029991,1.23406515401,5.37926240412,66.5802029991,0.05,0.7,1000,200,1,randomized_LASSO
+1.03348868058,1.08132675964,38.3852852404,1.07150225572,3.49735932903,38.3852852404,0.1,0.7,1000,200,1,randomized_LASSO
+0.97478395775,1.05911312813,26.7440029516,1.02051312064,2.93875908586,26.7440029516,0.15,0.7,1000,200,1,randomized_LASSO
+0.91935867248,0.978984630566,20.0408717049,0.939219038505,2.24129394098,20.0408717049,0.2,0.7,1000,200,1,randomized_LASSO
+0.885846251708,0.969227518518,15.6008974535,0.900166766283,1.94366792471,15.6008974535,0.25,0.7,1000,200,1,randomized_LASSO
+0.827632432351,0.938287802512,14.670816331,0.844845584183,1.84385143811,14.670816331,0.3,0.7,1000,200,1,randomized_LASSO
+0.719350085744,0.844098099742,13.5209534407,0.721438073621,1.4054012529,13.5209534407,0.42,0.7,1000,200,1,randomized_LASSO
+0.520966311478,0.730480536029,7.70099169377,0.494283033378,1.03323592945,7.70099169377,0.71,0.7,1000,200,1,randomized_LASSO
+0.322558328992,0.574001051024,4.80133134411,0.236516272445,0.597607242237,4.80133134411,1.22,0.7,1000,200,1,randomized_LASSO
+0.189410896637,0.439628497143,2.90326565422,0.092964938924,0.331250334849,2.90326565422,2.07,0.7,1000,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0_tRL.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0_tRL.csv
new file mode 100644
index 000000000..e16d9c95b
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0_tRL.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+1.17248749115,1.31442104866,23.6453370978,1.28791920231,6.4199010483,8.2886621002,0.05,0,1000,200,1,randomized_LASSO
+1.05070354854,1.10763024692,14.1800839856,1.1238099725,3.67703632915,5.70079582818,0.1,0,1000,200,1,randomized_LASSO
+1.00792015423,1.0430537927,11.2019796169,1.03348070544,2.81963361807,3.98836268352,0.15,0,1000,200,1,randomized_LASSO
+0.938462922739,1.01048679788,7.56513834807,0.959418500699,2.34878604629,3.22973247347,0.2,0,1000,200,1,randomized_LASSO
+0.90117958759,0.97688918139,6.38666109808,0.902395680636,2.0548885926,2.47135003169,0.25,0,1000,200,1,randomized_LASSO
+0.858982589281,0.939293015234,5.73534495114,0.870730532696,1.88688220322,2.36317409857,0.3,0,1000,200,1,randomized_LASSO
+0.75939059585,0.865395486812,5.84219932939,0.745503498889,1.57411396465,2.00307448702,0.42,0,1000,200,1,randomized_LASSO
+0.574733612271,0.770365309897,3.2842446673,0.544215065212,1.08962289716,1.92922645517,0.71,0,1000,200,1,randomized_LASSO
+0.373728618284,0.609913135656,2.01125498031,0.295208597233,0.619868328368,0.783112288547,1.22,0,1000,200,1,randomized_LASSO
+0.208476236462,0.468370989328,1.0464136513,0.0822605369992,0.302679646991,0.411382057681,2.07,0,1000,200,1,randomized_LASSO

From 1f26c09772bedb049653f7b28ea8d4e4770560da Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 17 Apr 2018 10:19:39 -0700
Subject: [PATCH 592/617] commit results where tuning parameter is tuned using
 selective mle

---
 .../metrics_high_beta_type1_full_rho_0.35.csv | 41 +++++++++++++++++++
 .../metrics_high_beta_type1_full_rho_0.csv    | 41 +++++++++++++++++++
 .../risk_high_beta_type1_full_rho_0.35.csv    | 11 +++++
 .../risk_high_beta_type1_full_rho_0.csv       | 11 +++++
 4 files changed, 104 insertions(+)
 create mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35.csv
 create mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.csv
 create mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35.csv
 create mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.csv

diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35.csv
new file mode 100644
index 000000000..d0e0c2dfa
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0.35,0.86380952381,0.16,13.0374740184,Selective MLE,2.08,0.16,0.002,0.0,1000,5.02989513105,200,1,selective_MLE
+0.05,0.35,0.0,0.0,0.0,Randomized LASSO,2.08,0.0,0.0,0.0,1000,1.03326475867,200,1,selective_MLE
+0.05,0.35,0.810161064426,0.1775,inf,Lee,7.06,0.66,0.01,0.002,1000,1.15804369753,200,1,selective_MLE
+0.05,0.35,0.1912071848,0.76,3.51752981257,Naive,10.42,2.08,0.028,0.0,1000,1.15804369753,200,1,selective_MLE
+0.1,0.35,0.897142857143,0.08,9.71567962848,Selective MLE,1.82,0.14,0.006,0.004,1000,2.95736722228,200,1,selective_MLE
+0.1,0.35,0.0,0.0,0.0,Randomized LASSO,1.82,0.0,0.0,0.0,1000,1.00318150658,200,1,selective_MLE
+0.1,0.35,0.868062434138,0.0953846153846,inf,Lee,10.6,0.5,0.022,0.014,1000,1.08288774171,200,1,selective_MLE
+0.1,0.35,0.306908254952,0.64,2.55676876557,Naive,14.78,1.6,0.082,0.0,1000,1.08288774171,200,1,selective_MLE
+0.15,0.35,0.924285714286,0.02,8.12544927375,Selective MLE,1.7,0.1,0.01,0.008,1000,1.78432174263,200,1,selective_MLE
+0.15,0.35,0.0,0.0,0.0,Randomized LASSO,1.7,0.0,0.0,0.0,1000,0.983208270296,200,1,selective_MLE
+0.15,0.35,0.852194383721,0.143166666667,inf,Lee,9.98,0.96,0.042,0.02,1000,0.993737003883,200,1,selective_MLE
+0.15,0.35,0.337529616061,0.58,2.13063655281,Naive,14.76,1.7,0.122,0.0,1000,0.993737003883,200,1,selective_MLE
+0.2,0.35,0.91380952381,0.1,6.73586062053,Selective MLE,2.14,0.1,0.002,0.0,1000,2.19256133433,200,1,selective_MLE
+0.2,0.35,0.0,0.0,0.0,Randomized LASSO,2.14,0.0,0.0,0.0,1000,0.991266295579,200,1,selective_MLE
+0.2,0.35,0.766471372755,0.172,inf,Lee,14.1,0.92,0.05,0.026,1000,0.992526772626,200,1,selective_MLE
+0.2,0.35,0.482946940064,0.56,1.92496100515,Naive,20.98,1.5,0.166,0.0,1000,0.992526772626,200,1,selective_MLE
+0.25,0.35,0.874333333333,0.1,5.7006648181,Selective MLE,2.6,0.22,0.016,0.012,1000,1.80139037275,200,1,selective_MLE
+0.25,0.35,0.0,0.0,0.0,Randomized LASSO,2.6,0.0,0.0,0.0,1000,0.976643552483,200,1,selective_MLE
+0.25,0.35,0.786518225676,0.11119047619,inf,Lee,17.44,1.4,0.088,0.044,1000,0.895249457402,200,1,selective_MLE
+0.25,0.35,0.598940055094,0.42,1.7871635152,Naive,26.52,1.08,0.216,0.0,1000,0.895249457402,200,1,selective_MLE
+0.3,0.35,0.883598484848,0.08,5.99079681341,Selective MLE,3.14,0.12,0.016,0.004,1000,1.79804896466,200,1,selective_MLE
+0.3,0.35,0.0,0.0,0.0,Randomized LASSO,3.14,0.0,0.0,0.0,1000,0.97217454907,200,1,selective_MLE
+0.3,0.35,0.850180818168,0.0937095188953,inf,Lee,18.84,1.32,0.092,0.028,1000,0.86628788711,200,1,selective_MLE
+0.3,0.35,0.637800000485,0.38,1.65945204094,Naive,27.22,1.04,0.244,0.0,1000,0.86628788711,200,1,selective_MLE
+0.42,0.35,0.939222222222,0.0466666666667,4.48839314161,Selective MLE,3.5,0.36,0.04,0.03,1000,1.38678502316,200,1,selective_MLE
+0.42,0.35,0.0,0.0,0.0,Randomized LASSO,3.5,0.0,0.0,0.0,1000,0.950472355433,200,1,selective_MLE
+0.42,0.35,0.813218122313,0.142135142721,inf,Lee,30.2,2.54,0.168,0.058,1000,0.760968826709,200,1,selective_MLE
+0.42,0.35,0.821904659163,0.1,1.56279526504,Naive,42.0,0.3,0.304,0.0,1000,0.760968826709,200,1,selective_MLE
+0.71,0.35,0.845983079609,0.0618571428571,2.92592844044,Selective MLE,8.7,1.94,0.21,0.172,1000,1.17719079209,200,1,selective_MLE
+0.71,0.35,0.0,0.0,0.0,Randomized LASSO,8.7,0.0,0.0,0.0,1000,0.853407944406,200,1,selective_MLE
+0.71,0.35,0.724240274315,0.150139194139,inf,Lee,39.4,3.52,0.266,0.104,1000,0.545188750369,200,1,selective_MLE
+0.71,0.35,0.897369823919,0.0,1.37021257383,Naive,52.54,0.0,0.458,0.0,1000,0.545188750369,200,1,selective_MLE
+1.22,0.35,0.846472687459,0.139342712843,1.62609900699,Selective MLE,17.96,6.02,0.568,0.506,1000,0.828578087539,200,1,selective_MLE
+1.22,0.35,0.0,0.0,0.0,Randomized LASSO,17.96,0.0,0.0,0.0,1000,0.669072845661,200,1,selective_MLE
+1.22,0.35,0.697092694354,0.187363717137,inf,Lee,51.82,4.82,0.32,0.118,1000,0.354254840901,200,1,selective_MLE
+1.22,0.35,0.968201494975,0.0,1.22242066847,Naive,64.82,0.0,0.61,0.0,1000,0.354254840901,200,1,selective_MLE
+2.07,0.35,0.821919854055,0.122014403897,1.1382269201,Selective MLE,19.38,8.82,0.784,0.754,1000,0.446110763277,200,1,selective_MLE
+2.07,0.35,0.0,0.0,0.0,Randomized LASSO,19.38,0.0,0.0,0.0,1000,0.567708010316,200,1,selective_MLE
+2.07,0.35,0.786147231511,0.120987886383,inf,Lee,51.26,3.02,0.252,0.072,1000,0.207900773568,200,1,selective_MLE
+2.07,0.35,0.987254893848,0.0,1.10094183201,Naive,62.74,0.0,0.858,0.0,1000,0.207900773568,200,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.csv
new file mode 100644
index 000000000..47dbf5638
--- /dev/null
+++ b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.csv
@@ -0,0 +1,41 @@
+SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
+0.05,0,0.939333333333,0.02,12.4883493047,Selective MLE,1.98,0.04,0.002,0.002,1000,3.24705655557,200,1,selective_MLE
+0.05,0,0.0,0.0,0.0,Randomized LASSO,1.98,0.0,0.0,0.0,1000,1.01323890467,200,1,selective_MLE
+0.05,0,0.840792221,0.138,inf,Lee,7.68,0.76,0.012,0.004,1000,1.18979142946,200,1,selective_MLE
+0.05,0,0.182062781828,0.82,3.51806995253,Naive,11.78,2.46,0.032,0.0,1000,1.18979142946,200,1,selective_MLE
+0.1,0,0.948142857143,0.06,7.72927664108,Selective MLE,2.1,0.06,0.002,0.0,1000,2.20315756913,200,1,selective_MLE
+0.1,0,0.0,0.0,0.0,Randomized LASSO,2.1,0.0,0.0,0.0,1000,0.994559752969,200,1,selective_MLE
+0.1,0,0.817267346017,0.125,inf,Lee,8.36,0.66,0.018,0.01,1000,1.0499982218,200,1,selective_MLE
+0.1,0,0.259410577097,0.74,2.51991380922,Naive,11.48,2.18,0.092,0.0,1000,1.0499982218,200,1,selective_MLE
+0.15,0,0.94331372549,0.06,6.34782521321,Selective MLE,3.4,0.1,0.006,0.002,1000,1.98416436442,200,1,selective_MLE
+0.15,0,0.0,0.0,0.0,Randomized LASSO,3.4,0.0,0.0,0.0,1000,0.995273245034,200,1,selective_MLE
+0.15,0,0.843812152985,0.111397435897,inf,Lee,11.76,0.96,0.046,0.018,1000,0.986295023502,200,1,selective_MLE
+0.15,0,0.427335079752,0.6,2.13817019831,Naive,17.76,1.64,0.144,0.0,1000,0.986295023502,200,1,selective_MLE
+0.2,0,0.90203030303,0.0933333333333,5.72350149651,Selective MLE,2.96,0.34,0.024,0.016,1000,1.85651551225,200,1,selective_MLE
+0.2,0,0.0,0.0,0.0,Randomized LASSO,2.96,0.0,0.0,0.0,1000,0.978648208349,200,1,selective_MLE
+0.2,0,0.871329972555,0.0879191919192,inf,Lee,11.34,1.0,0.05,0.032,1000,0.946348528327,200,1,selective_MLE
+0.2,0,0.440246057252,0.64,1.89304610067,Naive,17.38,1.98,0.156,0.0,1000,0.946348528327,200,1,selective_MLE
+0.25,0,0.923824675325,0.04,4.8411497362,Selective MLE,2.92,0.28,0.024,0.024,1000,1.35427531353,200,1,selective_MLE
+0.25,0,0.0,0.0,0.0,Randomized LASSO,2.92,0.0,0.0,0.0,1000,0.963936970096,200,1,selective_MLE
+0.25,0,0.799817592593,0.109792207792,inf,Lee,17.6,1.18,0.108,0.038,1000,0.877901846227,200,1,selective_MLE
+0.25,0,0.60424285517,0.38,1.78254634538,Naive,26.46,1.24,0.256,0.0,1000,0.877901846227,200,1,selective_MLE
+0.3,0,0.962333333333,0.02,4.0846953987,Selective MLE,4.1,0.28,0.036,0.024,1000,1.27509640458,200,1,selective_MLE
+0.3,0,0.0,0.0,0.0,Randomized LASSO,4.1,0.0,0.0,0.0,1000,0.963413654406,200,1,selective_MLE
+0.3,0,0.740728587282,0.14370148857,inf,Lee,22.58,2.64,0.176,0.064,1000,0.871637370414,200,1,selective_MLE
+0.3,0,0.690347872224,0.32,1.71056902174,Naive,32.36,0.74,0.246,0.0,1000,0.871637370414,200,1,selective_MLE
+0.42,0,0.908340548341,0.0333333333333,3.4626911418,Selective MLE,6.06,0.84,0.13,0.078,1000,1.46313049815,200,1,selective_MLE
+0.42,0,0.0,0.0,0.0,Randomized LASSO,6.06,0.0,0.0,0.0,1000,0.902483553335,200,1,selective_MLE
+0.42,0,0.772215413934,0.117950980392,inf,Lee,27.52,2.04,0.176,0.072,1000,0.739251951337,200,1,selective_MLE
+0.42,0,0.800636311322,0.12,1.51881127885,Naive,38.1,0.32,0.342,0.0,1000,0.739251951337,200,1,selective_MLE
+0.71,0,0.902711246222,0.135333333333,2.29066703226,Selective MLE,11.94,2.6,0.258,0.204,1000,1.07824235978,200,1,selective_MLE
+0.71,0,0.0,0.0,0.0,Randomized LASSO,11.94,0.0,0.0,0.0,1000,0.836538976592,200,1,selective_MLE
+0.71,0,0.816120961485,0.0964545454545,inf,Lee,39.2,2.22,0.218,0.066,1000,0.56972376987,200,1,selective_MLE
+0.71,0,0.893159232195,0.02,1.33867459865,Naive,52.92,0.02,0.456,0.0,1000,0.56972376987,200,1,selective_MLE
+1.22,0,0.846552646398,0.148354256854,1.45750373595,Selective MLE,17.32,5.84,0.514,0.482,1000,0.727206377914,200,1,selective_MLE
+1.22,0,0.0,0.0,0.0,Randomized LASSO,17.32,0.0,0.0,0.0,1000,0.711660402878,200,1,selective_MLE
+1.22,0,0.697183263023,0.178388196001,inf,Lee,47.9,4.66,0.336,0.12,1000,0.37145714765,200,1,selective_MLE
+1.22,0,0.960033854849,0.0,1.18905978659,Naive,61.66,0.0,0.622,0.0,1000,0.37145714765,200,1,selective_MLE
+2.07,0,0.813603148591,0.116182900433,0.9652716672,Selective MLE,18.1,8.96,0.792,0.772,1000,0.347558277288,200,1,selective_MLE
+2.07,0,0.0,0.0,0.0,Randomized LASSO,18.1,0.0,0.0,0.0,1000,0.556557304432,200,1,selective_MLE
+2.07,0,0.729880633536,0.176751570048,inf,Lee,52.32,4.46,0.352,0.102,1000,0.20332446773,200,1,selective_MLE
+2.07,0,0.983468197749,0.0,1.08614747667,Naive,61.2,0.0,0.858,0.0,1000,0.20332446773,200,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35.csv
new file mode 100644
index 000000000..6886c50f9
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+1.15804369753,1.03326475867,5.02989513105,1.27125139482,2.71819494978,5.02989513105,0.05,0.35,1000,200,1,selective_MLE
+1.08288774171,1.00318150658,2.95736722228,1.12956825759,1.79266089014,2.95736722228,0.1,0.35,1000,200,1,selective_MLE
+0.993737003883,0.983208270296,1.78432174263,1.01639127537,1.44646897849,1.78432174263,0.15,0.35,1000,200,1,selective_MLE
+0.992526772626,0.991266295579,2.19256133433,1.01242596671,1.36480228762,2.19256133433,0.2,0.35,1000,200,1,selective_MLE
+0.895249457402,0.976643552483,1.80139037275,0.90408118781,1.2427738658,1.80139037275,0.25,0.35,1000,200,1,selective_MLE
+0.86628788711,0.97217454907,1.79804896466,0.860191356047,1.2211458867,1.79804896466,0.3,0.35,1000,200,1,selective_MLE
+0.760968826709,0.950472355433,1.38678502316,0.760063270144,1.05808358132,1.38678502316,0.42,0.35,1000,200,1,selective_MLE
+0.545188750369,0.853407944406,1.17719079209,0.513362787122,0.856116134157,1.17719079209,0.71,0.35,1000,200,1,selective_MLE
+0.354254840901,0.669072845661,0.828578087539,0.255188048196,0.528899193159,0.828578087539,1.22,0.35,1000,200,1,selective_MLE
+0.207900773568,0.567708010316,0.446110763277,0.0793901361815,0.285583228595,0.446110763277,2.07,0.35,1000,200,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.csv
new file mode 100644
index 000000000..371a248a9
--- /dev/null
+++ b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.csv
@@ -0,0 +1,11 @@
+Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
+1.18979142946,1.01323890467,3.24705655557,1.26747917859,2.52302676222,3.24705655557,0.05,0,1000,200,1,selective_MLE
+1.0499982218,0.994559752969,2.20315756913,1.08522590394,1.80817304281,2.20315756913,0.1,0,1000,200,1,selective_MLE
+0.986295023502,0.995273245034,1.98416436442,1.02842358859,1.65477241528,1.98416436442,0.15,0,1000,200,1,selective_MLE
+0.946348528327,0.978648208349,1.85651551225,0.957246371957,1.41201355988,1.85651551225,0.2,0,1000,200,1,selective_MLE
+0.877901846227,0.963936970096,1.35427531353,0.892956430716,1.24760051675,1.35427531353,0.25,0,1000,200,1,selective_MLE
+0.871637370414,0.963413654406,1.27509640458,0.878049441441,1.23165619207,1.27509640458,0.3,0,1000,200,1,selective_MLE
+0.739251951337,0.902483553335,1.46313049815,0.739133721282,1.07196731339,1.46313049815,0.42,0,1000,200,1,selective_MLE
+0.56972376987,0.836538976592,1.07824235978,0.532687510942,0.899318445422,1.07824235978,0.71,0,1000,200,1,selective_MLE
+0.37145714765,0.711660402878,0.727206377914,0.27830772286,0.581515000657,0.727206377914,1.22,0,1000,200,1,selective_MLE
+0.20332446773,0.556557304432,0.347558277288,0.0790857133544,0.266649181037,0.347558277288,2.07,0,1000,200,1,selective_MLE

From d32de016428ab16a6f456c0ff94851bd2d5e39cb Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 20 Apr 2018 13:37:59 -0700
Subject: [PATCH 593/617] adding log posterior parametrized using sel_MLE in
 query

---
 selection/SLOPE/slope.py                      |  8 ++---
 selection/SLOPE/tests/slope_run_test.py       |  6 ++--
 .../adjusted_MLE/tests/test_risk_coverage.py  |  2 +-
 selection/randomized/query.py                 | 29 +++++++++++++++++++
 4 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index 47a0fa40d..a2f73d0b5 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -129,14 +129,16 @@ def fit(self,
         cov, prec = self.randomizer.cov_prec
         opt_linear, opt_offset = self.opt_transform
 
-        print("check if correct", np.allclose(-X.T.dot(y-X_clustered.dot(initial_scalings))
-                                              +self.initial_subgrad,self._initial_omega, rtol=1e-05, atol=1e-08))
+        print("check if correct", np.allclose(self.observed_score_state + opt_offset + opt_linear.dot(initial_scalings),
+                                              self._initial_omega, rtol=1e-05, atol=1e-08))
 
         cond_precision = opt_linear.T.dot(opt_linear) * prec
         cond_cov = np.linalg.inv(cond_precision)
         logdens_linear = cond_cov.dot(opt_linear.T) * prec
         cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
 
+        logdens_transform = (logdens_linear, opt_offset)
+
         def log_density(logdens_linear, offset, cond_prec, score, opt):
             if score.ndim == 1:
                 mean_term = logdens_linear.dot(score.T + offset).T
@@ -164,8 +166,6 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                  mean=cond_mean,
                                  covariance=cond_cov)
 
-        logdens_transform = (logdens_linear, opt_offset)
-
         self.sampler = affine_gaussian_sampler(affine_con,
                                                self.observed_opt_state,
                                                self.observed_score_state,
diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/SLOPE/tests/slope_run_test.py
index 0e52738e9..55257be94 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/SLOPE/tests/slope_run_test.py
@@ -159,7 +159,7 @@ def test0_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
     X_clustered = X[:, indices].dot(signs_cluster)
     print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
 
-def test_randomized_slope(n=500, p=100, signal_fac=1.5, s=5, sigma=3., rho=0.35, randomizer_scale= np.sqrt(0.25)):
+def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0., randomizer_scale= np.sqrt(0.5)):
 
     while True:
         inst = gaussian_instance
@@ -178,7 +178,7 @@ def test_randomized_slope(n=500, p=100, signal_fac=1.5, s=5, sigma=3., rho=0.35,
                                                           Y,
                                                           W=None,
                                                           normalize=True,
-                                                          choice_weights="gaussian",
+                                                          choice_weights="bhq", #put gaussian
                                                           sigma=sigma_)
 
         conv = randomized_slope.gaussian(X,
@@ -203,7 +203,7 @@ def test_randomized_slope(n=500, p=100, signal_fac=1.5, s=5, sigma=3., rho=0.35,
 def main(nsim=100):
 
     P0, PA, cover, length_int = [], [], [], []
-    from statsmodels.distributions import ECDF
+    #from statsmodels.distributions import ECDF
 
     for i in range(nsim):
         p0, pA, cover_, intervals = test_randomized_slope()
diff --git a/selection/adjusted_MLE/tests/test_risk_coverage.py b/selection/adjusted_MLE/tests/test_risk_coverage.py
index ed2b84c5d..21c1134e0 100644
--- a/selection/adjusted_MLE/tests/test_risk_coverage.py
+++ b/selection/adjusted_MLE/tests/test_risk_coverage.py
@@ -140,4 +140,4 @@ def write_ouput(outpath, n=500, p=100, rho=0.35, s=5, beta_type=1, target="selec
     df_risk.to_csv(outfile_risk, index=False)
 
 write_ouput("/Users/snigdhapanigrahi/adjusted_MLE/results", n=200, p=1000, rho=0, s=10, beta_type=1,
-            target="full", tuning = "randomized_LASSO", randomizing_scale= np.sqrt(0.25), ndraw = 50)
+            target="full", tuning = "selective_MLE", randomizing_scale= np.sqrt(0.25), ndraw = 50)
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 3bc6472f5..df4030e84 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -518,6 +518,35 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
 
         return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator
 
+    def log_posterior(self, theta, observed_target, cov_target, cov_target_score, feasible_point, solve_args={}):
+
+        prec_target = np.linalg.inv(cov_target)
+        ndim = prec_target.shape[0]
+        logdens_lin, logdens_off = self.logdens_transform
+        target_lin = - logdens_lin.dot(cov_target_score.T.dot(prec_target))
+        target_offset = self.affine_con.mean - target_lin.dot(observed_target)
+
+        cov_opt = self.affine_con.covariance
+        prec_opt = np.linalg.inv(cov_opt)
+
+        mean_param = target_lin.dot(theta)+target_offset
+        conjugate_arg = prec_opt.dot(mean_param)
+        init_soln = feasible_point
+        val, soln, hess = solve_barrier_nonneg(conjugate_arg,
+                                               prec_opt,
+                                               init_soln,
+                                               **solve_args)
+
+        inter_map = cov_target.dot(target_lin.T.dot(prec_opt))
+        param_map = theta + inter_map.dot(mean_param - soln)
+        log_normalizer_map = (theta.T.dot(prec_target + target_lin.T.dot(prec_opt).dot(target_lin)).dot(theta))/2. \
+                             - theta.T.dot(target_lin.T).prec_opt.dot(soln) - target_offset.T.dot(prec_opt).dot(target_offset)/2. \
+                             + val
+
+        jacobian_map = (np.identity(ndim)+ inter_map.dot(target_lin))- inter_map.dot(hess).dot(prec_opt).dot(target_lin)
+
+        return param_map, log_normalizer_map, jacobian_map
+
 class optimization_intervals(object):
 
     def __init__(self,

From 4b54c4a80ab9be8fb469a0b8ee7dbfafa985d76b Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 20 Apr 2018 14:41:42 -0700
Subject: [PATCH 594/617] added a missing term

---
 selection/adjusted_MLE/tests/test_inferential_metrics.py | 2 +-
 selection/randomized/query.py                            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 58749a3e7..ffac8d21e 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -485,7 +485,7 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
     ndraw = 1
     output_overall = np.zeros(27)
 
-    target = "selected"
+    target = "full"
     tuning = "selective_MLE"
     n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.30
 
diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index df4030e84..bbba9a535 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -541,9 +541,9 @@ def log_posterior(self, theta, observed_target, cov_target, cov_target_score, fe
         param_map = theta + inter_map.dot(mean_param - soln)
         log_normalizer_map = (theta.T.dot(prec_target + target_lin.T.dot(prec_opt).dot(target_lin)).dot(theta))/2. \
                              - theta.T.dot(target_lin.T).prec_opt.dot(soln) - target_offset.T.dot(prec_opt).dot(target_offset)/2. \
-                             + val
+                             + val - (param_map.T.dot(prec_target).param_map)/2.
 
-        jacobian_map = (np.identity(ndim)+ inter_map.dot(target_lin))- inter_map.dot(hess).dot(prec_opt).dot(target_lin)
+        jacobian_map = (np.identity(ndim)+ inter_map.dot(target_lin)) - inter_map.dot(hess).dot(prec_opt.dot(target_lin))
 
         return param_map, log_normalizer_map, jacobian_map
 

From 1d430c87ed4024b433ff699e6b2fb3e3090e0642 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 20 Apr 2018 14:45:01 -0700
Subject: [PATCH 595/617] renamed log_posterior to reparam_map in query.py

---
 selection/randomized/query.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index bbba9a535..0e9433c92 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -518,7 +518,7 @@ def selective_MLE(self, observed_target, cov_target, cov_target_score, feasible_
 
         return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator
 
-    def log_posterior(self, theta, observed_target, cov_target, cov_target_score, feasible_point, solve_args={}):
+    def reparam_map(self, theta, observed_target, cov_target, cov_target_score, feasible_point, solve_args={}):
 
         prec_target = np.linalg.inv(cov_target)
         ndim = prec_target.shape[0]

From 04fa473e0897729ae0d50740a22be50e1d557c17 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Mon, 23 Apr 2018 10:52:38 -0400
Subject: [PATCH 596/617] created marginal_screening class that can be used for BH

---
 selection/randomized/marginal_screening.py | 273 +++++++++++++++++++++
 1 file changed, 273 insertions(+)
 create mode 100644 selection/randomized/marginal_screening.py

diff --git a/selection/randomized/marginal_screening.py b/selection/randomized/marginal_screening.py
new file mode 100644
index 000000000..7fa67886d
--- /dev/null
+++ b/selection/randomized/marginal_screening.py
@@ -0,0 +1,273 @@
+from __future__ import print_function
+import functools
+import numpy as np
+from selection.randomized.randomization import randomization
+import regreg.api as rr
+from selection.randomized.base import restricted_estimator
+from selection.constraints.affine import constraints
+from selection.randomized.query import (query,
+                                        multiple_queries,
+                                        langevin_sampler,
+                                        affine_gaussian_sampler)
+
+class marginal_screening():
+
+    def __init__(self,
+                 observed_score,
+                 threshold,
+                 randomizer_scale,
+                 perturb=None):
+
+        self.nfeature =  p = score.shape[0]
+        if np.asarray(threshold).shape == ():
+            threshold = np.ones(p) * threshold
+        self.threshold = np.asarray(threshold)
+
+        self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
+        self._initial_omega = perturb
+        self.observed_score = observed_score
+
+    def fit(self, perturb=None):
+
+        p = self.nfeature
+
+        # take a new perturbation if supplied
+        if perturb is not None:
+            self._initial_omega = perturb
+        if self._initial_omega is None:
+            self._initial_omega = self.randomizer.sample()
+
+        randomized_score = self.observed_score + self._initial_omega
+
+        self.boundary = np.fabs(randomized_score) > self.threshold
+        self.interior = ~self.boundary
+        active_signs = np.sign(randomized_score[self.boundary])
+
+        self.observed_opt_state = self._initial_omega[self.boundary] + self.observed_score[self.boundary] - \
+                                  np.diag(active_signs)* self.threshold[self.boundary]
+        self.num_opt_var = self.observed_opt_state.shape[0]
+
+        opt_linear = np.zeros((p, self.num_opt_var))
+        opt_linear[self.boundary, :] = np.diag(active_signs)
+        opt_offset = np.zeros(p)
+        opt_offset[self.boundary] = active_signs * self.threshold[self.boundary]
+        opt_offset[self.interior] = self._initial_omega[self.interior] + self.observed_score[self.interior]
+        self.opt_transform = (opt_linear, opt_offset)
+
+        cov, prec = self.randomizer.cov_prec
+        cond_precision = opt_linear.T.dot(opt_linear) * prec
+        cond_cov = np.linalg.inv(cond_precision)
+        logdens_linear = cond_cov.dot(opt_linear.T) * prec
+        cond_mean = -logdens_linear.dot(self.observed_score + opt_offset)
+
+        logdens_transform = (logdens_linear, opt_offset)
+        A_scaling = -np.identity(self.num_opt_var)
+        b_scaling = np.zeros(self.num_opt_var)
+
+        def log_density(logdens_linear, offset, cond_prec, score, opt):
+            if score.ndim == 1:
+                mean_term = logdens_linear.dot(score.T + offset).T
+            else:
+                mean_term = logdens_linear.dot(score.T + offset[:, None]).T
+            arg = opt + mean_term
+            return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+
+        log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
+
+        affine_con = constraints(A_scaling,
+                                 b_scaling,
+                                 mean=cond_mean,
+                                 covariance=cond_cov)
+
+        self.sampler = affine_gaussian_sampler(affine_con,
+                                               self.observed_opt_state,
+                                               self.observed_score,
+                                               log_density,
+                                               logdens_transform,
+                                               selection_info=self.selection_variable)
+        return active_signs
+
+
+    def selective_MLE(self,
+                      target="selected",
+                      features=None,
+                      parameter=None,
+                      level=0.9,
+                      compute_intervals=False,
+                      dispersion=None,
+                      solve_args={'tol': 1.e-12}):
+        """
+        Parameters
+        ----------
+        target : one of ['selected', 'full']
+        features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
+        level : float
+            Confidence level.
+        ndraw : int (optional)
+            Defaults to 1000.
+        burnin : int (optional)
+            Defaults to 1000.
+        compute_intervals : bool
+            Compute confidence intervals?
+        dispersion : float (optional)
+            Use a known value for dispersion, or Pearson's X^2?
+        """
+
+        if parameter is None:
+            parameter = np.zeros(self.loglike.shape[0])
+
+        if target == 'selected':
+            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
+                                                                                                dispersion=dispersion)
+
+        elif target == 'full':
+            X, y = self.loglike.data
+            n, p = X.shape
+            if n > p:
+                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features,
+                                                                                                dispersion=dispersion)
+            else:
+                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features,
+                                                                                                    dispersion=dispersion)
+
+
+        return self.sampler.selective_MLE(observed_target,
+                                          cov_target,
+                                          cov_target_score,
+                                          self.observed_opt_state,
+                                          solve_args=solve_args)
+
+    def selected_targets(self, features=None, dispersion=None):
+
+        X, y = self.loglike.data
+        n, p = X.shape
+
+        if features is None:
+            active = self._active
+            unpenalized = self._unpenalized
+            noverall = active.sum() + unpenalized.sum()
+            overall = active + unpenalized
+
+            score_linear = self.score_transform[0]
+            Q = -score_linear[overall]
+            cov_target = np.linalg.inv(Q)
+            observed_target = self._beta_full[overall]
+            crosscov_target_score = score_linear.dot(cov_target)
+            Xfeat = X[:, overall]
+            alternatives = [{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][active]] + [
+                                                                                                                       'twosided'] * unpenalized.sum()
+
+        else:
+
+            features_b = np.zeros_like(self._overall)
+            features_b[features] = True
+            features = features_b
+
+            Xfeat = X[:, features]
+            Qfeat = Xfeat.T.dot(self._W[:, None] * Xfeat)
+            Gfeat = self.loglike.smooth_objective(self.initial_soln, 'grad')[features]
+            Qfeat_inv = np.linalg.inv(Qfeat)
+            one_step = self.initial_soln[features] - Qfeat_inv.dot(Gfeat)
+            cov_target = Qfeat_inv
+            _score_linear = -Xfeat.T.dot(self._W[:, None] * X).T
+            crosscov_target_score = _score_linear.dot(cov_target)
+            observed_target = one_step
+            alternatives = ['twosided'] * features.sum()
+
+        if dispersion is None:  # use Pearson's X^2
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(
+                Xfeat.dot(observed_target))) ** 2 / self._W).sum() / (n - Xfeat.shape[1])
+
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+
+    def full_targets(self, features=None, dispersion=None):
+
+        if features is None:
+            features = self._overall
+        features_bool = np.zeros(self._overall.shape, np.bool)
+        features_bool[features] = True
+        features = features_bool
+
+        X, y = self.loglike.data
+        n, p = X.shape
+
+        # target is one-step estimator
+
+        Qfull = X.T.dot(self._W[:, None] * X)
+        G = self.loglike.smooth_objective(self.initial_soln, 'grad')
+        Qfull_inv = np.linalg.inv(Qfull)
+        one_step = self.initial_soln - Qfull_inv.dot(G)
+        cov_target = Qfull_inv[features][:, features]
+        observed_target = one_step[features]
+        crosscov_target_score = np.zeros((p, cov_target.shape[0]))
+        crosscov_target_score[features] = -np.identity(cov_target.shape[0])
+
+        if dispersion is None:  # use Pearson's X^2
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step))) ** 2 / self._W).sum() / (
+            n - p)
+
+        alternatives = ['twosided'] * features.sum()
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+
+    def debiased_targets(self,
+                         features=None,
+                         dispersion=None,
+                         debiasing_args={}):
+
+        if features is None:
+            features = self._overall
+        features_bool = np.zeros(self._overall.shape, np.bool)
+        features_bool[features] = True
+        features = features_bool
+
+        X, y = self.loglike.data
+        n, p = X.shape
+
+        # target is one-step estimator
+
+        G = self.loglike.smooth_objective(self.initial_soln, 'grad')
+        Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(self._W)[:, None],
+                                                  np.nonzero(features)[0],
+                                                  **debiasing_args)) / n
+        observed_target = self.initial_soln[features] - Qinv_hat.dot(G)
+        if p > n:
+            M1 = Qinv_hat.dot(X.T)
+            cov_target = (M1 * self._W[None, :]).dot(M1.T)
+            crosscov_target_score = -(M1 * self._W[None, :]).dot(X).T
+        else:
+            Qfull = X.T.dot(self._W[:, None] * X)
+            cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T))
+            crosscov_target_score = -Qinv_hat.dot(Qfull).T
+
+        if dispersion is None:  # use Pearson's X^2
+            Xfeat = X[:, features]
+            Qrelax = Xfeat.T.dot(self._W[:, None] * Xfeat)
+            relaxed_soln = self.initial_soln[features] - np.linalg.inv(Qrelax).dot(G[features])
+            dispersion = ((y - self.loglike.saturated_loss.mean_function(
+                Xfeat.dot(relaxed_soln))) ** 2 / self._W).sum() / (n - features.sum())
+
+        alternatives = ['twosided'] * features.sum()
+        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+
+    @staticmethod
+    def gaussian(X,
+                 Y,
+                 threshold,
+                 sigma=1.,
+                 randomizer_scale=None):
+
+        n, p = X.shape
+        mean_diag = np.mean((X ** 2).sum(0))
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
+
+        return marginal_screening(-X.dot(Y), threshold, randomizer_scale)
+
+
+
+

From 3aff7af87b457203d7fca16565512c21c291a04d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Apr 2018 13:59:33 -0700
Subject: [PATCH 597/617] made regreg a requirement (replaces duplicate pyinter entry)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 280ef2764..c00bee95b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,5 +6,5 @@ mpmath
 pyinter
 statsmodels
 sklearn
-pyinter
+regreg
 rpy2

From bc3c1d2574a8b83a3f09e5934546c4fab898d25e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Apr 2018 14:29:18 -0700
Subject: [PATCH 598/617] deleting output

---
 .../metrics_beta_type1_full_rho_0.35.csv      | 41 -------------------
 .../metrics_beta_type1_full_rho_0.7.csv       | 41 -------------------
 .../output/metrics_beta_type1_full_rho_0.csv  | 41 -------------------
 .../metrics_beta_type1_selected_rho_0.35.csv  | 41 -------------------
 .../metrics_beta_type1_selected_rho_0.7.csv   | 41 -------------------
 .../metrics_beta_type1_selected_rho_0.csv     | 41 -------------------
 .../metrics_high_beta_type1_full_rho_0.35.csv | 41 -------------------
 ...rics_high_beta_type1_full_rho_0.35_tRL.csv | 41 -------------------
 ...trics_high_beta_type1_full_rho_0.7_tRL.csv | 41 -------------------
 .../metrics_high_beta_type1_full_rho_0.csv    | 41 -------------------
 ...metrics_high_beta_type1_full_rho_0_tRL.csv | 41 -------------------
 .../output/risk_beta_type1_full_rho_0.35.csv  | 11 -----
 .../output/risk_beta_type1_full_rho_0.7.csv   | 11 -----
 .../output/risk_beta_type1_full_rho_0.csv     | 11 -----
 .../risk_beta_type1_selected_rho_0.35.csv     | 11 -----
 .../risk_beta_type1_selected_rho_0.7.csv      | 11 -----
 .../output/risk_beta_type1_selected_rho_0.csv | 11 -----
 .../risk_high_beta_type1_full_rho_0.35.csv    | 11 -----
 ...risk_high_beta_type1_full_rho_0.35_tRL.csv | 11 -----
 .../risk_high_beta_type1_full_rho_0.7_tRL.csv | 11 -----
 .../risk_high_beta_type1_full_rho_0.csv       | 11 -----
 .../risk_high_beta_type1_full_rho_0_tRL.csv   | 11 -----
 22 files changed, 572 deletions(-)
 delete mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.35.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.7.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.35.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.7.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35_tRL.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.7_tRL.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.csv
 delete mode 100644 selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0_tRL.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.35.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.7.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.35.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.7.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35_tRL.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.7_tRL.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.csv
 delete mode 100644 selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0_tRL.csv

diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.35.csv b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.35.csv
deleted file mode 100644
index 709cab5b5..000000000
--- a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.35.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0.35,0.931461038961,0.1,2.41544824566,Selective MLE,6.16,0.58,0.152,0.088,100,1.19947480531,500,1,selective_MLE
-0.05,0.35,0.0,0.0,0.0,Randomized LASSO,6.16,0.0,0.0,0.0,100,0.838899806485,500,1,selective_MLE
-0.05,0.35,0.884926599127,0.05425,inf,Lee,17.12,0.78,0.188,0.072,100,0.724816854623,500,1,selective_MLE
-0.05,0.35,0.65701749871,0.38,1.55605689956,Naive,19.96,0.88,0.552,0.0,100,0.724816854623,500,1,selective_MLE
-0.1,0.35,0.923951051872,0.0723333333333,1.60286679569,Selective MLE,6.68,2.02,0.464,0.372,100,0.617980303537,500,1,selective_MLE
-0.1,0.35,0.0,0.0,0.0,Randomized LASSO,6.68,0.0,0.0,0.0,100,0.623250677108,500,1,selective_MLE
-0.1,0.35,0.829751327499,0.136298427063,inf,Lee,19.16,2.74,0.488,0.316,100,0.385265083675,500,1,selective_MLE
-0.1,0.35,0.674292607555,0.32,1.0957955719,Naive,22.72,0.62,0.848,0.0,100,0.385265083675,500,1,selective_MLE
-0.15,0.35,0.919792596293,0.0436666666667,1.24993790514,Selective MLE,7.34,3.34,0.704,0.64,100,0.375999447603,500,1,selective_MLE
-0.15,0.35,0.0,0.0,0.0,Randomized LASSO,7.34,0.0,0.0,0.0,100,0.542201834918,500,1,selective_MLE
-0.15,0.35,0.860987230522,0.0820341880342,inf,Lee,20.62,3.34,0.656,0.54,100,0.270390483342,500,1,selective_MLE
-0.15,0.35,0.652876573256,0.34,0.893430986125,Naive,23.84,0.68,0.952,0.0,100,0.270390483342,500,1,selective_MLE
-0.2,0.35,0.926208791209,0.013,1.07711888638,Selective MLE,7.52,4.06,0.86,0.8,100,0.222436708189,500,1,selective_MLE
-0.2,0.35,0.0,0.0,0.0,Randomized LASSO,7.52,0.0,0.0,0.0,100,0.446913741016,500,1,selective_MLE
-0.2,0.35,0.832607143904,0.0939413919414,inf,Lee,21.2,3.5,0.648,0.572,100,0.217031859955,500,1,selective_MLE
-0.2,0.35,0.656193739552,0.34,0.778513197816,Naive,23.04,0.58,0.984,0.0,100,0.217031859955,500,1,selective_MLE
-0.25,0.35,0.896191475191,0.028,0.954438262285,Selective MLE,7.96,4.76,0.92,0.92,100,0.136180132365,500,1,selective_MLE
-0.25,0.35,0.0,0.0,0.0,Randomized LASSO,7.96,0.0,0.0,0.0,100,0.369746575113,500,1,selective_MLE
-0.25,0.35,0.867119718639,0.126863636364,inf,Lee,22.62,3.7,0.692,0.528,100,0.183191135704,500,1,selective_MLE
-0.25,0.35,0.673454163252,0.36,0.70260871614,Naive,24.7,0.72,0.996,0.0,100,0.183191135704,500,1,selective_MLE
-0.3,0.35,0.922422355422,0.018,0.846864516823,Selective MLE,7.12,4.78,0.944,0.936,100,0.124306493466,500,1,selective_MLE
-0.3,0.35,0.0,0.0,0.0,Randomized LASSO,7.12,0.0,0.0,0.0,100,0.370077049834,500,1,selective_MLE
-0.3,0.35,0.900479439176,0.059,inf,Lee,22.32,3.32,0.668,0.604,100,0.139899752608,500,1,selective_MLE
-0.3,0.35,0.653521031881,0.44,0.639842749189,Naive,25.14,0.96,1.0,0.0,100,0.139899752608,500,1,selective_MLE
-0.42,0.35,0.89451037851,0.0233333333333,0.695195505914,Selective MLE,6.82,5.12,0.996,0.996,100,0.067374298508,500,1,selective_MLE
-0.42,0.35,0.0,0.0,0.0,Randomized LASSO,6.82,0.0,0.0,0.0,100,0.310468898242,500,1,selective_MLE
-0.42,0.35,0.866246270431,0.131911255411,inf,Lee,21.74,4.34,0.776,0.704,100,0.101985001419,500,1,selective_MLE
-0.42,0.35,0.645621038488,0.32,0.535115175216,Naive,23.98,0.68,1.0,0.0,100,0.101985001419,500,1,selective_MLE
-0.71,0.35,0.915206349206,0.00666666666667,0.517475359883,Selective MLE,6.68,5.04,1.0,1.0,100,0.0317729502039,500,1,selective_MLE
-0.71,0.35,0.0,0.0,0.0,Randomized LASSO,6.68,0.0,0.0,0.0,100,0.218910141131,500,1,selective_MLE
-0.71,0.35,0.841226328389,0.153599439776,inf,Lee,22.34,5.2,0.844,0.716,100,0.0569139003612,500,1,selective_MLE
-0.71,0.35,0.662128719316,0.46,0.411939807863,Naive,25.74,0.88,1.0,0.0,100,0.0569139003612,500,1,selective_MLE
-1.22,0.35,0.896861111111,0.00333333333333,0.399786803636,Selective MLE,6.52,5.02,1.0,1.0,100,0.0176700251849,500,1,selective_MLE
-1.22,0.35,0.0,0.0,0.0,Randomized LASSO,6.52,0.0,0.0,0.0,100,0.182617145112,500,1,selective_MLE
-1.22,0.35,0.877158606178,0.072,inf,Lee,22.22,4.54,0.868,0.812,100,0.0329382817335,500,1,selective_MLE
-1.22,0.35,0.683593512131,0.26,0.321334855624,Naive,25.94,0.7,1.0,0.0,100,0.0329382817335,500,1,selective_MLE
-2.07,0.35,0.883165223665,0.0157142857143,0.301333150726,Selective MLE,6.24,5.1,1.0,1.0,100,0.0116313177681,500,1,selective_MLE
-2.07,0.35,0.0,0.0,0.0,Randomized LASSO,6.24,0.0,0.0,0.0,100,0.100893025098,500,1,selective_MLE
-2.07,0.35,0.881958794089,0.101575091575,inf,Lee,19.76,5.28,0.932,0.9,100,0.0207267202668,500,1,selective_MLE
-2.07,0.35,0.626224030054,0.42,0.242265511428,Naive,23.18,1.08,1.0,0.0,100,0.0207267202668,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.7.csv b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.7.csv
deleted file mode 100644
index b9ea473ce..000000000
--- a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.7.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0.7,0.905238095238,0.04,2.95333681974,Selective MLE,3.72,0.3,0.112,0.048,100,1.11864047232,500,1,selective_MLE
-0.05,0.7,0.0,0.0,0.0,Randomized LASSO,3.72,0.0,0.0,0.0,100,0.826745258299,500,1,selective_MLE
-0.05,0.7,0.896339366858,0.0583333333333,inf,Lee,16.06,0.48,0.16,0.064,100,0.606481746444,500,1,selective_MLE
-0.05,0.7,0.718009953293,0.36,1.80323034055,Naive,18.22,0.78,0.38,0.0,100,0.606481746444,500,1,selective_MLE
-0.1,0.7,0.897138167388,0.0666666666667,2.03805744419,Selective MLE,5.58,1.24,0.328,0.228,100,0.812188963578,500,1,selective_MLE
-0.1,0.7,0.0,0.0,0.0,Randomized LASSO,5.58,0.0,0.0,0.0,100,0.700295664431,500,1,selective_MLE
-0.1,0.7,0.834135047629,0.109545454545,inf,Lee,19.6,1.5,0.356,0.18,100,0.398650296901,500,1,selective_MLE
-0.1,0.7,0.724421219274,0.34,1.2924447882,Naive,21.66,0.4,0.652,0.0,100,0.398650296901,500,1,selective_MLE
-0.15,0.7,0.869679172679,0.0613333333333,1.64987078154,Selective MLE,7.48,1.82,0.48,0.332,100,0.591789402777,500,1,selective_MLE
-0.15,0.7,0.0,0.0,0.0,Randomized LASSO,7.48,0.0,0.0,0.0,100,0.586732001573,500,1,selective_MLE
-0.15,0.7,0.871529817256,0.113658730159,inf,Lee,21.84,2.18,0.452,0.308,100,0.266817960717,500,1,selective_MLE
-0.15,0.7,0.735953965022,0.32,1.08356718193,Naive,23.92,0.56,0.76,0.0,100,0.266817960717,500,1,selective_MLE
-0.2,0.7,0.851695443445,0.061380952381,1.39842783719,Selective MLE,7.5,2.96,0.624,0.544,100,0.40776192466,500,1,selective_MLE
-0.2,0.7,0.0,0.0,0.0,Randomized LASSO,7.5,0.0,0.0,0.0,100,0.487626752228,500,1,selective_MLE
-0.2,0.7,0.867552980668,0.0930555555556,inf,Lee,20.28,2.18,0.552,0.336,100,0.207599545724,500,1,selective_MLE
-0.2,0.7,0.692427739069,0.34,0.925488873517,Naive,22.3,0.5,0.82,0.0,100,0.207599545724,500,1,selective_MLE
-0.25,0.7,0.895587313014,0.0733333333333,1.27619828265,Selective MLE,8.16,3.38,0.728,0.62,100,0.300554430254,500,1,selective_MLE
-0.25,0.7,0.0,0.0,0.0,Randomized LASSO,8.16,0.0,0.0,0.0,100,0.451547708341,500,1,selective_MLE
-0.25,0.7,0.875634221242,0.115936507937,inf,Lee,21.28,3.02,0.576,0.428,100,0.178457205606,500,1,selective_MLE
-0.25,0.7,0.726470926607,0.38,0.841723670385,Naive,23.24,0.7,0.9,0.0,100,0.178457205606,500,1,selective_MLE
-0.3,0.7,0.88966045066,0.0506666666667,1.12991162944,Selective MLE,7.08,4.08,0.808,0.768,100,0.239662294933,500,1,selective_MLE
-0.3,0.7,0.0,0.0,0.0,Randomized LASSO,7.08,0.0,0.0,0.0,100,0.417466476111,500,1,selective_MLE
-0.3,0.7,0.898605992125,0.118976190476,inf,Lee,20.38,3.12,0.628,0.528,100,0.142653661284,500,1,selective_MLE
-0.3,0.7,0.714628649891,0.46,0.754701079716,Naive,22.32,0.64,0.96,0.0,100,0.142653661284,500,1,selective_MLE
-0.42,0.7,0.898163780664,0.02,0.952282599856,Selective MLE,7.3,4.78,0.948,0.932,100,0.135011251127,500,1,selective_MLE
-0.42,0.7,0.0,0.0,0.0,Randomized LASSO,7.3,0.0,0.0,0.0,100,0.343633849642,500,1,selective_MLE
-0.42,0.7,0.862383839929,0.113658730159,inf,Lee,22.16,3.32,0.692,0.54,100,0.100564129182,500,1,selective_MLE
-0.42,0.7,0.728642923069,0.42,0.645102579648,Naive,24.06,0.68,0.98,0.0,100,0.100564129182,500,1,selective_MLE
-0.71,0.7,0.905436507937,0.022380952381,0.725954560251,Selective MLE,6.62,5.08,0.988,0.988,100,0.0660453156033,500,1,selective_MLE
-0.71,0.7,0.0,0.0,0.0,Randomized LASSO,6.62,0.0,0.0,0.0,100,0.325589733329,500,1,selective_MLE
-0.71,0.7,0.879464321309,0.0939285714286,inf,Lee,20.46,4.26,0.816,0.744,100,0.0622398248064,500,1,selective_MLE
-0.71,0.7,0.706791161013,0.38,0.498224619244,Naive,23.26,0.8,1.0,0.0,100,0.0622398248064,500,1,selective_MLE
-1.22,0.7,0.897117604618,0.0233333333333,0.553150093591,Selective MLE,6.66,5.14,1.0,1.0,100,0.0314691475029,500,1,selective_MLE
-1.22,0.7,0.0,0.0,0.0,Randomized LASSO,6.66,0.0,0.0,0.0,100,0.20922378322,500,1,selective_MLE
-1.22,0.7,0.84938062082,0.11780952381,inf,Lee,22.3,4.78,0.832,0.764,100,0.034510480008,500,1,selective_MLE
-1.22,0.7,0.734174716546,0.38,0.384944868613,Naive,25.12,0.64,1.0,0.0,100,0.034510480008,500,1,selective_MLE
-2.07,0.7,0.895259018759,0.0233333333333,0.41944806981,Selective MLE,6.62,5.14,1.0,1.0,100,0.0178486248352,500,1,selective_MLE
-2.07,0.7,0.0,0.0,0.0,Randomized LASSO,6.62,0.0,0.0,0.0,100,0.115974002994,500,1,selective_MLE
-2.07,0.7,0.853498348449,0.117346153846,inf,Lee,22.68,4.68,0.82,0.772,100,0.0205041933808,500,1,selective_MLE
-2.07,0.7,0.753284561051,0.34,0.296225025241,Naive,24.9,0.66,1.0,0.0,100,0.0205041933808,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.csv b/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.csv
deleted file mode 100644
index 8bbf349b8..000000000
--- a/selection/adjusted_MLE/output/metrics_beta_type1_full_rho_0.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0,0.937457042957,0.02,2.43870954381,Selective MLE,4.72,0.56,0.192,0.1,100,1.02796717205,500,1,selective_MLE
-0.05,0,0.0,0.0,0.0,Randomized LASSO,4.72,0.0,0.0,0.0,100,0.820946505923,500,1,selective_MLE
-0.05,0,0.902203680618,0.035,inf,Lee,15.54,0.94,0.276,0.132,100,0.652411550711,500,1,selective_MLE
-0.05,0,0.565987015067,0.5,1.50601151103,Naive,18.58,1.34,0.7,0.0,100,0.652411550711,500,1,selective_MLE
-0.1,0,0.926107992008,0.0416666666667,1.60063013697,Selective MLE,8.68,2.0,0.48,0.38,100,0.682772681521,500,1,selective_MLE
-0.1,0,0.0,0.0,0.0,Randomized LASSO,8.68,0.0,0.0,0.0,100,0.615859220351,500,1,selective_MLE
-0.1,0,0.788245175539,0.166719169719,inf,Lee,20.36,3.2,0.556,0.372,100,0.418810019872,500,1,selective_MLE
-0.1,0,0.593770391156,0.48,1.06776996874,Naive,24.36,1.22,0.912,0.0,100,0.418810019872,500,1,selective_MLE
-0.15,0,0.938626762127,0.004,1.22111486797,Selective MLE,7.28,3.22,0.708,0.64,100,0.325984583304,500,1,selective_MLE
-0.15,0,0.0,0.0,0.0,Randomized LASSO,7.28,0.0,0.0,0.0,100,0.5151162648,500,1,selective_MLE
-0.15,0,0.873978371044,0.0903992673993,inf,Lee,21.74,2.86,0.644,0.464,100,0.280431627709,500,1,selective_MLE
-0.15,0,0.631333350474,0.38,0.873398104552,Naive,25.08,0.92,0.964,0.0,100,0.280431627709,500,1,selective_MLE
-0.2,0,0.891768897769,0.0206666666667,1.0338155556,Selective MLE,8.28,4.32,0.872,0.844,100,0.215462021939,500,1,selective_MLE
-0.2,0,0.0,0.0,0.0,Randomized LASSO,8.28,0.0,0.0,0.0,100,0.401905491611,500,1,selective_MLE
-0.2,0,0.861183444566,0.0970952380952,inf,Lee,23.46,3.48,0.74,0.536,100,0.214846497925,500,1,selective_MLE
-0.2,0,0.630855949609,0.34,0.759580774553,Naive,26.48,0.86,0.992,0.0,100,0.214846497925,500,1,selective_MLE
-0.25,0,0.905975468975,0.024,0.899819168512,Selective MLE,7.42,4.54,0.9,0.88,100,0.174473785317,500,1,selective_MLE
-0.25,0,0.0,0.0,0.0,Randomized LASSO,7.42,0.0,0.0,0.0,100,0.421809411384,500,1,selective_MLE
-0.25,0,0.864400247066,0.125833333333,inf,Lee,21.38,4.28,0.764,0.668,100,0.182037721298,500,1,selective_MLE
-0.25,0,0.608578806998,0.48,0.676868448936,Naive,24.06,1.3,0.996,0.0,100,0.182037721298,500,1,selective_MLE
-0.3,0,0.906860805861,0.0197142857143,0.791999074151,Selective MLE,7.0,4.94,0.964,0.964,100,0.118313600765,500,1,selective_MLE
-0.3,0,0.0,0.0,0.0,Randomized LASSO,7.0,0.0,0.0,0.0,100,0.333848112123,500,1,selective_MLE
-0.3,0,0.883543995909,0.0591904761905,inf,Lee,20.82,3.72,0.736,0.656,100,0.150299675758,500,1,selective_MLE
-0.3,0,0.615124498408,0.34,0.616692047402,Naive,24.16,0.8,1.0,0.0,100,0.150299675758,500,1,selective_MLE
-0.42,0,0.895063492063,0.022380952381,0.656207992641,Selective MLE,7.32,5.1,0.996,0.992,100,0.0685267959665,500,1,selective_MLE
-0.42,0,0.0,0.0,0.0,Randomized LASSO,7.32,0.0,0.0,0.0,100,0.278841228658,500,1,selective_MLE
-0.42,0,0.853230856303,0.144404761905,inf,Lee,21.96,4.54,0.8,0.72,100,0.122385160693,500,1,selective_MLE
-0.42,0,0.597283994482,0.44,0.52081007883,Naive,25.86,1.3,1.0,0.0,100,0.122385160693,500,1,selective_MLE
-0.71,0,0.895963768116,0.01,0.489990645513,Selective MLE,6.5,5.06,1.0,1.0,100,0.0302118943543,500,1,selective_MLE
-0.71,0,0.0,0.0,0.0,Randomized LASSO,6.5,0.0,0.0,0.0,100,0.200842080649,500,1,selective_MLE
-0.71,0,0.840865259701,0.129703463203,inf,Lee,21.3,4.52,0.78,0.736,100,0.064742081091,500,1,selective_MLE
-0.71,0,0.605603797089,0.44,0.404439089414,Naive,24.74,0.98,1.0,0.0,100,0.064742081091,500,1,selective_MLE
-1.22,0,0.878015151515,0.0,0.368012101716,Selective MLE,6.48,5.0,1.0,1.0,100,0.0178112548381,500,1,selective_MLE
-1.22,0,0.0,0.0,0.0,Randomized LASSO,6.48,0.0,0.0,0.0,100,0.153741474347,500,1,selective_MLE
-1.22,0,0.887908101558,0.0727619047619,inf,Lee,22.08,5.1,0.916,0.872,100,0.0355829221315,500,1,selective_MLE
-1.22,0,0.600077278822,0.44,0.305925814842,Naive,24.76,1.22,1.0,0.0,100,0.0355829221315,500,1,selective_MLE
-2.07,0,0.884706349206,0.0114285714286,0.27689442939,Selective MLE,6.18,5.08,1.0,1.0,100,0.0105093060895,500,1,selective_MLE
-2.07,0,0.0,0.0,0.0,Randomized LASSO,6.18,0.0,0.0,0.0,100,0.0905511133875,500,1,selective_MLE
-2.07,0,0.856255336237,0.12569047619,inf,Lee,21.82,5.48,0.94,0.904,100,0.0192982775325,500,1,selective_MLE
-2.07,0,0.611937525472,0.42,0.234382449577,Naive,25.3,0.84,1.0,0.0,100,0.0192982775325,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.35.csv b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.35.csv
deleted file mode 100644
index 37717b576..000000000
--- a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.35.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0.35,0.953446391446,0.01,2.62255933497,Selective MLE,6.74,0.3,0.128,0.056,100,1.37873397223,500,1,selective_MLE
-0.05,0.35,0.0,0.0,0.0,Randomized LASSO,6.74,0.0,0.0,0.0,100,0.85718568517,500,1,selective_MLE
-0.05,0.35,0.91206634392,0.0466666666667,inf,Lee,14.82,0.64,0.18,0.088,100,0.697798250784,500,1,selective_MLE
-0.05,0.35,0.623911071893,0.54,1.53350350149,Naive,17.72,1.18,0.6,0.0,100,0.697798250784,500,1,selective_MLE
-0.1,0.35,0.94304956155,0.038,1.71400822216,Selective MLE,7.9,1.6,0.416,0.304,100,0.72190312741,500,1,selective_MLE
-0.1,0.35,0.0,0.0,0.0,Randomized LASSO,7.9,0.0,0.0,0.0,100,0.636428859402,500,1,selective_MLE
-0.1,0.35,0.831318293013,0.107522536287,inf,Lee,19.74,2.22,0.5,0.252,100,0.419309318668,500,1,selective_MLE
-0.1,0.35,0.647898230764,0.3,1.09488163635,Naive,23.06,0.64,0.868,0.0,100,0.419309318668,500,1,selective_MLE
-0.15,0.35,0.893418470418,0.0477142857143,1.33303417535,Selective MLE,8.8,3.24,0.656,0.608,100,0.527093447425,500,1,selective_MLE
-0.15,0.35,0.0,0.0,0.0,Randomized LASSO,8.8,0.0,0.0,0.0,100,0.532820557278,500,1,selective_MLE
-0.15,0.35,0.883129892952,0.0510303030303,inf,Lee,22.82,2.46,0.532,0.392,100,0.30931592898,500,1,selective_MLE
-0.15,0.35,0.656039279891,0.4,0.904728692949,Naive,25.4,1.12,0.94,0.0,100,0.30931592898,500,1,selective_MLE
-0.2,0.35,0.904584804085,0.0482142857143,1.09913086753,Selective MLE,9.22,3.96,0.772,0.744,100,0.323355132192,500,1,selective_MLE
-0.2,0.35,0.0,0.0,0.0,Randomized LASSO,9.22,0.0,0.0,0.0,100,0.444429877595,500,1,selective_MLE
-0.2,0.35,0.881195349887,0.0685714285714,inf,Lee,21.24,3.4,0.692,0.588,100,0.246305559448,500,1,selective_MLE
-0.2,0.35,0.642143598466,0.36,0.771359441676,Naive,23.94,0.84,0.988,0.0,100,0.246305559448,500,1,selective_MLE
-0.25,0.35,0.888728485567,0.0173333333333,0.937853190268,Selective MLE,8.18,4.76,0.94,0.932,100,0.18706333101,500,1,selective_MLE
-0.25,0.35,0.0,0.0,0.0,Randomized LASSO,8.18,0.0,0.0,0.0,100,0.360765235691,500,1,selective_MLE
-0.25,0.35,0.864023356123,0.0857748917749,inf,Lee,21.66,3.42,0.704,0.584,100,0.174246008689,500,1,selective_MLE
-0.25,0.35,0.645451554632,0.38,0.699039380918,Naive,23.56,0.74,0.996,0.0,100,0.174246008689,500,1,selective_MLE
-0.3,0.35,0.900941284206,0.0166666666667,0.81640366547,Selective MLE,7.18,4.96,0.98,0.972,100,0.11590795158,500,1,selective_MLE
-0.3,0.35,0.0,0.0,0.0,Randomized LASSO,7.18,0.0,0.0,0.0,100,0.336916782573,500,1,selective_MLE
-0.3,0.35,0.910495466961,0.0765,inf,Lee,20.5,3.78,0.78,0.676,100,0.134503703797,500,1,selective_MLE
-0.3,0.35,0.651415225722,0.32,0.635206913155,Naive,23.18,0.82,1.0,0.0,100,0.134503703797,500,1,selective_MLE
-0.42,0.35,0.930399240856,0.00333333333333,0.639483506134,Selective MLE,6.84,5.02,1.0,1.0,100,0.0500593814501,500,1,selective_MLE
-0.42,0.35,0.0,0.0,0.0,Randomized LASSO,6.84,0.0,0.0,0.0,100,0.256875358635,500,1,selective_MLE
-0.42,0.35,0.832160402818,0.127043015808,inf,Lee,21.72,4.44,0.808,0.688,100,0.101018740148,500,1,selective_MLE
-0.42,0.35,0.686047173525,0.22,0.537081992933,Naive,24.7,0.68,1.0,0.0,100,0.101018740148,500,1,selective_MLE
-0.71,0.35,0.876014430014,0.01,0.480635758239,Selective MLE,6.94,5.06,1.0,1.0,100,0.0354428715806,500,1,selective_MLE
-0.71,0.35,0.0,0.0,0.0,Randomized LASSO,6.94,0.0,0.0,0.0,100,0.177950947921,500,1,selective_MLE
-0.71,0.35,0.811317398691,0.147659340659,inf,Lee,20.72,4.9,0.82,0.768,100,0.0588696020544,500,1,selective_MLE
-0.71,0.35,0.656579716621,0.38,0.412422762436,Naive,23.82,0.64,1.0,0.0,100,0.0588696020544,500,1,selective_MLE
-1.22,0.35,0.862783846872,0.00666666666667,0.357782078979,Selective MLE,6.88,5.04,1.0,1.0,100,0.0196990246932,500,1,selective_MLE
-1.22,0.35,0.0,0.0,0.0,Randomized LASSO,6.88,0.0,0.0,0.0,100,0.131259024663,500,1,selective_MLE
-1.22,0.35,0.907285507789,0.062880952381,inf,Lee,21.6,4.62,0.876,0.852,100,0.0361438615056,500,1,selective_MLE
-1.22,0.35,0.616838530693,0.42,0.312798676849,Naive,24.38,1.18,1.0,0.0,100,0.0361438615056,500,1,selective_MLE
-2.07,0.35,0.87792979243,0.00666666666667,0.263935686642,Selective MLE,6.2,5.04,1.0,1.0,100,0.0111903101344,500,1,selective_MLE
-2.07,0.35,0.0,0.0,0.0,Randomized LASSO,6.2,0.0,0.0,0.0,100,0.103825117154,500,1,selective_MLE
-2.07,0.35,0.870705509603,0.0995,inf,Lee,21.42,4.76,0.888,0.824,100,0.0227142973009,500,1,selective_MLE
-2.07,0.35,0.638173272898,0.38,0.241994303429,Naive,24.26,0.92,1.0,0.0,100,0.0227142973009,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.7.csv b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.7.csv
deleted file mode 100644
index b9f7b5d3d..000000000
--- a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.7.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0.7,0.941353246753,0.0466666666667,3.12847862728,Selective MLE,8.08,0.34,0.12,0.052,100,1.66586374221,500,1,selective_MLE
-0.05,0.7,0.0,0.0,0.0,Randomized LASSO,8.08,0.0,0.0,0.0,100,0.801798637534,500,1,selective_MLE
-0.05,0.7,0.84664048404,0.106333333333,inf,Lee,18.28,1.28,0.204,0.092,100,0.661064182407,500,1,selective_MLE
-0.05,0.7,0.759772511809,0.32,1.81220998005,Naive,20.7,0.6,0.388,0.0,100,0.661064182407,500,1,selective_MLE
-0.1,0.7,0.923905114493,0.065,2.04022769938,Selective MLE,8.38,1.48,0.384,0.26,100,0.799341484436,500,1,selective_MLE
-0.1,0.7,0.0,0.0,0.0,Randomized LASSO,8.38,0.0,0.0,0.0,100,0.562341962093,500,1,selective_MLE
-0.1,0.7,0.934468458444,0.0416666666667,inf,Lee,18.56,0.82,0.328,0.148,100,0.362204790134,500,1,selective_MLE
-0.1,0.7,0.772552814909,0.36,1.30374672061,Naive,20.16,0.62,0.6,0.0,100,0.362204790134,500,1,selective_MLE
-0.15,0.7,0.909303241203,0.0996666666667,1.61825315428,Selective MLE,9.32,2.62,0.576,0.464,100,0.480043897059,500,1,selective_MLE
-0.15,0.7,0.0,0.0,0.0,Randomized LASSO,9.32,0.0,0.0,0.0,100,0.46261866559,500,1,selective_MLE
-0.15,0.7,0.857411817184,0.0915555555556,inf,Lee,20.02,2.04,0.484,0.3,100,0.246989970283,500,1,selective_MLE
-0.15,0.7,0.746438916071,0.38,1.06442385769,Naive,22.18,0.64,0.784,0.0,100,0.246989970283,500,1,selective_MLE
-0.2,0.7,0.893055028305,0.0746666666667,1.34162708639,Selective MLE,9.2,3.46,0.7,0.632,100,0.350465323309,500,1,selective_MLE
-0.2,0.7,0.0,0.0,0.0,Randomized LASSO,9.2,0.0,0.0,0.0,100,0.399987898639,500,1,selective_MLE
-0.2,0.7,0.899794766829,0.0613333333333,inf,Lee,20.04,2.3,0.544,0.4,100,0.202248144831,500,1,selective_MLE
-0.2,0.7,0.723670204707,0.36,0.936604099722,Naive,22.14,0.66,0.828,0.0,100,0.202248144831,500,1,selective_MLE
-0.25,0.7,0.901028776779,0.0600476190476,1.10528070685,Selective MLE,7.96,4.3,0.824,0.796,100,0.231265018526,500,1,selective_MLE
-0.25,0.7,0.0,0.0,0.0,Randomized LASSO,7.96,0.0,0.0,0.0,100,0.391931305213,500,1,selective_MLE
-0.25,0.7,0.869938608551,0.058,inf,Lee,19.4,2.36,0.584,0.408,100,0.172239159064,500,1,selective_MLE
-0.25,0.7,0.734517071822,0.3,0.825343778303,Naive,20.96,0.52,0.9,0.0,100,0.172239159064,500,1,selective_MLE
-0.3,0.7,0.903070593622,0.0580952380952,1.07247799185,Selective MLE,9.46,4.5,0.868,0.836,100,0.207613886764,500,1,selective_MLE
-0.3,0.7,0.0,0.0,0.0,Randomized LASSO,9.46,0.0,0.0,0.0,100,0.365459757906,500,1,selective_MLE
-0.3,0.7,0.837387555884,0.131878787879,inf,Lee,20.3,3.48,0.66,0.536,100,0.137834199808,500,1,selective_MLE
-0.3,0.7,0.725759395522,0.32,0.76482979869,Naive,22.32,0.46,0.944,0.0,100,0.137834199808,500,1,selective_MLE
-0.42,0.7,0.916862914863,0.0423333333333,0.792847708267,Selective MLE,7.96,4.94,0.952,0.944,100,0.103537820619,500,1,selective_MLE
-0.42,0.7,0.0,0.0,0.0,Randomized LASSO,7.96,0.0,0.0,0.0,100,0.321212638744,500,1,selective_MLE
-0.42,0.7,0.876272476718,0.082,inf,Lee,22.58,3.2,0.672,0.552,100,0.101927117901,500,1,selective_MLE
-0.42,0.7,0.745566797024,0.32,0.651727263064,Naive,24.72,0.64,0.988,0.0,100,0.101927117901,500,1,selective_MLE
-0.71,0.7,0.911663780664,0.00666666666667,0.574890188171,Selective MLE,7.18,5.02,1.0,0.996,100,0.0397673470199,500,1,selective_MLE
-0.71,0.7,0.0,0.0,0.0,Randomized LASSO,7.18,0.0,0.0,0.0,100,0.250400422185,500,1,selective_MLE
-0.71,0.7,0.868175712041,0.105714285714,inf,Lee,20.78,4.44,0.828,0.74,100,0.0603137823088,500,1,selective_MLE
-0.71,0.7,0.737111160385,0.5,0.493834490485,Naive,23.82,0.86,1.0,0.0,100,0.0603137823088,500,1,selective_MLE
-1.22,0.7,0.893790598291,0.0238095238095,0.421277992252,Selective MLE,7.06,5.16,1.0,1.0,100,0.0197899774304,500,1,selective_MLE
-1.22,0.7,0.0,0.0,0.0,Randomized LASSO,7.06,0.0,0.0,0.0,100,0.171959642058,500,1,selective_MLE
-1.22,0.7,0.85568554212,0.0900952380952,inf,Lee,20.96,4.4,0.836,0.768,100,0.0331405157854,500,1,selective_MLE
-1.22,0.7,0.69805206367,0.5,0.376074177624,Naive,23.7,1.1,1.0,0.0,100,0.0331405157854,500,1,selective_MLE
-2.07,0.7,0.918686094951,0.0166666666667,0.31458774565,Selective MLE,6.98,5.1,1.0,1.0,100,0.0132487406717,500,1,selective_MLE
-2.07,0.7,0.0,0.0,0.0,Randomized LASSO,6.98,0.0,0.0,0.0,100,0.105343758224,500,1,selective_MLE
-2.07,0.7,0.896404172114,0.0737619047619,inf,Lee,18.86,4.74,0.876,0.872,100,0.0196362653582,500,1,selective_MLE
-2.07,0.7,0.745607621443,0.4,0.284394427217,Naive,21.04,0.68,1.0,0.0,100,0.0196362653582,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.csv b/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.csv
deleted file mode 100644
index f07d7949e..000000000
--- a/selection/adjusted_MLE/output/metrics_beta_type1_selected_rho_0.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0,0.932472356019,0.03,2.6324941767,Selective MLE,5.98,0.34,0.144,0.06,100,1.32630280485,500,1,selective_MLE
-0.05,0,0.0,0.0,0.0,Randomized LASSO,5.98,0.0,0.0,0.0,100,0.850486099629,500,1,selective_MLE
-0.05,0,0.860325496125,0.0786666666667,inf,Lee,15.4,1.16,0.248,0.128,100,0.743435422189,500,1,selective_MLE
-0.05,0,0.58166636283,0.36,1.50526063476,Naive,18.64,0.72,0.624,0.0,100,0.743435422189,500,1,selective_MLE
-0.1,0,0.918493841556,0.0636666666667,1.60463392779,Selective MLE,7.62,2.08,0.452,0.368,100,0.790410383997,500,1,selective_MLE
-0.1,0,0.0,0.0,0.0,Randomized LASSO,7.62,0.0,0.0,0.0,100,0.633263657991,500,1,selective_MLE
-0.1,0,0.772588728079,0.143692918193,inf,Lee,18.2,3.18,0.592,0.384,100,0.402338513706,500,1,selective_MLE
-0.1,0,0.638335673122,0.48,1.06272306187,Naive,22.58,1.14,0.9,0.0,100,0.402338513706,500,1,selective_MLE
-0.15,0,0.930189535954,0.0426666666667,1.26801056055,Selective MLE,8.9,3.36,0.72,0.64,100,0.441970517896,500,1,selective_MLE
-0.15,0,0.0,0.0,0.0,Randomized LASSO,8.9,0.0,0.0,0.0,100,0.500278735638,500,1,selective_MLE
-0.15,0,0.861172095308,0.0819047619048,inf,Lee,23.32,2.64,0.584,0.412,100,0.311910915364,500,1,selective_MLE
-0.15,0,0.631503502131,0.4,0.87618977193,Naive,26.48,0.78,0.976,0.0,100,0.311910915364,500,1,selective_MLE
-0.2,0,0.891537668214,0.045380952381,1.06823603924,Selective MLE,9.58,4.38,0.88,0.828,100,0.295231118235,500,1,selective_MLE
-0.2,0,0.0,0.0,0.0,Randomized LASSO,9.58,0.0,0.0,0.0,100,0.41184090871,500,1,selective_MLE
-0.2,0,0.873406617318,0.0773709273183,inf,Lee,22.54,3.38,0.676,0.552,100,0.225929760535,500,1,selective_MLE
-0.2,0,0.615013356706,0.26,0.754970800244,Naive,26.28,0.58,0.992,0.0,100,0.225929760535,500,1,selective_MLE
-0.25,0,0.89275951826,0.0173333333333,0.88119704876,Selective MLE,8.18,4.64,0.924,0.908,100,0.182150423954,500,1,selective_MLE
-0.25,0,0.0,0.0,0.0,Randomized LASSO,8.18,0.0,0.0,0.0,100,0.329875217599,500,1,selective_MLE
-0.25,0,0.862133418685,0.0904706959707,inf,Lee,23.42,4.14,0.756,0.64,100,0.178438719613,500,1,selective_MLE
-0.25,0,0.611743771144,0.48,0.674957724008,Naive,26.86,1.42,1.0,0.0,100,0.178438719613,500,1,selective_MLE
-0.3,0,0.916427925016,0.0285714285714,0.79173975785,Selective MLE,7.5,5.02,0.976,0.968,100,0.111715425255,500,1,selective_MLE
-0.3,0,0.0,0.0,0.0,Randomized LASSO,7.5,0.0,0.0,0.0,100,0.298821814837,500,1,selective_MLE
-0.3,0,0.911144418584,0.0596168831169,inf,Lee,22.0,4.12,0.84,0.736,100,0.137883197407,500,1,selective_MLE
-0.3,0,0.623022913068,0.3,0.616177690356,Naive,25.68,0.82,1.0,0.0,100,0.137883197407,500,1,selective_MLE
-0.42,0,0.902132034632,0.0157142857143,0.635633387241,Selective MLE,7.18,5.06,0.992,0.992,100,0.0713444446047,500,1,selective_MLE
-0.42,0,0.0,0.0,0.0,Randomized LASSO,7.18,0.0,0.0,0.0,100,0.243721553208,500,1,selective_MLE
-0.42,0,0.892962359305,0.056380952381,inf,Lee,22.28,3.96,0.748,0.688,100,0.0969747510687,500,1,selective_MLE
-0.42,0,0.601893799756,0.38,0.519658907133,Naive,26.24,1.18,1.0,0.0,100,0.0969747510687,500,1,selective_MLE
-0.71,0,0.913706349206,0.00666666666667,0.458282318816,Selective MLE,6.28,5.0,0.992,0.992,100,0.0321801187824,500,1,selective_MLE
-0.71,0,0.0,0.0,0.0,Randomized LASSO,6.28,0.0,0.0,0.0,100,0.218274335294,500,1,selective_MLE
-0.71,0,0.907448196543,0.0443846153846,inf,Lee,22.62,4.5,0.872,0.832,100,0.0601112928232,500,1,selective_MLE
-0.71,0,0.645894221103,0.32,0.400115092722,Naive,26.46,0.94,1.0,0.0,100,0.0601112928232,500,1,selective_MLE
-1.22,0,0.89423981574,0.0190476190476,0.36355554238,Selective MLE,6.96,5.12,1.0,1.0,100,0.024659280186,500,1,selective_MLE
-1.22,0,0.0,0.0,0.0,Randomized LASSO,6.96,0.0,0.0,0.0,100,0.110645464006,500,1,selective_MLE
-1.22,0,0.843731225696,0.129650793651,inf,Lee,21.5,4.9,0.844,0.78,100,0.0361396721766,500,1,selective_MLE
-1.22,0,0.573358425381,0.36,0.304981895518,Naive,24.02,0.88,1.0,0.0,100,0.0361396721766,500,1,selective_MLE
-2.07,0,0.903992063492,0.00666666666667,0.267634909387,Selective MLE,6.66,5.04,1.0,1.0,100,0.00916534444897,500,1,selective_MLE
-2.07,0,0.0,0.0,0.0,Randomized LASSO,6.66,0.0,0.0,0.0,100,0.0798053674236,500,1,selective_MLE
-2.07,0,0.864089754713,0.109571428571,inf,Lee,22.6,4.92,0.88,0.808,100,0.0217887602061,500,1,selective_MLE
-2.07,0,0.63382150953,0.44,0.234850586616,Naive,25.6,0.84,1.0,0.0,100,0.0217887602061,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35.csv
deleted file mode 100644
index d0e0c2dfa..000000000
--- a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0.35,0.86380952381,0.16,13.0374740184,Selective MLE,2.08,0.16,0.002,0.0,1000,5.02989513105,200,1,selective_MLE
-0.05,0.35,0.0,0.0,0.0,Randomized LASSO,2.08,0.0,0.0,0.0,1000,1.03326475867,200,1,selective_MLE
-0.05,0.35,0.810161064426,0.1775,inf,Lee,7.06,0.66,0.01,0.002,1000,1.15804369753,200,1,selective_MLE
-0.05,0.35,0.1912071848,0.76,3.51752981257,Naive,10.42,2.08,0.028,0.0,1000,1.15804369753,200,1,selective_MLE
-0.1,0.35,0.897142857143,0.08,9.71567962848,Selective MLE,1.82,0.14,0.006,0.004,1000,2.95736722228,200,1,selective_MLE
-0.1,0.35,0.0,0.0,0.0,Randomized LASSO,1.82,0.0,0.0,0.0,1000,1.00318150658,200,1,selective_MLE
-0.1,0.35,0.868062434138,0.0953846153846,inf,Lee,10.6,0.5,0.022,0.014,1000,1.08288774171,200,1,selective_MLE
-0.1,0.35,0.306908254952,0.64,2.55676876557,Naive,14.78,1.6,0.082,0.0,1000,1.08288774171,200,1,selective_MLE
-0.15,0.35,0.924285714286,0.02,8.12544927375,Selective MLE,1.7,0.1,0.01,0.008,1000,1.78432174263,200,1,selective_MLE
-0.15,0.35,0.0,0.0,0.0,Randomized LASSO,1.7,0.0,0.0,0.0,1000,0.983208270296,200,1,selective_MLE
-0.15,0.35,0.852194383721,0.143166666667,inf,Lee,9.98,0.96,0.042,0.02,1000,0.993737003883,200,1,selective_MLE
-0.15,0.35,0.337529616061,0.58,2.13063655281,Naive,14.76,1.7,0.122,0.0,1000,0.993737003883,200,1,selective_MLE
-0.2,0.35,0.91380952381,0.1,6.73586062053,Selective MLE,2.14,0.1,0.002,0.0,1000,2.19256133433,200,1,selective_MLE
-0.2,0.35,0.0,0.0,0.0,Randomized LASSO,2.14,0.0,0.0,0.0,1000,0.991266295579,200,1,selective_MLE
-0.2,0.35,0.766471372755,0.172,inf,Lee,14.1,0.92,0.05,0.026,1000,0.992526772626,200,1,selective_MLE
-0.2,0.35,0.482946940064,0.56,1.92496100515,Naive,20.98,1.5,0.166,0.0,1000,0.992526772626,200,1,selective_MLE
-0.25,0.35,0.874333333333,0.1,5.7006648181,Selective MLE,2.6,0.22,0.016,0.012,1000,1.80139037275,200,1,selective_MLE
-0.25,0.35,0.0,0.0,0.0,Randomized LASSO,2.6,0.0,0.0,0.0,1000,0.976643552483,200,1,selective_MLE
-0.25,0.35,0.786518225676,0.11119047619,inf,Lee,17.44,1.4,0.088,0.044,1000,0.895249457402,200,1,selective_MLE
-0.25,0.35,0.598940055094,0.42,1.7871635152,Naive,26.52,1.08,0.216,0.0,1000,0.895249457402,200,1,selective_MLE
-0.3,0.35,0.883598484848,0.08,5.99079681341,Selective MLE,3.14,0.12,0.016,0.004,1000,1.79804896466,200,1,selective_MLE
-0.3,0.35,0.0,0.0,0.0,Randomized LASSO,3.14,0.0,0.0,0.0,1000,0.97217454907,200,1,selective_MLE
-0.3,0.35,0.850180818168,0.0937095188953,inf,Lee,18.84,1.32,0.092,0.028,1000,0.86628788711,200,1,selective_MLE
-0.3,0.35,0.637800000485,0.38,1.65945204094,Naive,27.22,1.04,0.244,0.0,1000,0.86628788711,200,1,selective_MLE
-0.42,0.35,0.939222222222,0.0466666666667,4.48839314161,Selective MLE,3.5,0.36,0.04,0.03,1000,1.38678502316,200,1,selective_MLE
-0.42,0.35,0.0,0.0,0.0,Randomized LASSO,3.5,0.0,0.0,0.0,1000,0.950472355433,200,1,selective_MLE
-0.42,0.35,0.813218122313,0.142135142721,inf,Lee,30.2,2.54,0.168,0.058,1000,0.760968826709,200,1,selective_MLE
-0.42,0.35,0.821904659163,0.1,1.56279526504,Naive,42.0,0.3,0.304,0.0,1000,0.760968826709,200,1,selective_MLE
-0.71,0.35,0.845983079609,0.0618571428571,2.92592844044,Selective MLE,8.7,1.94,0.21,0.172,1000,1.17719079209,200,1,selective_MLE
-0.71,0.35,0.0,0.0,0.0,Randomized LASSO,8.7,0.0,0.0,0.0,1000,0.853407944406,200,1,selective_MLE
-0.71,0.35,0.724240274315,0.150139194139,inf,Lee,39.4,3.52,0.266,0.104,1000,0.545188750369,200,1,selective_MLE
-0.71,0.35,0.897369823919,0.0,1.37021257383,Naive,52.54,0.0,0.458,0.0,1000,0.545188750369,200,1,selective_MLE
-1.22,0.35,0.846472687459,0.139342712843,1.62609900699,Selective MLE,17.96,6.02,0.568,0.506,1000,0.828578087539,200,1,selective_MLE
-1.22,0.35,0.0,0.0,0.0,Randomized LASSO,17.96,0.0,0.0,0.0,1000,0.669072845661,200,1,selective_MLE
-1.22,0.35,0.697092694354,0.187363717137,inf,Lee,51.82,4.82,0.32,0.118,1000,0.354254840901,200,1,selective_MLE
-1.22,0.35,0.968201494975,0.0,1.22242066847,Naive,64.82,0.0,0.61,0.0,1000,0.354254840901,200,1,selective_MLE
-2.07,0.35,0.821919854055,0.122014403897,1.1382269201,Selective MLE,19.38,8.82,0.784,0.754,1000,0.446110763277,200,1,selective_MLE
-2.07,0.35,0.0,0.0,0.0,Randomized LASSO,19.38,0.0,0.0,0.0,1000,0.567708010316,200,1,selective_MLE
-2.07,0.35,0.786147231511,0.120987886383,inf,Lee,51.26,3.02,0.252,0.072,1000,0.207900773568,200,1,selective_MLE
-2.07,0.35,0.987254893848,0.0,1.10094183201,Naive,62.74,0.0,0.858,0.0,1000,0.207900773568,200,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35_tRL.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35_tRL.csv
deleted file mode 100644
index 4247454e2..000000000
--- a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.35_tRL.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0.35,0.904560538674,0.27,10.1823227415,Selective MLE,13.28,0.5,0.008,0.004,1000,11.14355622,200,1,randomized_LASSO
-0.05,0.35,0.0,0.0,0.0,Randomized LASSO,13.28,0.0,0.0,0.0,1000,1.27519023435,200,1,randomized_LASSO
-0.05,0.35,0.814698714326,0.153333333333,inf,Lee,9.02,0.66,0.014,0.006,1000,1.22947708117,200,1,randomized_LASSO
-0.05,0.35,0.215435589303,0.58,3.49976162582,Naive,13.46,1.5,0.048,0.0,1000,1.22947708117,200,1,randomized_LASSO
-0.1,0.35,0.901193326107,0.13,7.2351485447,Selective MLE,12.18,0.24,0.018,0.006,1000,5.67042617943,200,1,randomized_LASSO
-0.1,0.35,0.0,0.0,0.0,Randomized LASSO,12.18,0.0,0.0,0.0,1000,1.09469903763,200,1,randomized_LASSO
-0.1,0.35,0.852926295926,0.0571428571429,inf,Lee,8.42,0.28,0.012,0.006,1000,1.03997065233,200,1,randomized_LASSO
-0.1,0.35,0.280898609117,0.68,2.53227893337,Naive,12.64,1.74,0.074,0.0,1000,1.03997065233,200,1,randomized_LASSO
-0.15,0.35,0.882759318987,0.19,5.81389463446,Selective MLE,15.78,0.4,0.054,0.018,1000,5.11745115543,200,1,randomized_LASSO
-0.15,0.35,0.0,0.0,0.0,Randomized LASSO,15.78,0.0,0.0,0.0,1000,1.0261449909,200,1,randomized_LASSO
-0.15,0.35,0.8636695845,0.108,inf,Lee,10.8,0.96,0.04,0.012,1000,0.970779284886,200,1,randomized_LASSO
-0.15,0.35,0.363206299726,0.5,2.14167127404,Naive,15.68,1.5,0.148,0.0,1000,0.970779284886,200,1,randomized_LASSO
-0.2,0.35,0.878854714053,0.136666666667,4.93075717257,Selective MLE,17.5,0.56,0.074,0.036,1000,3.94134638117,200,1,randomized_LASSO
-0.2,0.35,0.0,0.0,0.0,Randomized LASSO,17.5,0.0,0.0,0.0,1000,1.00570908043,200,1,randomized_LASSO
-0.2,0.35,0.811921267909,0.129456140351,inf,Lee,12.92,0.96,0.062,0.03,1000,0.955857160231,200,1,randomized_LASSO
-0.2,0.35,0.420908411408,0.46,1.92294662266,Naive,18.42,1.42,0.17,0.0,1000,0.955857160231,200,1,randomized_LASSO
-0.25,0.35,0.904443856452,0.103333333333,4.38141540518,Selective MLE,16.36,0.62,0.094,0.038,1000,2.96076741876,200,1,randomized_LASSO
-0.25,0.35,0.0,0.0,0.0,Randomized LASSO,16.36,0.0,0.0,0.0,1000,0.963436312334,200,1,randomized_LASSO
-0.25,0.35,0.829443531547,0.105692307692,inf,Lee,15.44,1.06,0.09,0.032,1000,0.899580794678,200,1,randomized_LASSO
-0.25,0.35,0.554338716916,0.5,1.78138367145,Naive,22.08,1.04,0.244,0.0,1000,0.899580794678,200,1,randomized_LASSO
-0.3,0.35,0.870643854672,0.124,3.48480528025,Selective MLE,22.24,1.0,0.156,0.068,1000,2.72989344456,200,1,randomized_LASSO
-0.3,0.35,0.0,0.0,0.0,Randomized LASSO,22.24,0.0,0.0,0.0,1000,0.949610403149,200,1,randomized_LASSO
-0.3,0.35,0.774245773293,0.126057971014,inf,Lee,19.16,2.16,0.132,0.064,1000,0.861327468008,200,1,randomized_LASSO
-0.3,0.35,0.62055068257,0.36,1.65643370396,Naive,28.08,0.74,0.232,0.0,1000,0.861327468008,200,1,randomized_LASSO
-0.42,0.35,0.871499391079,0.219095238095,2.92679636788,Selective MLE,23.12,2.1,0.214,0.136,1000,2.29869229231,200,1,randomized_LASSO
-0.42,0.35,0.0,0.0,0.0,Randomized LASSO,23.12,0.0,0.0,0.0,1000,0.876389275514,200,1,randomized_LASSO
-0.42,0.35,0.766220794294,0.151175438596,inf,Lee,24.14,2.18,0.17,0.058,1000,0.760023082731,200,1,randomized_LASSO
-0.42,0.35,0.723070401959,0.18,1.51698380468,Naive,33.84,0.38,0.286,0.0,1000,0.760023082731,200,1,randomized_LASSO
-0.71,0.35,0.832780761273,0.240670592973,1.91985249395,Selective MLE,32.84,5.38,0.438,0.332,1000,1.90473171699,200,1,randomized_LASSO
-0.71,0.35,0.0,0.0,0.0,Randomized LASSO,32.84,0.0,0.0,0.0,1000,0.747119128815,200,1,randomized_LASSO
-0.71,0.35,0.743799420992,0.176050664312,inf,Lee,37.26,4.12,0.25,0.096,1000,0.56797924093,200,1,randomized_LASSO
-0.71,0.35,0.899408727514,0.02,1.33828834119,Naive,51.22,0.04,0.428,0.0,1000,0.56797924093,200,1,randomized_LASSO
-1.22,0.35,0.824092627619,0.23783567413,1.40145975774,Selective MLE,31.56,8.12,0.66,0.602,1000,0.918711011887,200,1,randomized_LASSO
-1.22,0.35,0.0,0.0,0.0,Randomized LASSO,31.56,0.0,0.0,0.0,1000,0.607598814246,200,1,randomized_LASSO
-1.22,0.35,0.735296600906,0.178628554258,inf,Lee,47.24,5.24,0.302,0.124,1000,0.364022589518,200,1,randomized_LASSO
-1.22,0.35,0.952273896683,0.0,1.20644489562,Naive,58.0,0.0,0.636,0.0,1000,0.364022589518,200,1,randomized_LASSO
-2.07,0.35,0.772575484785,0.25638804377,1.00691373662,Selective MLE,34.18,11.52,0.83,0.804,1000,0.81289768376,200,1,randomized_LASSO
-2.07,0.35,0.0,0.0,0.0,Randomized LASSO,34.18,0.0,0.0,0.0,1000,0.501801832857,200,1,randomized_LASSO
-2.07,0.35,0.719978731909,0.217756312011,inf,Lee,52.66,5.64,0.306,0.11,1000,0.225363033778,200,1,randomized_LASSO
-2.07,0.35,0.979314360862,0.0,1.10471415905,Naive,61.34,0.0,0.808,0.0,1000,0.225363033778,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.7_tRL.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.7_tRL.csv
deleted file mode 100644
index 9a83e75fc..000000000
--- a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.7_tRL.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0.7,0.922039239687,0.04,26.7382076911,Selective MLE,11.66,0.04,0.006,0.0,1000,66.5802029991,200,1,randomized_LASSO
-0.05,0.7,0.0,0.0,0.0,Randomized LASSO,11.66,0.0,0.0,0.0,1000,1.19241631522,200,1,randomized_LASSO
-0.05,0.7,0.831904761905,0.13580952381,inf,Lee,7.1,0.9,0.01,0.004,1000,1.09243672774,200,1,randomized_LASSO
-0.05,0.7,0.270358916792,0.7,3.68320282859,Naive,9.44,1.6,0.042,0.0,1000,1.09243672774,200,1,randomized_LASSO
-0.1,0.7,0.928723475835,0.06,16.7480169573,Selective MLE,14.62,0.1,0.014,0.002,1000,38.3852852404,200,1,randomized_LASSO
-0.1,0.7,0.0,0.0,0.0,Randomized LASSO,14.62,0.0,0.0,0.0,1000,1.08132675964,200,1,randomized_LASSO
-0.1,0.7,0.797081201567,0.163333333333,inf,Lee,8.2,0.42,0.028,0.01,1000,1.03348868058,200,1,randomized_LASSO
-0.1,0.7,0.322675568223,0.62,2.64005149869,Naive,11.44,1.56,0.074,0.0,1000,1.03348868058,200,1,randomized_LASSO
-0.15,0.7,0.915431178923,0.06,12.6546074846,Selective MLE,19.12,0.16,0.02,0.008,1000,26.7440029516,200,1,randomized_LASSO
-0.15,0.7,0.0,0.0,0.0,Randomized LASSO,19.12,0.0,0.0,0.0,1000,1.05911312813,200,1,randomized_LASSO
-0.15,0.7,0.835593582888,0.141904761905,inf,Lee,12.14,1.16,0.032,0.014,1000,0.97478395775,200,1,randomized_LASSO
-0.15,0.7,0.488651842883,0.58,2.28796404695,Naive,17.02,1.14,0.11,0.0,1000,0.97478395775,200,1,randomized_LASSO
-0.2,0.7,0.929907924884,0.0433333333333,11.0906038198,Selective MLE,17.38,0.14,0.028,0.006,1000,20.0408717049,200,1,randomized_LASSO
-0.2,0.7,0.0,0.0,0.0,Randomized LASSO,17.38,0.0,0.0,0.0,1000,0.978984630566,200,1,randomized_LASSO
-0.2,0.7,0.851145612054,0.0453787878788,inf,Lee,13.02,0.62,0.048,0.028,1000,0.91935867248,200,1,randomized_LASSO
-0.2,0.7,0.498082557816,0.42,2.00267496449,Naive,18.48,0.94,0.142,0.0,1000,0.91935867248,200,1,randomized_LASSO
-0.25,0.7,0.932610591671,0.0,9.82534260533,Selective MLE,19.14,0.04,0.036,0.004,1000,15.6008974535,200,1,randomized_LASSO
-0.25,0.7,0.0,0.0,0.0,Randomized LASSO,19.14,0.0,0.0,0.0,1000,0.969227518518,200,1,randomized_LASSO
-0.25,0.7,0.864711775957,0.0647619047619,inf,Lee,18.82,0.8,0.064,0.018,1000,0.885846251708,200,1,randomized_LASSO
-0.25,0.7,0.607958829559,0.24,1.91984322427,Naive,25.94,0.54,0.174,0.0,1000,0.885846251708,200,1,randomized_LASSO
-0.3,0.7,0.900900980781,0.03,8.33118546751,Selective MLE,23.84,0.14,0.064,0.01,1000,14.670816331,200,1,randomized_LASSO
-0.3,0.7,0.0,0.0,0.0,Randomized LASSO,23.84,0.0,0.0,0.0,1000,0.938287802512,200,1,randomized_LASSO
-0.3,0.7,0.744268267323,0.167569489334,inf,Lee,21.12,2.0,0.104,0.042,1000,0.827632432351,200,1,randomized_LASSO
-0.3,0.7,0.658147077777,0.18,1.78476753909,Naive,27.86,0.4,0.194,0.0,1000,0.827632432351,200,1,randomized_LASSO
-0.42,0.7,0.929540607176,0.0566666666667,6.80360118209,Selective MLE,27.46,0.24,0.11,0.016,1000,13.5209534407,200,1,randomized_LASSO
-0.42,0.7,0.0,0.0,0.0,Randomized LASSO,27.46,0.0,0.0,0.0,1000,0.844098099742,200,1,randomized_LASSO
-0.42,0.7,0.828304221914,0.118290598291,inf,Lee,27.26,1.14,0.116,0.03,1000,0.719350085744,200,1,randomized_LASSO
-0.42,0.7,0.782597848276,0.18,1.6578804247,Naive,36.58,0.28,0.224,0.0,1000,0.719350085744,200,1,randomized_LASSO
-0.71,0.7,0.889349872267,0.113095238095,4.67826236113,Selective MLE,32.88,0.98,0.226,0.06,1000,7.70099169377,200,1,randomized_LASSO
-0.71,0.7,0.0,0.0,0.0,Randomized LASSO,32.88,0.0,0.0,0.0,1000,0.730480536029,200,1,randomized_LASSO
-0.71,0.7,0.859988542109,0.0599251336898,inf,Lee,40.34,1.38,0.154,0.03,1000,0.520966311478,200,1,randomized_LASSO
-0.71,0.7,0.918887154994,0.0,1.46136235542,Naive,49.7,0.0,0.382,0.0,1000,0.520966311478,200,1,randomized_LASSO
-1.22,0.7,0.847615136972,0.213984126984,3.38018198745,Selective MLE,33.92,3.2,0.472,0.236,1000,4.80133134411,200,1,randomized_LASSO
-1.22,0.7,0.0,0.0,0.0,Randomized LASSO,33.92,0.0,0.0,0.0,1000,0.574001051024,200,1,randomized_LASSO
-1.22,0.7,0.825169195991,0.10756017316,inf,Lee,49.1,2.14,0.194,0.044,1000,0.322558328992,200,1,randomized_LASSO
-1.22,0.7,0.965361186761,0.0,1.36091425418,Naive,57.4,0.0,0.504,0.0,1000,0.322558328992,200,1,randomized_LASSO
-2.07,0.7,0.774512289686,0.225146242646,2.36868393184,Selective MLE,34.82,7.78,0.722,0.578,1000,2.90326565422,200,1,randomized_LASSO
-2.07,0.7,0.0,0.0,0.0,Randomized LASSO,34.82,0.0,0.0,0.0,1000,0.439628497143,200,1,randomized_LASSO
-2.07,0.7,0.747848973929,0.161774509804,inf,Lee,52.44,2.96,0.282,0.07,1000,0.189410896637,200,1,randomized_LASSO
-2.07,0.7,0.986016239696,0.0,1.23917614471,Naive,59.64,0.0,0.652,0.0,1000,0.189410896637,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.csv
deleted file mode 100644
index 47dbf5638..000000000
--- a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0,0.939333333333,0.02,12.4883493047,Selective MLE,1.98,0.04,0.002,0.002,1000,3.24705655557,200,1,selective_MLE
-0.05,0,0.0,0.0,0.0,Randomized LASSO,1.98,0.0,0.0,0.0,1000,1.01323890467,200,1,selective_MLE
-0.05,0,0.840792221,0.138,inf,Lee,7.68,0.76,0.012,0.004,1000,1.18979142946,200,1,selective_MLE
-0.05,0,0.182062781828,0.82,3.51806995253,Naive,11.78,2.46,0.032,0.0,1000,1.18979142946,200,1,selective_MLE
-0.1,0,0.948142857143,0.06,7.72927664108,Selective MLE,2.1,0.06,0.002,0.0,1000,2.20315756913,200,1,selective_MLE
-0.1,0,0.0,0.0,0.0,Randomized LASSO,2.1,0.0,0.0,0.0,1000,0.994559752969,200,1,selective_MLE
-0.1,0,0.817267346017,0.125,inf,Lee,8.36,0.66,0.018,0.01,1000,1.0499982218,200,1,selective_MLE
-0.1,0,0.259410577097,0.74,2.51991380922,Naive,11.48,2.18,0.092,0.0,1000,1.0499982218,200,1,selective_MLE
-0.15,0,0.94331372549,0.06,6.34782521321,Selective MLE,3.4,0.1,0.006,0.002,1000,1.98416436442,200,1,selective_MLE
-0.15,0,0.0,0.0,0.0,Randomized LASSO,3.4,0.0,0.0,0.0,1000,0.995273245034,200,1,selective_MLE
-0.15,0,0.843812152985,0.111397435897,inf,Lee,11.76,0.96,0.046,0.018,1000,0.986295023502,200,1,selective_MLE
-0.15,0,0.427335079752,0.6,2.13817019831,Naive,17.76,1.64,0.144,0.0,1000,0.986295023502,200,1,selective_MLE
-0.2,0,0.90203030303,0.0933333333333,5.72350149651,Selective MLE,2.96,0.34,0.024,0.016,1000,1.85651551225,200,1,selective_MLE
-0.2,0,0.0,0.0,0.0,Randomized LASSO,2.96,0.0,0.0,0.0,1000,0.978648208349,200,1,selective_MLE
-0.2,0,0.871329972555,0.0879191919192,inf,Lee,11.34,1.0,0.05,0.032,1000,0.946348528327,200,1,selective_MLE
-0.2,0,0.440246057252,0.64,1.89304610067,Naive,17.38,1.98,0.156,0.0,1000,0.946348528327,200,1,selective_MLE
-0.25,0,0.923824675325,0.04,4.8411497362,Selective MLE,2.92,0.28,0.024,0.024,1000,1.35427531353,200,1,selective_MLE
-0.25,0,0.0,0.0,0.0,Randomized LASSO,2.92,0.0,0.0,0.0,1000,0.963936970096,200,1,selective_MLE
-0.25,0,0.799817592593,0.109792207792,inf,Lee,17.6,1.18,0.108,0.038,1000,0.877901846227,200,1,selective_MLE
-0.25,0,0.60424285517,0.38,1.78254634538,Naive,26.46,1.24,0.256,0.0,1000,0.877901846227,200,1,selective_MLE
-0.3,0,0.962333333333,0.02,4.0846953987,Selective MLE,4.1,0.28,0.036,0.024,1000,1.27509640458,200,1,selective_MLE
-0.3,0,0.0,0.0,0.0,Randomized LASSO,4.1,0.0,0.0,0.0,1000,0.963413654406,200,1,selective_MLE
-0.3,0,0.740728587282,0.14370148857,inf,Lee,22.58,2.64,0.176,0.064,1000,0.871637370414,200,1,selective_MLE
-0.3,0,0.690347872224,0.32,1.71056902174,Naive,32.36,0.74,0.246,0.0,1000,0.871637370414,200,1,selective_MLE
-0.42,0,0.908340548341,0.0333333333333,3.4626911418,Selective MLE,6.06,0.84,0.13,0.078,1000,1.46313049815,200,1,selective_MLE
-0.42,0,0.0,0.0,0.0,Randomized LASSO,6.06,0.0,0.0,0.0,1000,0.902483553335,200,1,selective_MLE
-0.42,0,0.772215413934,0.117950980392,inf,Lee,27.52,2.04,0.176,0.072,1000,0.739251951337,200,1,selective_MLE
-0.42,0,0.800636311322,0.12,1.51881127885,Naive,38.1,0.32,0.342,0.0,1000,0.739251951337,200,1,selective_MLE
-0.71,0,0.902711246222,0.135333333333,2.29066703226,Selective MLE,11.94,2.6,0.258,0.204,1000,1.07824235978,200,1,selective_MLE
-0.71,0,0.0,0.0,0.0,Randomized LASSO,11.94,0.0,0.0,0.0,1000,0.836538976592,200,1,selective_MLE
-0.71,0,0.816120961485,0.0964545454545,inf,Lee,39.2,2.22,0.218,0.066,1000,0.56972376987,200,1,selective_MLE
-0.71,0,0.893159232195,0.02,1.33867459865,Naive,52.92,0.02,0.456,0.0,1000,0.56972376987,200,1,selective_MLE
-1.22,0,0.846552646398,0.148354256854,1.45750373595,Selective MLE,17.32,5.84,0.514,0.482,1000,0.727206377914,200,1,selective_MLE
-1.22,0,0.0,0.0,0.0,Randomized LASSO,17.32,0.0,0.0,0.0,1000,0.711660402878,200,1,selective_MLE
-1.22,0,0.697183263023,0.178388196001,inf,Lee,47.9,4.66,0.336,0.12,1000,0.37145714765,200,1,selective_MLE
-1.22,0,0.960033854849,0.0,1.18905978659,Naive,61.66,0.0,0.622,0.0,1000,0.37145714765,200,1,selective_MLE
-2.07,0,0.813603148591,0.116182900433,0.9652716672,Selective MLE,18.1,8.96,0.792,0.772,1000,0.347558277288,200,1,selective_MLE
-2.07,0,0.0,0.0,0.0,Randomized LASSO,18.1,0.0,0.0,0.0,1000,0.556557304432,200,1,selective_MLE
-2.07,0,0.729880633536,0.176751570048,inf,Lee,52.32,4.46,0.352,0.102,1000,0.20332446773,200,1,selective_MLE
-2.07,0,0.983468197749,0.0,1.08614747667,Naive,61.2,0.0,0.858,0.0,1000,0.20332446773,200,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0_tRL.csv b/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0_tRL.csv
deleted file mode 100644
index 55db39726..000000000
--- a/selection/adjusted_MLE/output/metrics_high_beta_type1_full_rho_0_tRL.csv
+++ /dev/null
@@ -1,41 +0,0 @@
-SNR,correlation,coverage,fdr,length,method,nactive,ndiscoveries,power,power_post_BH,regression_dim,risk,sample_size,signal_type,tuning
-0.05,0,0.88897128285,0.323333333333,8.79220766982,Selective MLE,12.98,0.52,0.008,0.004,1000,8.2886621002,200,1,randomized_LASSO
-0.05,0,0.0,0.0,0.0,Randomized LASSO,12.98,0.0,0.0,0.0,1000,1.31442104866,200,1,randomized_LASSO
-0.05,0,0.753510470915,0.211666666667,inf,Lee,8.2,0.94,0.01,0.006,1000,1.17248749115,200,1,randomized_LASSO
-0.05,0,0.20697947614,0.76,3.51707544591,Naive,12.46,2.12,0.024,0.0,1000,1.17248749115,200,1,randomized_LASSO
-0.1,0,0.891872254469,0.123333333333,6.34530176112,Selective MLE,14.0,0.44,0.032,0.012,1000,5.70079582818,200,1,randomized_LASSO
-0.1,0,0.0,0.0,0.0,Randomized LASSO,14.0,0.0,0.0,0.0,1000,1.10763024692,200,1,randomized_LASSO
-0.1,0,0.803814659197,0.159271561772,inf,Lee,10.16,1.16,0.024,0.014,1000,1.05070354854,200,1,randomized_LASSO
-0.1,0,0.308253659516,0.62,2.55566050799,Naive,14.88,1.6,0.098,0.0,1000,1.05070354854,200,1,randomized_LASSO
-0.15,0,0.868768231273,0.223333333333,4.71418264616,Selective MLE,17.82,0.66,0.08,0.024,1000,3.98836268352,200,1,randomized_LASSO
-0.15,0,0.0,0.0,0.0,Randomized LASSO,17.82,0.0,0.0,0.0,1000,1.0430537927,200,1,randomized_LASSO
-0.15,0,0.835598452955,0.0742608695652,inf,Lee,13.64,0.7,0.032,0.012,1000,1.00792015423,200,1,randomized_LASSO
-0.15,0,0.403810732703,0.6,2.16109421674,Naive,20.34,1.66,0.134,0.0,1000,1.00792015423,200,1,randomized_LASSO
-0.2,0,0.868467053905,0.218095238095,4.10298653517,Selective MLE,17.46,1.0,0.096,0.056,1000,3.22973247347,200,1,randomized_LASSO
-0.2,0,0.0,0.0,0.0,Randomized LASSO,17.46,0.0,0.0,0.0,1000,1.01048679788,200,1,randomized_LASSO
-0.2,0,0.811217958999,0.117333333333,inf,Lee,13.18,1.22,0.058,0.032,1000,0.938462922739,200,1,randomized_LASSO
-0.2,0,0.499373658179,0.6,1.89997856499,Naive,19.58,1.64,0.208,0.0,1000,0.938462922739,200,1,randomized_LASSO
-0.25,0,0.883503463146,0.195,3.70622944753,Selective MLE,18.28,0.88,0.098,0.054,1000,2.47135003169,200,1,randomized_LASSO
-0.25,0,0.0,0.0,0.0,Randomized LASSO,18.28,0.0,0.0,0.0,1000,0.97688918139,200,1,randomized_LASSO
-0.25,0,0.839550741484,0.0897006327006,inf,Lee,16.44,1.24,0.078,0.042,1000,0.90117958759,200,1,randomized_LASSO
-0.25,0,0.616494448814,0.42,1.78032249483,Naive,24.46,1.16,0.236,0.0,1000,0.90117958759,200,1,randomized_LASSO
-0.3,0,0.866051921174,0.244095238095,3.13147259805,Selective MLE,19.94,1.68,0.16,0.102,1000,2.36317409857,200,1,randomized_LASSO
-0.3,0,0.0,0.0,0.0,Randomized LASSO,19.94,0.0,0.0,0.0,1000,0.939293015234,200,1,randomized_LASSO
-0.3,0,0.743928328678,0.167357376284,inf,Lee,15.62,1.82,0.14,0.05,1000,0.858982589281,200,1,randomized_LASSO
-0.3,0,0.619547597705,0.34,1.64955307026,Naive,23.2,0.8,0.266,0.0,1000,0.858982589281,200,1,randomized_LASSO
-0.42,0,0.867041781847,0.239714285714,2.50968360211,Selective MLE,24.84,2.06,0.222,0.132,1000,2.00307448702,200,1,randomized_LASSO
-0.42,0,0.0,0.0,0.0,Randomized LASSO,24.84,0.0,0.0,0.0,1000,0.865395486812,200,1,randomized_LASSO
-0.42,0,0.732482450526,0.168303817424,inf,Lee,26.4,3.44,0.224,0.08,1000,0.75939059585,200,1,randomized_LASSO
-0.42,0,0.741146303416,0.22,1.54525272229,Naive,37.58,0.66,0.336,0.0,1000,0.75939059585,200,1,randomized_LASSO
-0.71,0,0.814466485587,0.263022979436,1.6600714217,Selective MLE,30.0,5.58,0.442,0.364,1000,1.92922645517,200,1,randomized_LASSO
-0.71,0,0.0,0.0,0.0,Randomized LASSO,30.0,0.0,0.0,0.0,1000,0.770365309897,200,1,randomized_LASSO
-0.71,0,0.808583099881,0.144655122655,inf,Lee,39.18,2.4,0.202,0.058,1000,0.574733612271,200,1,randomized_LASSO
-0.71,0,0.897275350581,0.04,1.35357789306,Naive,52.5,0.08,0.472,0.0,1000,0.574733612271,200,1,randomized_LASSO
-1.22,0,0.803640115619,0.253073759574,1.22548655163,Selective MLE,31.98,8.86,0.674,0.634,1000,0.783112288547,200,1,randomized_LASSO
-1.22,0,0.0,0.0,0.0,Randomized LASSO,31.98,0.0,0.0,0.0,1000,0.609913135656,200,1,randomized_LASSO
-1.22,0,0.77612053658,0.116686190856,inf,Lee,48.2,3.44,0.304,0.086,1000,0.373728618284,200,1,randomized_LASSO
-1.22,0,0.957601878675,0.0,1.20782773316,Naive,62.32,0.0,0.624,0.0,1000,0.373728618284,200,1,randomized_LASSO
-2.07,0,0.770778679702,0.247848096348,0.857075455058,Selective MLE,32.44,12.04,0.894,0.874,1000,0.411382057681,200,1,randomized_LASSO
-2.07,0,0.0,0.0,0.0,Randomized LASSO,32.44,0.0,0.0,0.0,1000,0.468370989328,200,1,randomized_LASSO
-2.07,0,0.803080990926,0.0965080670963,inf,Lee,49.68,2.66,0.254,0.084,1000,0.208476236462,200,1,randomized_LASSO
-2.07,0,0.984105991703,0.0,1.08767214923,Naive,59.22,0.0,0.874,0.0,1000,0.208476236462,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.35.csv b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.35.csv
deleted file mode 100644
index a6ec55380..000000000
--- a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.35.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-0.724816854623,0.838899806485,1.62965821078,0.724224013793,1.33106465713,1.19947480531,0.05,0.35,100,500,1,selective_MLE
-0.385265083675,0.623250677108,0.895897013543,0.361045191295,0.60611889663,0.617980303537,0.1,0.35,100,500,1,selective_MLE
-0.270390483342,0.542201834918,0.542516757338,0.194962371313,0.372711293725,0.375999447603,0.15,0.35,100,500,1,selective_MLE
-0.217031859955,0.446913741016,0.380461749893,0.127195036097,0.227063885605,0.222436708189,0.2,0.35,100,500,1,selective_MLE
-0.183191135704,0.369746575113,0.287851483974,0.0701930323035,0.132418997893,0.136180132365,0.25,0.35,100,500,1,selective_MLE
-0.139899752608,0.370077049834,0.229602473852,0.0696566148775,0.129604816339,0.124306493466,0.3,0.35,100,500,1,selective_MLE
-0.101985001419,0.310468898242,0.155101021839,0.0285528565579,0.0690563735948,0.067374298508,0.42,0.35,100,500,1,selective_MLE
-0.0569139003612,0.218910141131,0.0741056132107,0.0148122885092,0.0328322740991,0.0317729502039,0.71,0.35,100,500,1,selective_MLE
-0.0329382817335,0.182617145112,0.045243085294,0.00958924135652,0.0198175219444,0.0176700251849,1.22,0.35,100,500,1,selective_MLE
-0.0207267202668,0.100893025098,0.026965625387,0.00498697963158,0.0111318165399,0.0116313177681,2.07,0.35,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.7.csv b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.7.csv
deleted file mode 100644
index bb1ea0979..000000000
--- a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.7.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-0.606481746444,0.826745258299,1.33305308527,0.62271913104,0.980415841111,1.11864047232,0.05,0.7,100,500,1,selective_MLE
-0.398650296901,0.700295664431,1.02004385461,0.36712331116,0.630021857222,0.812188963578,0.1,0.7,100,500,1,selective_MLE
-0.266817960717,0.586732001573,0.716854128753,0.222798693376,0.413654992164,0.591789402777,0.15,0.7,100,500,1,selective_MLE
-0.207599545724,0.487626752228,0.492860811183,0.130128412475,0.245453395708,0.40776192466,0.2,0.7,100,500,1,selective_MLE
-0.178457205606,0.451547708341,0.41839803002,0.101150720899,0.191089891637,0.300554430254,0.25,0.7,100,500,1,selective_MLE
-0.142653661284,0.417466476111,0.29398318169,0.0763905428181,0.159325062914,0.239662294933,0.3,0.7,100,500,1,selective_MLE
-0.100564129182,0.343633849642,0.202650571086,0.0360311178731,0.0746274086812,0.135011251127,0.42,0.7,100,500,1,selective_MLE
-0.0622398248064,0.325589733329,0.0951241582053,0.0188866395806,0.0358910916596,0.0660453156033,0.71,0.7,100,500,1,selective_MLE
-0.034510480008,0.20922378322,0.0489181354491,0.012197026661,0.018067922928,0.0314691475029,1.22,0.7,100,500,1,selective_MLE
-0.0205041933808,0.115974002994,0.0320890511388,0.00618113465831,0.0109080617738,0.0178486248352,2.07,0.7,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.csv b/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.csv
deleted file mode 100644
index 9c1ca727a..000000000
--- a/selection/adjusted_MLE/output/risk_beta_type1_full_rho_0.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-0.652411550711,0.820946505923,1.4070040248,0.661438798105,1.16213302331,1.02796717205,0.05,0,100,500,1,selective_MLE
-0.418810019872,0.615859220351,1.08859877204,0.396310997244,0.730827245437,0.682772681521,0.1,0,100,500,1,selective_MLE
-0.280431627709,0.5151162648,0.53810847739,0.202537367658,0.362203372763,0.325984583304,0.15,0,100,500,1,selective_MLE
-0.214846497925,0.401905491611,0.42362790596,0.11670955253,0.22108750486,0.215462021939,0.2,0,100,500,1,selective_MLE
-0.182037721298,0.421809411384,0.319733900683,0.0912351556428,0.201887706538,0.174473785317,0.25,0,100,500,1,selective_MLE
-0.150299675758,0.333848112123,0.217944505315,0.0590215304306,0.127539754074,0.118313600765,0.3,0,100,500,1,selective_MLE
-0.122385160693,0.278841228658,0.159635815479,0.0357065622719,0.0846994005377,0.0685267959665,0.42,0,100,500,1,selective_MLE
-0.064742081091,0.200842080649,0.075943258678,0.0175017280137,0.0352320848703,0.0302118943543,0.71,0,100,500,1,selective_MLE
-0.0355829221315,0.153741474347,0.055041462649,0.0120802822177,0.019930314589,0.0178112548381,1.22,0,100,500,1,selective_MLE
-0.0192982775325,0.0905511133875,0.0321402100347,0.00550207449333,0.0116545903161,0.0105093060895,2.07,0,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.35.csv b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.35.csv
deleted file mode 100644
index 3b4b877b0..000000000
--- a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.35.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-0.697798250784,0.85718568517,2.21896878479,0.699571498105,1.33274011885,1.37873397223,0.05,0.35,100,500,1,selective_MLE
-0.419309318668,0.636428859402,1.1987352918,0.380829530637,0.646123024361,0.72190312741,0.1,0.35,100,500,1,selective_MLE
-0.30931592898,0.532820557278,1.02217246606,0.249026330394,0.431733783231,0.527093447425,0.15,0.35,100,500,1,selective_MLE
-0.246305559448,0.444429877595,0.673491149536,0.154679163925,0.320423659938,0.323355132192,0.2,0.35,100,500,1,selective_MLE
-0.174246008689,0.360765235691,0.467873778027,0.0760494000571,0.164079376842,0.18706333101,0.25,0.35,100,500,1,selective_MLE
-0.134503703797,0.336916782573,0.345490972051,0.0459261611936,0.0935937159224,0.11590795158,0.3,0.35,100,500,1,selective_MLE
-0.101018740148,0.256875358635,0.221607861887,0.0257195421617,0.0553450654339,0.0500593814501,0.42,0.35,100,500,1,selective_MLE
-0.0588696020544,0.177950947921,0.132963527587,0.0201241127366,0.0424956636144,0.0354428715806,0.71,0.35,100,500,1,selective_MLE
-0.0361438615056,0.131259024663,0.0838490306946,0.0122029950952,0.0242627335914,0.0196990246932,1.22,0.35,100,500,1,selective_MLE
-0.0227142973009,0.103825117154,0.039772197288,0.00664066401051,0.0118976464415,0.0111903101344,2.07,0.35,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.7.csv b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.7.csv
deleted file mode 100644
index b0a461397..000000000
--- a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.7.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-0.661064182407,0.801798637534,3.75841271437,0.66794182998,1.30489419765,1.66586374221,0.05,0.7,100,500,1,selective_MLE
-0.362204790134,0.562341962093,1.88335993038,0.335434741644,0.565653950118,0.799341484436,0.1,0.7,100,500,1,selective_MLE
-0.246989970283,0.46261866559,1.19598629058,0.192855215933,0.34910692817,0.480043897059,0.15,0.7,100,500,1,selective_MLE
-0.202248144831,0.399987898639,0.910333623448,0.119039329576,0.230405329048,0.350465323309,0.2,0.7,100,500,1,selective_MLE
-0.172239159064,0.391931305213,0.792634324635,0.107346196542,0.168426306761,0.231265018526,0.25,0.7,100,500,1,selective_MLE
-0.137834199808,0.365459757906,0.643725343517,0.0769725923295,0.148819449516,0.207613886764,0.3,0.7,100,500,1,selective_MLE
-0.101927117901,0.321212638744,0.386211423156,0.0429049071332,0.0843358069426,0.103537820619,0.42,0.7,100,500,1,selective_MLE
-0.0603137823088,0.250400422185,0.199884223847,0.0197333709389,0.0342016623851,0.0397673470199,0.71,0.7,100,500,1,selective_MLE
-0.0331405157854,0.171959642058,0.111838231528,0.0111907083798,0.0183320601807,0.0197899774304,1.22,0.7,100,500,1,selective_MLE
-0.0196362653582,0.105343758224,0.0683338359143,0.00567750470076,0.0108766113923,0.0132487406717,2.07,0.7,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.csv b/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.csv
deleted file mode 100644
index be23c3507..000000000
--- a/selection/adjusted_MLE/output/risk_beta_type1_selected_rho_0.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-0.743435422189,0.850486099629,2.02596730455,0.725569100518,1.31529845576,1.32630280485,0.05,0,100,500,1,selective_MLE
-0.402338513706,0.633263657991,1.20378586671,0.350213321137,0.656021851188,0.790410383997,0.1,0,100,500,1,selective_MLE
-0.311910915364,0.500278735638,0.826297999063,0.210857868418,0.420782103491,0.441970517896,0.15,0,100,500,1,selective_MLE
-0.225929760535,0.41184090871,0.569616166985,0.125815448077,0.270196807028,0.295231118235,0.2,0,100,500,1,selective_MLE
-0.178438719613,0.329875217599,0.440095415652,0.0917532172973,0.189823026931,0.182150423954,0.25,0,100,500,1,selective_MLE
-0.137883197407,0.298821814837,0.313436366994,0.0402924350131,0.117190963254,0.111715425255,0.3,0,100,500,1,selective_MLE
-0.0969747510687,0.243721553208,0.176178413144,0.0278034606202,0.0711334925696,0.0713444446047,0.42,0,100,500,1,selective_MLE
-0.0601112928232,0.218274335294,0.113176600439,0.018583278581,0.0382532254237,0.0321801187824,0.71,0,100,500,1,selective_MLE
-0.0361396721766,0.110645464006,0.062664606523,0.0104018131365,0.0245477860903,0.024659280186,1.22,0,100,500,1,selective_MLE
-0.0217887602061,0.0798053674236,0.0332560523286,0.00578911789716,0.0131973279945,0.00916534444897,2.07,0,100,500,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35.csv
deleted file mode 100644
index 6886c50f9..000000000
--- a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-1.15804369753,1.03326475867,5.02989513105,1.27125139482,2.71819494978,5.02989513105,0.05,0.35,1000,200,1,selective_MLE
-1.08288774171,1.00318150658,2.95736722228,1.12956825759,1.79266089014,2.95736722228,0.1,0.35,1000,200,1,selective_MLE
-0.993737003883,0.983208270296,1.78432174263,1.01639127537,1.44646897849,1.78432174263,0.15,0.35,1000,200,1,selective_MLE
-0.992526772626,0.991266295579,2.19256133433,1.01242596671,1.36480228762,2.19256133433,0.2,0.35,1000,200,1,selective_MLE
-0.895249457402,0.976643552483,1.80139037275,0.90408118781,1.2427738658,1.80139037275,0.25,0.35,1000,200,1,selective_MLE
-0.86628788711,0.97217454907,1.79804896466,0.860191356047,1.2211458867,1.79804896466,0.3,0.35,1000,200,1,selective_MLE
-0.760968826709,0.950472355433,1.38678502316,0.760063270144,1.05808358132,1.38678502316,0.42,0.35,1000,200,1,selective_MLE
-0.545188750369,0.853407944406,1.17719079209,0.513362787122,0.856116134157,1.17719079209,0.71,0.35,1000,200,1,selective_MLE
-0.354254840901,0.669072845661,0.828578087539,0.255188048196,0.528899193159,0.828578087539,1.22,0.35,1000,200,1,selective_MLE
-0.207900773568,0.567708010316,0.446110763277,0.0793901361815,0.285583228595,0.446110763277,2.07,0.35,1000,200,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35_tRL.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35_tRL.csv
deleted file mode 100644
index 86a155103..000000000
--- a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.35_tRL.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-1.22947708117,1.27519023435,29.9774110469,1.24903915215,6.04763869728,11.14355622,0.05,0.35,1000,200,1,randomized_LASSO
-1.03997065233,1.09469903763,16.293370011,1.08898303471,3.38092077039,5.67042617943,0.1,0.35,1000,200,1,randomized_LASSO
-0.970779284886,1.0261449909,13.5328791418,0.980575255112,2.69468544429,5.11745115543,0.15,0.35,1000,200,1,randomized_LASSO
-0.955857160231,1.00570908043,9.92658362282,0.972031122743,2.42305064218,3.94134638117,0.2,0.35,1000,200,1,randomized_LASSO
-0.899580794678,0.963436312334,8.07491069098,0.904449458809,1.94621905699,2.96076741876,0.25,0.35,1000,200,1,randomized_LASSO
-0.861327468008,0.949610403149,9.07751513011,0.867332470168,1.87440533665,2.72989344456,0.3,0.35,1000,200,1,randomized_LASSO
-0.760023082731,0.876389275514,6.63125065196,0.75529006061,1.48698253691,2.29869229231,0.42,0.35,1000,200,1,randomized_LASSO
-0.56797924093,0.747119128815,5.00555624788,0.525097514,1.03821222608,1.90473171699,0.71,0.35,1000,200,1,randomized_LASSO
-0.364022589518,0.607598814246,2.73622995835,0.261085084031,0.634367967642,0.918711011887,1.22,0.35,1000,200,1,randomized_LASSO
-0.225363033778,0.501801832857,1.53237148385,0.103328651514,0.377559544681,0.81289768376,2.07,0.35,1000,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.7_tRL.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.7_tRL.csv
deleted file mode 100644
index bc1e08396..000000000
--- a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.7_tRL.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-1.09243672774,1.19241631522,66.5802029991,1.23406515401,5.37926240412,66.5802029991,0.05,0.7,1000,200,1,randomized_LASSO
-1.03348868058,1.08132675964,38.3852852404,1.07150225572,3.49735932903,38.3852852404,0.1,0.7,1000,200,1,randomized_LASSO
-0.97478395775,1.05911312813,26.7440029516,1.02051312064,2.93875908586,26.7440029516,0.15,0.7,1000,200,1,randomized_LASSO
-0.91935867248,0.978984630566,20.0408717049,0.939219038505,2.24129394098,20.0408717049,0.2,0.7,1000,200,1,randomized_LASSO
-0.885846251708,0.969227518518,15.6008974535,0.900166766283,1.94366792471,15.6008974535,0.25,0.7,1000,200,1,randomized_LASSO
-0.827632432351,0.938287802512,14.670816331,0.844845584183,1.84385143811,14.670816331,0.3,0.7,1000,200,1,randomized_LASSO
-0.719350085744,0.844098099742,13.5209534407,0.721438073621,1.4054012529,13.5209534407,0.42,0.7,1000,200,1,randomized_LASSO
-0.520966311478,0.730480536029,7.70099169377,0.494283033378,1.03323592945,7.70099169377,0.71,0.7,1000,200,1,randomized_LASSO
-0.322558328992,0.574001051024,4.80133134411,0.236516272445,0.597607242237,4.80133134411,1.22,0.7,1000,200,1,randomized_LASSO
-0.189410896637,0.439628497143,2.90326565422,0.092964938924,0.331250334849,2.90326565422,2.07,0.7,1000,200,1,randomized_LASSO
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.csv
deleted file mode 100644
index 371a248a9..000000000
--- a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-1.18979142946,1.01323890467,3.24705655557,1.26747917859,2.52302676222,3.24705655557,0.05,0,1000,200,1,selective_MLE
-1.0499982218,0.994559752969,2.20315756913,1.08522590394,1.80817304281,2.20315756913,0.1,0,1000,200,1,selective_MLE
-0.986295023502,0.995273245034,1.98416436442,1.02842358859,1.65477241528,1.98416436442,0.15,0,1000,200,1,selective_MLE
-0.946348528327,0.978648208349,1.85651551225,0.957246371957,1.41201355988,1.85651551225,0.2,0,1000,200,1,selective_MLE
-0.877901846227,0.963936970096,1.35427531353,0.892956430716,1.24760051675,1.35427531353,0.25,0,1000,200,1,selective_MLE
-0.871637370414,0.963413654406,1.27509640458,0.878049441441,1.23165619207,1.27509640458,0.3,0,1000,200,1,selective_MLE
-0.739251951337,0.902483553335,1.46313049815,0.739133721282,1.07196731339,1.46313049815,0.42,0,1000,200,1,selective_MLE
-0.56972376987,0.836538976592,1.07824235978,0.532687510942,0.899318445422,1.07824235978,0.71,0,1000,200,1,selective_MLE
-0.37145714765,0.711660402878,0.727206377914,0.27830772286,0.581515000657,0.727206377914,1.22,0,1000,200,1,selective_MLE
-0.20332446773,0.556557304432,0.347558277288,0.0790857133544,0.266649181037,0.347558277288,2.07,0,1000,200,1,selective_MLE
diff --git a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0_tRL.csv b/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0_tRL.csv
deleted file mode 100644
index e16d9c95b..000000000
--- a/selection/adjusted_MLE/output/risk_high_beta_type1_full_rho_0_tRL.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-Risk_LASSO_nonrand,Risk_LASSO_rand,Risk_indest,Risk_relLASSO_nonrand,Risk_relLASSO_rand,Risk_selMLE,SNR,correlation,regression_dim,sample_size,signal_type,tuning
-1.17248749115,1.31442104866,23.6453370978,1.28791920231,6.4199010483,8.2886621002,0.05,0,1000,200,1,randomized_LASSO
-1.05070354854,1.10763024692,14.1800839856,1.1238099725,3.67703632915,5.70079582818,0.1,0,1000,200,1,randomized_LASSO
-1.00792015423,1.0430537927,11.2019796169,1.03348070544,2.81963361807,3.98836268352,0.15,0,1000,200,1,randomized_LASSO
-0.938462922739,1.01048679788,7.56513834807,0.959418500699,2.34878604629,3.22973247347,0.2,0,1000,200,1,randomized_LASSO
-0.90117958759,0.97688918139,6.38666109808,0.902395680636,2.0548885926,2.47135003169,0.25,0,1000,200,1,randomized_LASSO
-0.858982589281,0.939293015234,5.73534495114,0.870730532696,1.88688220322,2.36317409857,0.3,0,1000,200,1,randomized_LASSO
-0.75939059585,0.865395486812,5.84219932939,0.745503498889,1.57411396465,2.00307448702,0.42,0,1000,200,1,randomized_LASSO
-0.574733612271,0.770365309897,3.2842446673,0.544215065212,1.08962289716,1.92922645517,0.71,0,1000,200,1,randomized_LASSO
-0.373728618284,0.609913135656,2.01125498031,0.295208597233,0.619868328368,0.783112288547,1.22,0,1000,200,1,randomized_LASSO
-0.208476236462,0.468370989328,1.0464136513,0.0822605369992,0.302679646991,0.411382057681,2.07,0,1000,200,1,randomized_LASSO

From 273f376f30e4b08114183683c60a65dd6c2e5ccc Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Apr 2018 14:32:05 -0700
Subject: [PATCH 599/617] removing ===

---
 selection/SLOPE/slope.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/selection/SLOPE/slope.py b/selection/SLOPE/slope.py
index afec70692..31cebedaa 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/SLOPE/slope.py
@@ -295,7 +295,7 @@ def gaussian(X,
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
         return randomized_slope(loglike, np.asarray(feature_weights) / sigma ** 2, ridge_term, randomizer_scale)
-=======
+
 """
 Projection onto selected subgradients of SLOPE
 """

From 08f9a5ffd4e811a8229a3e8c19bbafed3d82a869 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Apr 2018 15:26:30 -0700
Subject: [PATCH 600/617] moving some files around

---
 C-software                                    |   2 +-
 R-software                                    |   2 +-
 selection/SLOPE/__init__.py                   |   0
 selection/SLOPE/tests/__init__.py             |   1 -
 selection/randomized/lasso.py                 |   6 +-
 selection/{SLOPE => randomized}/slope.py      | 147 ++++++++----------
 .../tests/test_slope.py}                      |  10 +-
 .../tests/test_slope_subgrad.py}              |   6 +-
 8 files changed, 74 insertions(+), 100 deletions(-)
 delete mode 100644 selection/SLOPE/__init__.py
 delete mode 100644 selection/SLOPE/tests/__init__.py
 rename selection/{SLOPE => randomized}/slope.py (77%)
 rename selection/{SLOPE/tests/slope_run_test.py => randomized/tests/test_slope.py} (96%)
 rename selection/{SLOPE/tests/projection_subgrad_test.py => randomized/tests/test_slope_subgrad.py} (81%)

diff --git a/C-software b/C-software
index b3acb5740..92d2f9c4a 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit b3acb57407e72605111423af2a4eb0e40cadffa7
+Subproject commit 92d2f9c4ac67aabfab39e67961f7fef3f03611d5
diff --git a/R-software b/R-software
index 8a2a30a5f..9de1b7c4f 160000
--- a/R-software
+++ b/R-software
@@ -1 +1 @@
-Subproject commit 8a2a30a5f14b080e6dea476cfb0dc21d6316afdb
+Subproject commit 9de1b7c4f7b9544262a7168d1717241841742888
diff --git a/selection/SLOPE/__init__.py b/selection/SLOPE/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/selection/SLOPE/tests/__init__.py b/selection/SLOPE/tests/__init__.py
deleted file mode 100644
index 8b1378917..000000000
--- a/selection/SLOPE/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 5a07b5b02..436b7c90b 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -1598,8 +1598,8 @@ def selected_targets(self, features=None, dispersion=None):
             observed_target = self._beta_full[overall]
             crosscov_target_score = score_linear.dot(cov_target)
             Xfeat = X[:, overall]
-            alternatives = [{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][active]] + [
-                                                                                                                       'twosided'] * unpenalized.sum()
+            alternatives = ([{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][active]] + 
+                            ['twosided'] * unpenalized.sum())
 
         else:
 
@@ -1622,7 +1622,6 @@ def selected_targets(self, features=None, dispersion=None):
             dispersion = ((y - self.loglike.saturated_loss.mean_function(
                 Xfeat.dot(observed_target))) ** 2 / self._W).sum() / (n - Xfeat.shape[1])
 
-        print(dispersion, 'dispersion')
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     def full_targets(self, features=None, dispersion=None):
@@ -1690,7 +1689,6 @@ def debiased_targets(self,
             relaxed_soln = self.initial_soln[features] - np.linalg.inv(Qrelax).dot(G[features])
             dispersion = ((y - self.loglike.saturated_loss.mean_function(
                 Xfeat.dot(relaxed_soln))) ** 2 / self._W).sum() / (n - features.sum())
-            #print("dispersion", np.sqrt(dispersion))
         alternatives = ['twosided'] * features.sum()
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
diff --git a/selection/SLOPE/slope.py b/selection/randomized/slope.py
similarity index 77%
rename from selection/SLOPE/slope.py
rename to selection/randomized/slope.py
index 31cebedaa..0b9f335c5 100644
--- a/selection/SLOPE/slope.py
+++ b/selection/randomized/slope.py
@@ -1,37 +1,59 @@
 from __future__ import print_function
+
 import functools
 import numpy as np
-from regreg.atoms.slope import slope
-from selection.randomized.randomization import randomization
+
+# sklearn imports
+
+have_isotonic = False
+try:
+    from sklearn.isotonic import IsotonicRegression
+    have_isotonic = True
+except ImportError:
+    raise ValueError('unable to import isotonic regression from sklearn, SLOPE subgradient projection will not work')
+
+# regreg imports
+
+from regreg.atoms.slope import _basic_proximal_map
 import regreg.api as rr
-from selection.randomized.base import restricted_estimator
-from selection.constraints.affine import constraints
-from selection.randomized.query import (query,
-                                        multiple_queries,
-                                        langevin_sampler,
-                                        affine_gaussian_sampler)
 
-class randomized_slope():
+from ..constraints.affine import constraints
+
+from .randomization import randomization
+from .base import restricted_estimator
+from .lasso import highdim
+from .query import (query,
+                    multiple_queries,
+                    langevin_sampler,
+                    affine_gaussian_sampler)
+
+class slope(highdim):
 
     def __init__(self,
                  loglike,
-                 feature_weights,
+                 slope_weights,
                  ridge_term,
                  randomizer_scale,
                  perturb=None):
         r"""
         Create a new post-selection object for the SLOPE problem
+
         Parameters
         ----------
+
         loglike : `regreg.smooth.glm.glm`
             A (negative) log-likelihood as implemented in `regreg`.
-        feature_weights : np.ndarray
-            Feature weights for L-1 penalty. If a float,
+
+        slope_weights : np.ndarray
+            SLOPE weights for L-1 penalty. If a float,
             it is broadcast to all features.
+
         ridge_term : float
             How big a ridge term to add?
+
         randomizer_scale : float
             Scale for IID components of randomization.
+
         perturb : np.ndarray
             Random perturbation subtracted as a linear
             term in the objective function.
@@ -40,13 +62,13 @@ def __init__(self,
         self.loglike = loglike
         self.nfeature = p = self.loglike.shape[0]
 
-        if np.asarray(feature_weights).shape == ():
-            feature_weights = np.ones(loglike.shape) * feature_weights
-        self.feature_weights = np.asarray(feature_weights)
+        if np.asarray(slope_weights).shape == ():
+            slope_weights = np.ones(loglike.shape) * slope_weights
+        self.slope_weights = np.asarray(slope_weights)
 
         self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
         self.ridge_term = ridge_term
-        self.penalty = slope(feature_weights, lagrange=1.)
+        self.penalty = rr.slope(slope_weights, lagrange=1.)
         self._initial_omega = perturb  # random perturbation
 
     def fit(self,
@@ -65,6 +87,8 @@ def fit(self,
         problem = rr.simple_problem(self.loglike, self.penalty)
         self.initial_soln = problem.solve(quad, **solve_args)
 
+        # now we have to work out SLOPE details, clusters, etc.
+
         active_signs = np.sign(self.initial_soln)
         active = self._active = active_signs != 0
 
@@ -85,7 +109,8 @@ def fit(self,
         sorted_soln = self.initial_soln[indices]
         initial_scalings = np.sort(np.unique(np.fabs(self.initial_soln[active])))[::-1]
         self.observed_opt_state = initial_scalings
-        #print("observed opt state", self.observed_opt_state)
+
+        self._unpenalized = np.zeros(p, np.bool)
 
         _beta_unpenalized = restricted_estimator(self.loglike, self._overall, solve_args=solve_args)
 
@@ -209,16 +234,15 @@ def selective_MLE(self,
         if target == 'selected':
             observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
                                                                                                 dispersion=dispersion)
-
-        # elif target == 'full':
-        #     X, y = self.loglike.data
-        #     n, p = X.shape
-        #     if n > p:
-        #         observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features,
-        #                                                                                         dispersion=dispersion)
-        #     else:
-        #         observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features,
-        #                                                                                             dispersion=dispersion)
+        elif target == 'full':
+            X, y = self.loglike.data
+            n, p = X.shape
+            if n > p:
+                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features,
+                                                                                                dispersion=dispersion)
+            else:
+                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features,
+                                                                                                    dispersion=dispersion)
 
         # working out conditional law of opt variables given
         # target after decomposing score wrt target
@@ -231,54 +255,12 @@ def selective_MLE(self,
 
     # Targets of inference
     # and covariance with score representation
-
-    def selected_targets(self, features=None, dispersion=None):
-
-        X, y = self.loglike.data
-        n, p = X.shape
-
-        if features is None:
-            active = self._active
-            noverall = active.sum()
-            overall = active
-
-            score_linear = self.score_transform[0]
-            Q = -score_linear[overall]
-            cov_target = np.linalg.inv(Q)
-            observed_target = self._beta_full[overall]
-            crosscov_target_score = score_linear.dot(cov_target)
-            Xfeat = X[:, overall]
-            alternatives = [{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][active]] \
-                           + ['twosided']
-
-        else:
-
-            features_b = np.zeros_like(self._overall)
-            features_b[features] = True
-            features = features_b
-
-            Xfeat = X[:, features]
-            Qfeat = Xfeat.T.dot(self._W[:, None] * Xfeat)
-            Gfeat = self.loglike.smooth_objective(self.initial_soln, 'grad')[features]
-            Qfeat_inv = np.linalg.inv(Qfeat)
-            one_step = self.initial_soln[features] - Qfeat_inv.dot(Gfeat)
-            cov_target = Qfeat_inv
-            _score_linear = -Xfeat.T.dot(self._W[:, None] * X).T
-            crosscov_target_score = _score_linear.dot(cov_target)
-            observed_target = one_step
-            alternatives = ['twosided'] * features.sum()
-
-        if dispersion is None:  # use Pearson's X^2
-            dispersion = ((y - self.loglike.saturated_loss.mean_function(
-                Xfeat.dot(observed_target))) ** 2 / self._W).sum() / (n - Xfeat.shape[1])
-
-        print(dispersion, 'dispersion')
-        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+    # are same as highdim LASSO
 
     @staticmethod
     def gaussian(X,
                  Y,
-                 feature_weights,
+                 slope_weights,
                  sigma=1.,
                  quadratic=None,
                  ridge_term=0.,
@@ -294,21 +276,9 @@ def gaussian(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        return randomized_slope(loglike, np.asarray(feature_weights) / sigma ** 2, ridge_term, randomizer_scale)
-
-"""
-Projection onto selected subgradients of SLOPE
-"""
-import numpy as np
-
-have_isotonic = False
-try:
-    from sklearn.isotonic import IsotonicRegression
-    have_isotonic = True
-except ImportError:
-    raise ValueError('unable to import isotonic regression from sklearn')
+        return slope(loglike, np.asarray(slope_weights) / sigma ** 2, ridge_term, randomizer_scale)
 
-from regreg.atoms.slope import _basic_proximal_map
+# Projection onto selected subgradients of SLOPE
 
 def _projection_onto_selected_subgradients(prox_arg,
                                            weights,
@@ -324,20 +294,27 @@ def _projection_onto_selected_subgradients(prox_arg,
     of this set is p -- the dimensions of the `prox_arg` minus
     the number of unique values in `ordered_clustering` + 1 if the
     last value of the solution was zero (i.e. solution was sparse).
+
     Parameters
     ----------
+
     prox_arg : np.ndarray(p, np.float)
         Point to project
+
     weights : np.ndarray(p, np.float)
         Weights of the SLOPE penalty.
+
     ordering : np.ndarray(p, np.int)
         Order of original argument to SLOPE prox.
         First entry corresponds to largest argument of SLOPE prox.
+
     cluster_sizes : sequence
         Sizes of clusters, starting with
         largest in absolute value.
+
     active_signs : np.ndarray(p, np.int)
          Signs of non-zero coefficients.
+
     last_value_zero : bool
         Is the last solution value equal to 0?
     """
diff --git a/selection/SLOPE/tests/slope_run_test.py b/selection/randomized/tests/test_slope.py
similarity index 96%
rename from selection/SLOPE/tests/slope_run_test.py
rename to selection/randomized/tests/test_slope.py
index 55257be94..13725fa21 100644
--- a/selection/SLOPE/tests/slope_run_test.py
+++ b/selection/randomized/tests/test_slope.py
@@ -12,7 +12,7 @@
 from regreg.atoms.slope import slope
 import regreg.api as rr
 
-from selection.SLOPE.slope import randomized_slope
+from selection.randomized.slope import slope
 import matplotlib.pyplot as plt
 
 def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma = None):
@@ -181,10 +181,10 @@ def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0., ra
                                                           choice_weights="bhq", #put gaussian
                                                           sigma=sigma_)
 
-        conv = randomized_slope.gaussian(X,
-                                         Y,
-                                         r_sigma * r_lambda_seq,
-                                         randomizer_scale=randomizer_scale * sigma_)
+        conv = slope.gaussian(X,
+                              Y,
+                              r_sigma * r_lambda_seq,
+                              randomizer_scale=randomizer_scale * sigma_)
 
         signs = conv.fit()
         nonzero = signs != 0
diff --git a/selection/SLOPE/tests/projection_subgrad_test.py b/selection/randomized/tests/test_slope_subgrad.py
similarity index 81%
rename from selection/SLOPE/tests/projection_subgrad_test.py
rename to selection/randomized/tests/test_slope_subgrad.py
index 0f056e8ec..704d36771 100644
--- a/selection/SLOPE/tests/projection_subgrad_test.py
+++ b/selection/randomized/tests/test_slope_subgrad.py
@@ -1,13 +1,13 @@
 import numpy as np
 
-from selection.SLOPE.slope import _projection_onto_selected_subgradients
+from ..slope import _projection_onto_selected_subgradients
 
 def test_projection():
 
     prox_arg = np.random.normal(0,1,10)
     weights = np.linspace(3, 5, 10)[::-1]
     ordering = np.random.choice(10, 10, replace=False)
-    cluster_sizes= list((2,3,1,1,3))
+    cluster_sizes = [2,3,1,1,3]
     active_signs = np.ones(10)
 
     proj = _projection_onto_selected_subgradients(prox_arg,
@@ -18,4 +18,4 @@ def test_projection():
 
     print("projection", proj)
 
-test_projection()
+

From ce8cbf360a3785174c9d4e289e49ed3541736a8a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 23 Apr 2018 15:33:52 -0700
Subject: [PATCH 601/617] highdim LASSO selective MLE should be reusable

---
 selection/randomized/slope.py | 54 -----------------------------------
 1 file changed, 54 deletions(-)

diff --git a/selection/randomized/slope.py b/selection/randomized/slope.py
index 0b9f335c5..8540b4175 100644
--- a/selection/randomized/slope.py
+++ b/selection/randomized/slope.py
@@ -199,60 +199,6 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                                selection_info=self.selection_variable)
         return active_signs
 
-    def selective_MLE(self,
-                      target="selected",
-                      features=None,
-                      parameter=None,
-                      level=0.9,
-                      compute_intervals=False,
-                      dispersion=None,
-                      solve_args={'tol': 1.e-12}):
-        """
-        Parameters
-        ----------
-        target : one of ['selected', 'full']
-        features : np.bool
-            Binary encoding of which features to use in final
-            model and targets.
-        parameter : np.array
-            Hypothesized value for parameter -- defaults to 0.
-        level : float
-            Confidence level.
-        ndraw : int (optional)
-            Defaults to 1000.
-        burnin : int (optional)
-            Defaults to 1000.
-        compute_intervals : bool
-            Compute confidence intervals?
-        dispersion : float (optional)
-            Use a known value for dispersion, or Pearson's X^2?
-        """
-
-        if parameter is None:
-            parameter = np.zeros(self.loglike.shape[0])
-
-        if target == 'selected':
-            observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
-                                                                                                dispersion=dispersion)
-        elif target == 'full':
-            X, y = self.loglike.data
-            n, p = X.shape
-            if n > p:
-                observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features,
-                                                                                                dispersion=dispersion)
-            else:
-                observed_target, cov_target, cov_target_score, alternatives = self.debiased_targets(features=features,
-                                                                                                    dispersion=dispersion)
-
-        # working out conditional law of opt variables given
-        # target after decomposing score wrt target
-
-        return self.sampler.selective_MLE(observed_target,
-                                          cov_target,
-                                          cov_target_score,
-                                          self.observed_opt_state,
-                                          solve_args=solve_args)
-
     # Targets of inference
     # and covariance with score representation
     # are same as highdim LASSO

From 6fdb30b69d1d0e3ddf67ba8a8c38adcb7930cc3d Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Tue, 24 Apr 2018 16:25:28 -0700
Subject: [PATCH 602/617] logistic debiased liu tests

---
 selection/algorithms/tests/test_compareR.py | 256 ++++++++++++++------
 1 file changed, 179 insertions(+), 77 deletions(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index 63ffa51e2..e7d9d7192 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -13,7 +13,7 @@
 
 from ..lasso import lasso, lasso_full
 from ..forward_step import forward_step
-from ...tests.instance import gaussian_instance
+from ...tests.instance import gaussian_instance, logistic_instance
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_fixed_lambda():
@@ -496,82 +496,184 @@ def test_solve_QP():
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_full_lasso_tall():
     n, p, s = 200, 100, 10
-    X, y, _, _, sigma = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)
-
-    lam = 4. * np.sqrt(n)
-    X *= np.sqrt(n)
-    L = lasso_full.gaussian(X, y, lam)
-    L.fit()
-    if len(L.active) > 0:
-        S = L.summary(compute_intervals=False)
-        numpy2ri.activate()
-
-        rpy.r.assign("X", X)
-        rpy.r.assign("y", y)
-        rpy.r.assign("lam", lam)
-        rpy.r("""
-        y = as.numeric(y)
-        n = nrow(X)
-        p = ncol(X)
-        sigma_est = sigma(lm(y ~ X - 1))
-        print(sigma_est)
-        penalty_factor = rep(1, p);
-        lam = lam / n;
-        soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="ls")
-        print(lam)
-        print(soln)
-        PVS = selectiveInference:::inference_group_lasso(X, y, 
-                                                         soln, groups=1:ncol(X), 
-                                                         lambda=lam, penalty_factor=penalty_factor, 
-                                                         sigma_est, loss="ls", algo="Q", 
-                                                         construct_ci=FALSE)
-        active_vars=PVS$active_vars - 1 # for 0-based
-        pvalues = PVS$pvalues
-        """)
-        pvalues = rpy.r('pvalues')
-        active_set = rpy.r('active_vars')
-
-        nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
-
-        numpy2ri.deactivate()
+
+    while True:
+
+        X, y, _, _, sigma = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4, sigma=1.)
+
+        lam = 4. * np.sqrt(n)
+        X *= np.sqrt(n)
+        L = lasso_full.gaussian(X, y, lam)
+        L.fit()
+        if len(L.active) > 0:
+            S = L.summary(compute_intervals=False, dispersion=sigma**2)
+            numpy2ri.activate()
+
+            rpy.r.assign('sigma_est', sigma)
+            rpy.r.assign("X", X)
+            rpy.r.assign("y", y)
+            rpy.r.assign("lam", lam)
+            rpy.r("""
+            y = as.numeric(y)
+            n = nrow(X)
+            p = ncol(X)
+            #sigma_est = sigma(lm(y ~ X - 1))
+            penalty_factor = rep(1, p);
+            lam = lam / n;
+            soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="ls")
+            PVS = selectiveInference:::inference_debiased_full(X, y, 
+                                                             soln, 
+                                                             lambda=lam, penalty_factor=penalty_factor, 
+                                                             sigma_est=sigma_est, loss="ls", algo="Q", 
+                                                             construct_ci=FALSE)
+            active_vars=PVS$active_vars - 1 # for 0-based
+            pvalues = PVS$pvalues
+            """)
+            pvalues = rpy.r('pvalues')
+            active_set = rpy.r('active_vars')
+
+            print(pvalues)
+            print(S['pval'])
+            nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
+
+            numpy2ri.deactivate()
+            break
+
+@np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
+def test_full_lasso_tall_logistic():
+    n, p, s = 200, 100, 10
+    
+    while True:
+
+        X, y, _, _ = logistic_instance(n=n, p=p, s=s, equicorrelated=False, signal=10)
+
+        lam = 2. * np.sqrt(n)
+        X *= np.sqrt(n)
+        L = lasso_full.logistic(X, y, lam)
+        L.fit()
+        if len(L.active) > 0:
+            S = L.summary(compute_intervals=False)
+            numpy2ri.activate()
+
+            rpy.r.assign("X", X)
+            rpy.r.assign("y", y)
+            rpy.r.assign("lam", lam)
+            rpy.r("""
+            y = as.numeric(y)
+            n = nrow(X)
+            p = ncol(X)
+            sigma_est = sigma(lm(y ~ X - 1))
+            print(sigma_est)
+            penalty_factor = rep(1, p);
+            lam = lam / n;
+            soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="logit")
+            PVS = selectiveInference:::inference_debiased_full(X, y, 
+                                                             soln, 
+                                                             lambda=lam, penalty_factor=penalty_factor, 
+                                                             sigma_est, loss="logit", algo="glmnet", 
+                                                             construct_ci=FALSE)
+            active_vars=PVS$active_vars - 1 # for 0-based
+            pvalues = PVS$pvalues
+            """)
+            pvalues = rpy.r('pvalues')
+            active_set = rpy.r('active_vars')
+
+            nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
+
+            numpy2ri.deactivate()
+            break 
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_full_lasso_wide():
-    n, p, s = 30, 50, 10
-    X, y, _, _, sigma = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)
-
-    lam = 6. * np.sqrt(n)
-    X *= np.sqrt(n)
-    L = lasso_full.gaussian(X, y, lam)
-    L.fit()
-
-    if len(L.active) > 0:
-        S = L.summary(compute_intervals=False, dispersion=sigma**2)
-        numpy2ri.activate()
-
-        rpy.r.assign("X", X)
-        rpy.r.assign("y", y)
-        rpy.r.assign("sigma_est", sigma)
-        rpy.r.assign("lam", lam)
-        rpy.r("""
-
-        y = as.numeric(y)
-        n = nrow(X)
-        p = ncol(X)
-
-        penalty_factor = rep(1, p);
-        lam = lam / n;
-        soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="ls")
-        PVS = selectiveInference:::inference_group_lasso(X, y, 
-                                                         soln, groups=1:ncol(X), 
-                                                         lambda=lam, penalty_factor=penalty_factor, 
-                                                         sigma_est, loss="ls", algo="glmnet", 
-                                                         construct_ci=FALSE)
-        active_vars=PVS$active_vars - 1 # for 0-based
-        pvalues = PVS$pvalues
-        """)
-        pvalues = rpy.r('pvalues')
-        active_set = rpy.r('active_vars')
-
-        nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
-        numpy2ri.deactivate()
+    n, p, s = 30, 60, 15
+
+    while True:
+        X, y, _, _, sigma = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)
+
+        lam = 1. * np.sqrt(n)
+        X *= np.sqrt(n)
+        L = lasso_full.gaussian(X, y, lam)
+        L.fit()
+
+        if len(L.active) > 0:
+            S = L.summary(compute_intervals=False, dispersion=sigma**2)
+            numpy2ri.activate()
+
+            rpy.r.assign("X", X)
+            rpy.r.assign("y", y)
+            rpy.r.assign("sigma_est", sigma)
+            rpy.r.assign("lam", lam)
+            rpy.r("""
+
+            y = as.numeric(y)
+            n = nrow(X)
+            p = ncol(X)
+
+            penalty_factor = rep(1, p);
+            lam = lam / n;
+            soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="ls")
+            PVS = selectiveInference:::inference_debiased_full(X, y, 
+                                                             soln, 
+                                                             lambda=lam, penalty_factor=penalty_factor, 
+                                                             sigma_est, loss="ls", algo="glmnet", 
+                                                             construct_ci=FALSE)
+            active_vars=PVS$active_vars - 1 # for 0-based
+            pvalues = PVS$pvalues
+            """)
+            pvalues = rpy.r('pvalues')
+            active_set = rpy.r('active_vars')
+
+            import sys
+            sys.stderr.write(repr(pvalues))
+            sys.stderr.write(repr(S['pval']))
+
+            nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
+            numpy2ri.deactivate()
+            break
+
+@np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
+def test_full_lasso_wide_logistic():
+    n, p, s = 30, 60, 15
+
+    while True:
+        X, y, _, _ = logistic_instance(n=n, p=p, s=s, equicorrelated=False, signal=10)
+
+        lam = 1. * np.sqrt(n)
+        X *= np.sqrt(n)
+        L = lasso_full.logistic(X, y, lam)
+        L.fit()
+
+        if len(L.active) > 0:
+            S = L.summary(compute_intervals=False, dispersion=1.)
+            numpy2ri.activate()
+
+            rpy.r.assign("X", X)
+            rpy.r.assign("y", y)
+            rpy.r.assign("lam", lam)
+            rpy.r("""
+
+            y = as.numeric(y)
+            n = nrow(X)
+            p = ncol(X)
+
+            penalty_factor = rep(1, p);
+            lam = lam / n;
+            soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="logit")
+            PVS = selectiveInference:::inference_debiased_full(X, y, 
+                                                             soln, 
+                                                             lambda=lam, penalty_factor=penalty_factor, 
+                                                             sigma_est=1., loss="logit", algo="glmnet", 
+                                                             construct_ci=FALSE)
+            active_vars=PVS$active_vars - 1 # for 0-based
+            pvalues = PVS$pvalues
+            """)
+            pvalues = rpy.r('pvalues')
+            active_set = rpy.r('active_vars')
+
+            import sys
+            sys.stderr.write(repr(pvalues))
+            sys.stderr.write(repr(S['pval']))
+
+            nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
+            numpy2ri.deactivate()
+            break

From 76fd48666829eba313e2c3995c257c10fe2234b3 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 26 Apr 2018 10:04:21 -0700
Subject: [PATCH 603/617] need to figure out what score is

---
 selection/randomized/marginal_screening.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/selection/randomized/marginal_screening.py b/selection/randomized/marginal_screening.py
index 7fa67886d..9279f2790 100644
--- a/selection/randomized/marginal_screening.py
+++ b/selection/randomized/marginal_screening.py
@@ -10,6 +10,8 @@
                                         langevin_sampler,
                                         affine_gaussian_sampler)
 
+from ..algorithms.debiased_lasso import debiasing_matrix
+
 class marginal_screening():
 
     def __init__(self,
@@ -18,7 +20,7 @@ def __init__(self,
                  randomizer_scale,
                  perturb=None):
 
-        self.nfeature =  p = score.shape[0]
+        self.nfeature =  p = observed_score.shape[0]
         if np.asarray(threshold).shape == ():
             threshold = np.ones(p) * threshold
         self.threshold = np.asarray(threshold)
@@ -44,7 +46,7 @@ def fit(self, perturb=None):
         active_signs = np.sign(randomized_score[self.boundary])
 
         self.observed_opt_state = self._initial_omega[self.boundary] + self.observed_score[self.boundary] - \
-                                  np.diag(active_signs)* self.threshold[self.boundary]
+                                  np.diag(active_signs).dot(self.threshold[self.boundary])
         self.num_opt_var = self.observed_opt_state.shape[0]
 
         opt_linear = np.zeros((p, self.num_opt_var))
@@ -266,7 +268,7 @@ def gaussian(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        return marginal_screening(-X.dot(Y), threshold, randomizer_scale)
+        return marginal_screening(X.dot(Y), threshold, randomizer_scale)
 
 
 

From 780b86cb7551249bf8fa6615074f7c0d0e4fcd9a Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Thu, 26 Apr 2018 23:20:04 -0700
Subject: [PATCH 604/617] commit changes so far

---
 .../tests/test_inferential_metrics.py         | 57 +++++++++++--------
 selection/randomized/marginal_screening.py    | 45 +++++++++++----
 .../randomized/tests/test_selectiveMLE_BH.py  | 46 +++++++++++++++
 3 files changed, 111 insertions(+), 37 deletions(-)
 create mode 100644 selection/randomized/tests/test_selectiveMLE_BH.py

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index ffac8d21e..33ad55b31 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -347,28 +347,36 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
         nactive_LASSO = active_LASSO.sum()
 
         tune_num = 50
+        rand_tune_num = 10
+        rand_scale_seq = np.linspace(0.05, 0.5, num = rand_tune_num)
         lam_seq = sigma_ * np.linspace(0.25, 2.75, num=tune_num) * \
                   np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0))
-        err = np.zeros(tune_num)
-        for k in range(tune_num):
-            W = lam_seq[k]*np.ones(p)
-            conv = highdim.gaussian(X,
-                                    y,
-                                    W,
-                                    randomizer_scale=np.sqrt(n) * 
-                                    randomizer_scale * sigma_)
-            signs = conv.fit()
-            nonzero = signs != 0
-            if tuning == "selective_MLE":
-                estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
-                full_estimate = np.zeros(p)
-                full_estimate[nonzero] = estimate
-                err[k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
-            elif tuning == "randomized_LASSO":
-                err[k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
-
-        lam = lam_seq[np.argmin(err)]
+        err = np.zeros((rand_tune_num, tune_num))
+        for l in range(rand_tune_num):
+            randomizer_scale = rand_scale_seq[l]
+            for k in range(tune_num):
+                W = lam_seq[k] * np.ones(p)
+                conv = highdim.gaussian(X,
+                                        y,
+                                        W,
+                                        randomizer_scale=np.sqrt(n) *
+                                                         randomizer_scale * sigma_)
+                signs = conv.fit()
+                nonzero = signs != 0
+                if tuning == "selective_MLE":
+                    estimate, _, _, _, _, _ = conv.selective_MLE(target=target, dispersion=dispersion)
+                    full_estimate = np.zeros(p)
+                    full_estimate[nonzero] = estimate
+                    err[l, k] = np.mean((y_val - X_val.dot(full_estimate)) ** 2.)
+                elif tuning == "randomized_LASSO":
+                    err[l, k] = np.mean((y_val - X_val.dot(conv.initial_soln)) ** 2.)
+
+        arg_min = np.argwhere(err == np.min(err))
+        lam = lam_seq[arg_min[0, 1]]
+        randomizer_scale = rand_scale_seq[arg_min[0, 0]]
+        #lam = lam_seq[np.argmin(err)]
         sys.stderr.write("lambda from randomized LASSO " + str(lam) + "\n")
+        sys.stderr.write("tuned randomized scale " + str(randomizer_scale) + "\n")
         #print(lam_tuned_lasso * n, lam, lam_seq)
 
         randomized_lasso = highdim.gaussian(X,
@@ -482,20 +490,19 @@ def comparison_risk_inference_full(n=200, p=500, nval=200, rho=0.35, s=5, beta_t
 
 if __name__ == "__main__":
 
-    ndraw = 1
+    ndraw = 50
     output_overall = np.zeros(27)
 
-    target = "full"
+    target = "selected"
     tuning = "selective_MLE"
-    n, p, rho, s, beta_type, snr = 500, 100, 0.35, 5, 1, 0.30
+    n, p, rho, s, beta_type, snr = 500, 100, 0.70, 5, 1, 0.10
+    #nval = 100
 
     if target == "selected":
         for i in range(ndraw):
             output = comparison_risk_inference_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr,
-                                                        randomizer_scale=np.sqrt(0.5), target=target, tuning= tuning,
+                                                        randomizer_scale=np.sqrt(0.25), target=target, tuning= tuning,
                                                         full_dispersion=True)
-
-            print("output", output)
             output_overall += np.squeeze(output)
 
             sys.stderr.write("overall selMLE risk " + str(output_overall[0] / float(i + 1)) + "\n")
diff --git a/selection/randomized/marginal_screening.py b/selection/randomized/marginal_screening.py
index 9279f2790..dae12d248 100644
--- a/selection/randomized/marginal_screening.py
+++ b/selection/randomized/marginal_screening.py
@@ -9,26 +9,41 @@
                                         multiple_queries,
                                         langevin_sampler,
                                         affine_gaussian_sampler)
+from scipy.stats import norm as ndist
 
 from ..algorithms.debiased_lasso import debiasing_matrix
 
-class marginal_screening():
+def BH_selection(p_values, level):
+
+    m = p_values.shape[0]
+    p_sorted = np.sort(p_values)
+    indices = np.arange(m)
+    indices_order = np.argsort(p_values)
+    order_sig = np.max(indices[p_sorted - np.true_divide(level * (np.arange(m) + 1.), m) <= 0])
+    E_sel = indices_order[:(order_sig+1)]
+
+    active = np.zeros(m, np.bool)
+    active[E_sel] = 1
+    return order_sig+1, active, p_values[indices_order[order_sig+1]]
+
+class BH():
 
     def __init__(self,
                  observed_score,
-                 threshold,
+                 sigma_hat,
                  randomizer_scale,
+                 level,
                  perturb=None):
 
         self.nfeature =  p = observed_score.shape[0]
-        if np.asarray(threshold).shape == ():
-            threshold = np.ones(p) * threshold
-        self.threshold = np.asarray(threshold)
+        self.sigma_hat = sigma_hat
 
         self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
         self._initial_omega = perturb
         self.observed_score = observed_score
 
+        self.level = level
+
     def fit(self, perturb=None):
 
         p = self.nfeature
@@ -39,13 +54,17 @@ def fit(self, perturb=None):
         if self._initial_omega is None:
             self._initial_omega = self.randomizer.sample()
 
-        randomized_score = self.observed_score + self._initial_omega
+        randomized_score = -self.observed_score + self._initial_omega
+        p_values = 2. * (1. - ndist.cdf(np.true_divide(np.abs(randomized_score),self.sigma_hat)))
+        K, active, p_threshold = BH_selection(p_values, self.level)
+        threshold = self.sigma_hat * ndist.ppf(1. - max((K * self.level) / p, p_threshold))
+        self.threshold = threshold
 
         self.boundary = np.fabs(randomized_score) > self.threshold
         self.interior = ~self.boundary
         active_signs = np.sign(randomized_score[self.boundary])
 
-        self.observed_opt_state = self._initial_omega[self.boundary] + self.observed_score[self.boundary] - \
+        self.observed_opt_state = self._initial_omega[self.boundary] - self.observed_score[self.boundary] - \
                                   np.diag(active_signs).dot(self.threshold[self.boundary])
         self.num_opt_var = self.observed_opt_state.shape[0]
 
@@ -53,7 +72,7 @@ def fit(self, perturb=None):
         opt_linear[self.boundary, :] = np.diag(active_signs)
         opt_offset = np.zeros(p)
         opt_offset[self.boundary] = active_signs * self.threshold[self.boundary]
-        opt_offset[self.interior] = self._initial_omega[self.interior] + self.observed_score[self.interior]
+        opt_offset[self.interior] = self._initial_omega[self.interior] - self.observed_score[self.interior]
         self.opt_transform = (opt_linear, opt_offset)
 
         cov, prec = self.randomizer.cov_prec
@@ -87,7 +106,7 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
                                                log_density,
                                                logdens_transform,
                                                selection_info=self.selection_variable)
-        return active_signs
+        return self.boundary
 
 
     def selective_MLE(self,
@@ -258,8 +277,8 @@ def debiased_targets(self,
     @staticmethod
     def gaussian(X,
                  Y,
-                 threshold,
-                 sigma=1.,
+                 sigma = 1.,
+                 level = 0.10,
                  randomizer_scale=None):
 
         n, p = X.shape
@@ -268,7 +287,9 @@ def gaussian(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        return marginal_screening(X.dot(Y), threshold, randomizer_scale)
+        sigma_hat = np.sqrt((sigma **2.) * (np.mean((X ** 2).sum(0))) + (randomizer_scale**2.))
+
+        return BH(-X.dot(Y), sigma_hat, randomizer_scale, level)
 
 
 
diff --git a/selection/randomized/tests/test_selectiveMLE_BH.py b/selection/randomized/tests/test_selectiveMLE_BH.py
new file mode 100644
index 000000000..d768dc6ca
--- /dev/null
+++ b/selection/randomized/tests/test_selectiveMLE_BH.py
@@ -0,0 +1,46 @@
+import numpy as np
+from selection.randomized.marginal_screening import marginal_screening
+from selection.tests.instance import gaussian_instance
+
+def test_full_targets(n=500, p=100, signal_fac=1.1, s=5, sigma=3, rho=0.4, randomizer_scale=0.25,
+                      full_dispersion=True):
+    """
+    Compare to R randomized lasso
+    """
+
+    inst = gaussian_instance
+    signal = np.sqrt(signal_fac * 2 * np.log(p))
+    X, Y, beta = inst(n=n,
+                      p=p,
+                      signal=signal,
+                      s=s,
+                      equicorrelated=False,
+                      rho=rho,
+                      sigma=sigma,
+                      random_signs=True)[:3]
+
+    idx = np.arange(p)
+    sigmaX = rho ** np.abs(np.subtract.outer(idx, idx))
+    print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) * n))
+
+    n, p = X.shape
+
+    sigma_ = np.std(Y)
+
+    conv = marginal_screening.gaussian(X,
+                                       Y,
+                                       sigma = sigma_,
+                                       randomizer_scale=randomizer_scale * sigma_)
+
+    boundary = conv.fit()
+    nonzero = boundary != 0
+    print("dimensions", n, p, nonzero.sum())
+
+    dispersion = None
+    if full_dispersion:
+        dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)
+
+    estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="full", dispersion=dispersion)
+
+    coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1])
+    return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals

From 7033ebc99933b76f9ac39900eecb079f77670612 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 27 Apr 2018 11:26:27 -0700
Subject: [PATCH 605/617] BH needs debugging

---
 selection/randomized/marginal_screening.py    | 216 +++++++++---------
 .../randomized/tests/test_selectiveMLE_BH.py  |  30 ++-
 2 files changed, 124 insertions(+), 122 deletions(-)

diff --git a/selection/randomized/marginal_screening.py b/selection/randomized/marginal_screening.py
index dae12d248..f5fdd9b8d 100644
--- a/selection/randomized/marginal_screening.py
+++ b/selection/randomized/marginal_screening.py
@@ -21,20 +21,23 @@ def BH_selection(p_values, level):
     indices_order = np.argsort(p_values)
     order_sig = np.max(indices[p_sorted - np.true_divide(level * (np.arange(m) + 1.), m) <= 0])
     E_sel = indices_order[:(order_sig+1)]
+    not_sel =indices_order[(order_sig+1):]
 
     active = np.zeros(m, np.bool)
     active[E_sel] = 1
-    return order_sig+1, active, p_values[indices_order[order_sig+1]]
+    return order_sig+1, active, np.argsort(p_values[np.sort(not_sel)])
 
 class BH():
 
     def __init__(self,
-                 observed_score,
+                 X,
+                 Y,
                  sigma_hat,
                  randomizer_scale,
                  level,
                  perturb=None):
 
+        observed_score = -X.T.dot(Y)
         self.nfeature =  p = observed_score.shape[0]
         self.sigma_hat = sigma_hat
 
@@ -43,6 +46,7 @@ def __init__(self,
         self.observed_score = observed_score
 
         self.level = level
+        self.data = (X, Y)
 
     def fit(self, perturb=None):
 
@@ -56,13 +60,25 @@ def fit(self, perturb=None):
 
         randomized_score = -self.observed_score + self._initial_omega
         p_values = 2. * (1. - ndist.cdf(np.true_divide(np.abs(randomized_score),self.sigma_hat)))
-        K, active, p_threshold = BH_selection(p_values, self.level)
-        threshold = self.sigma_hat * ndist.ppf(1. - max((K * self.level) / p, p_threshold))
-        self.threshold = threshold
+        K, active, sort_notsel_pvals = BH_selection(p_values, self.level)
+        BH_cutoff = self.sigma_hat * ndist.ppf(1. - (K * self.level) /(2.*p))
+        if np.array(BH_cutoff).shape in [(), (1,)]:
+            BH_cutoff = np.ones(p) * BH_cutoff
+        self.BH_cutoff = BH_cutoff
 
-        self.boundary = np.fabs(randomized_score) > self.threshold
+        self.boundary = np.fabs(randomized_score) > self.BH_cutoff
         self.interior = ~self.boundary
         active_signs = np.sign(randomized_score[self.boundary])
+        signs = np.sign(randomized_score)
+
+        self.selection_variable = {'sign': signs.copy(),
+                                   'variables': self.boundary.copy()}
+
+        self.threshold = threshold = np.zeros(p)  # same array object: the in-place writes below are visible via self.threshold
+        threshold[self.boundary] = self.BH_cutoff[self.boundary]
+        cut_off_vector = ndist.ppf(1. - ((K+np.arange(self.interior.sum())+1) * self.level) /(2.*p))  # normal quantiles for ranks K+1, K+2, ...
+        interior_sorted = np.flatnonzero(self.interior)[sort_notsel_pvals]  # positions of unselected coordinates, by increasing p-value
+        threshold[interior_sorted] = self.sigma_hat[interior_sorted] * cut_off_vector  # integer index: `threshold[mask][idx] = v` would only write to a copy
 
         self.observed_opt_state = self._initial_omega[self.boundary] - self.observed_score[self.boundary] - \
                                   np.diag(active_signs).dot(self.threshold[self.boundary])
@@ -139,14 +155,14 @@ def selective_MLE(self,
         """
 
         if parameter is None:
-            parameter = np.zeros(self.loglike.shape[0])
+            parameter = np.zeros(self.nfeature)
 
         if target == 'selected':
             observed_target, cov_target, cov_target_score, alternatives = self.selected_targets(features=features,
                                                                                                 dispersion=dispersion)
 
         elif target == 'full':
-            X, y = self.loglike.data
+            X, y = self.data
             n, p = X.shape
             if n > p:
                 observed_target, cov_target, cov_target_score, alternatives = self.full_targets(features=features,
@@ -164,115 +180,87 @@ def selective_MLE(self,
 
     def selected_targets(self, features=None, dispersion=None):
 
-        X, y = self.loglike.data
+        X, y = self.data
         n, p = X.shape
 
-        if features is None:
-            active = self._active
-            unpenalized = self._unpenalized
-            noverall = active.sum() + unpenalized.sum()
-            overall = active + unpenalized
-
-            score_linear = self.score_transform[0]
-            Q = -score_linear[overall]
-            cov_target = np.linalg.inv(Q)
-            observed_target = self._beta_full[overall]
-            crosscov_target_score = score_linear.dot(cov_target)
-            Xfeat = X[:, overall]
-            alternatives = [{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][active]] + [
-                                                                                                                       'twosided'] * unpenalized.sum()
-
-        else:
-
-            features_b = np.zeros_like(self._overall)
-            features_b[features] = True
-            features = features_b
-
-            Xfeat = X[:, features]
-            Qfeat = Xfeat.T.dot(self._W[:, None] * Xfeat)
-            Gfeat = self.loglike.smooth_objective(self.initial_soln, 'grad')[features]
-            Qfeat_inv = np.linalg.inv(Qfeat)
-            one_step = self.initial_soln[features] - Qfeat_inv.dot(Gfeat)
-            cov_target = Qfeat_inv
-            _score_linear = -Xfeat.T.dot(self._W[:, None] * X).T
-            crosscov_target_score = _score_linear.dot(cov_target)
-            observed_target = one_step
-            alternatives = ['twosided'] * features.sum()
-
-        if dispersion is None:  # use Pearson's X^2
-            dispersion = ((y - self.loglike.saturated_loss.mean_function(
-                Xfeat.dot(observed_target))) ** 2 / self._W).sum() / (n - Xfeat.shape[1])
+        overall = self.boundary
+        score_linear = -X.T.dot(X[:, overall])
+        Q = -score_linear[overall]
+        cov_target = np.linalg.inv(Q)
+        observed_target = np.linalg.inv(Q).dot(X[:, overall].T.dot(y))
+        crosscov_target_score = score_linear.dot(cov_target)
+        alternatives = ([{1: 'greater', -1: 'less'}[int(s)] for s in self.selection_variable['sign'][self.boundary]])
 
         return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
-    def full_targets(self, features=None, dispersion=None):
-
-        if features is None:
-            features = self._overall
-        features_bool = np.zeros(self._overall.shape, np.bool)
-        features_bool[features] = True
-        features = features_bool
-
-        X, y = self.loglike.data
-        n, p = X.shape
-
-        # target is one-step estimator
-
-        Qfull = X.T.dot(self._W[:, None] * X)
-        G = self.loglike.smooth_objective(self.initial_soln, 'grad')
-        Qfull_inv = np.linalg.inv(Qfull)
-        one_step = self.initial_soln - Qfull_inv.dot(G)
-        cov_target = Qfull_inv[features][:, features]
-        observed_target = one_step[features]
-        crosscov_target_score = np.zeros((p, cov_target.shape[0]))
-        crosscov_target_score[features] = -np.identity(cov_target.shape[0])
-
-        if dispersion is None:  # use Pearson's X^2
-            dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step))) ** 2 / self._W).sum() / (
-            n - p)
-
-        alternatives = ['twosided'] * features.sum()
-        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
-
-    def debiased_targets(self,
-                         features=None,
-                         dispersion=None,
-                         debiasing_args={}):
-
-        if features is None:
-            features = self._overall
-        features_bool = np.zeros(self._overall.shape, np.bool)
-        features_bool[features] = True
-        features = features_bool
-
-        X, y = self.loglike.data
-        n, p = X.shape
-
-        # target is one-step estimator
-
-        G = self.loglike.smooth_objective(self.initial_soln, 'grad')
-        Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(self._W)[:, None],
-                                                  np.nonzero(features)[0],
-                                                  **debiasing_args)) / n
-        observed_target = self.initial_soln[features] - Qinv_hat.dot(G)
-        if p > n:
-            M1 = Qinv_hat.dot(X.T)
-            cov_target = (M1 * self._W[None, :]).dot(M1.T)
-            crosscov_target_score = -(M1 * self._W[None, :]).dot(X).T
-        else:
-            Qfull = X.T.dot(self._W[:, None] * X)
-            cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T))
-            crosscov_target_score = -Qinv_hat.dot(Qfull).T
-
-        if dispersion is None:  # use Pearson's X^2
-            Xfeat = X[:, features]
-            Qrelax = Xfeat.T.dot(self._W[:, None] * Xfeat)
-            relaxed_soln = self.initial_soln[features] - np.linalg.inv(Qrelax).dot(G[features])
-            dispersion = ((y - self.loglike.saturated_loss.mean_function(
-                Xfeat.dot(relaxed_soln))) ** 2 / self._W).sum() / (n - features.sum())
-
-        alternatives = ['twosided'] * features.sum()
-        return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+    # def full_targets(self, features=None, dispersion=None):
+    #
+    #     if features is None:
+    #         features = self.boundary
+    #     features_bool = np.zeros(self.boundary.shape, np.bool)
+    #     features_bool[features] = True
+    #     features = features_bool
+    #
+    #     X, y = self.data
+    #     n, p = X.shape
+    #
+    #     # target is one-step estimator
+    #
+    #     Qfull = X.T.dot(self._W[:, None] * X)
+    #     G = self.loglike.smooth_objective(self.initial_soln, 'grad')
+    #     Qfull_inv = np.linalg.inv(Qfull)
+    #     one_step = self.initial_soln - Qfull_inv.dot(G)
+    #     cov_target = Qfull_inv[features][:, features]
+    #     observed_target = one_step[features]
+    #     crosscov_target_score = np.zeros((p, cov_target.shape[0]))
+    #     crosscov_target_score[features] = -np.identity(cov_target.shape[0])
+    #
+    #     if dispersion is None:  # use Pearson's X^2
+    #         dispersion = ((y - self.loglike.saturated_loss.mean_function(X.dot(one_step))) ** 2 / self._W).sum() / (
+    #         n - p)
+    #
+    #     alternatives = ['twosided'] * features.sum()
+    #     return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
+    #
+    # def debiased_targets(self,
+    #                      features=None,
+    #                      dispersion=None,
+    #                      debiasing_args={}):
+    #
+    #     if features is None:
+    #         features = self._overall
+    #     features_bool = np.zeros(self._overall.shape, np.bool)
+    #     features_bool[features] = True
+    #     features = features_bool
+    #
+    #     X, y = self.data
+    #     n, p = X.shape
+    #
+    #     # target is one-step estimator
+    #
+    #     G = self.loglike.smooth_objective(self.initial_soln, 'grad')
+    #     Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(self._W)[:, None],
+    #                                               np.nonzero(features)[0],
+    #                                               **debiasing_args)) / n
+    #     observed_target = self.initial_soln[features] - Qinv_hat.dot(G)
+    #     if p > n:
+    #         M1 = Qinv_hat.dot(X.T)
+    #         cov_target = (M1 * self._W[None, :]).dot(M1.T)
+    #         crosscov_target_score = -(M1 * self._W[None, :]).dot(X).T
+    #     else:
+    #         Qfull = X.T.dot(self._W[:, None] * X)
+    #         cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T))
+    #         crosscov_target_score = -Qinv_hat.dot(Qfull).T
+    #
+    #     if dispersion is None:  # use Pearson's X^2
+    #         Xfeat = X[:, features]
+    #         Qrelax = Xfeat.T.dot(self._W[:, None] * Xfeat)
+    #         relaxed_soln = self.initial_soln[features] - np.linalg.inv(Qrelax).dot(G[features])
+    #         dispersion = ((y - self.loglike.saturated_loss.mean_function(
+    #             Xfeat.dot(relaxed_soln))) ** 2 / self._W).sum() / (n - features.sum())
+    #
+    #     alternatives = ['twosided'] * features.sum()
+    #     return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives
 
     @staticmethod
     def gaussian(X,
@@ -287,9 +275,9 @@ def gaussian(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        sigma_hat = np.sqrt((sigma **2.) * (np.mean((X ** 2).sum(0))) + (randomizer_scale**2.))
+        sigma_hat = np.sqrt((sigma ** 2.) * (np.diag(X.T.dot(X))) + (randomizer_scale**2.))
 
-        return BH(-X.dot(Y), sigma_hat, randomizer_scale, level)
+        return BH(X, Y, sigma_hat, randomizer_scale, level)
 
 
 
diff --git a/selection/randomized/tests/test_selectiveMLE_BH.py b/selection/randomized/tests/test_selectiveMLE_BH.py
index d768dc6ca..6fb07771a 100644
--- a/selection/randomized/tests/test_selectiveMLE_BH.py
+++ b/selection/randomized/tests/test_selectiveMLE_BH.py
@@ -1,9 +1,9 @@
 import numpy as np
-from selection.randomized.marginal_screening import marginal_screening
+from selection.randomized.marginal_screening import BH
 from selection.tests.instance import gaussian_instance
 
-def test_full_targets(n=500, p=100, signal_fac=1.1, s=5, sigma=3, rho=0.4, randomizer_scale=0.25,
-                      full_dispersion=True):
+def test_selected_targets(n=500, p=100, signal_fac=1.6, s=5, sigma=3, rho=0.4, randomizer_scale=0.25,
+                          full_dispersion=True):
     """
     Compare to R randomized lasso
     """
@@ -27,10 +27,10 @@ def test_full_targets(n=500, p=100, signal_fac=1.1, s=5, sigma=3, rho=0.4, rando
 
     sigma_ = np.std(Y)
 
-    conv = marginal_screening.gaussian(X,
-                                       Y,
-                                       sigma = sigma_,
-                                       randomizer_scale=randomizer_scale * sigma_)
+    conv = BH.gaussian(X,
+                       Y,
+                       sigma = sigma_,
+                       randomizer_scale=randomizer_scale * sigma_)
 
     boundary = conv.fit()
     nonzero = boundary != 0
@@ -40,7 +40,21 @@ def test_full_targets(n=500, p=100, signal_fac=1.1, s=5, sigma=3, rho=0.4, rando
     if full_dispersion:
         dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)
 
-    estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="full", dispersion=dispersion)
+    estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=dispersion)
 
     coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1])
+    print("coverage for selected target", coverage.sum()/float(nonzero.sum()))
     return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals
+
+def main(nsim=100):
+    """Run `nsim` simulations of `test_selected_targets`, printing the running mean coverage."""
+    P0, PA, cover = [], [], []
+    for _sim in range(nsim):
+        p0, pA, cover_, _ = test_selected_targets()
+        cover.extend(cover_)
+        P0.extend(p0)
+        PA.extend(pA)
+        print(np.mean(cover),'coverage so far')
+
+if __name__ == "__main__":  # do not launch the simulation when the module is only imported / collected by a test runner
+    main()

From 5f45a52b26bcedebe712a611d3f960614b5fa7fe Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 27 Apr 2018 16:01:47 -0700
Subject: [PATCH 606/617] affine version of barrier problem

---
 selection/algorithms/tests/test_compareR.py   |  22 ++-
 selection/randomized/selective_MLE_utils.pyx  |  83 +++++++++
 .../randomized/tests/test_selective_MLE.py    |  41 ++++-
 selection/randomized/tests/test_slope.py      | 162 +++++++++---------
 4 files changed, 215 insertions(+), 93 deletions(-)

diff --git a/selection/algorithms/tests/test_compareR.py b/selection/algorithms/tests/test_compareR.py
index e7d9d7192..be1b5c039 100644
--- a/selection/algorithms/tests/test_compareR.py
+++ b/selection/algorithms/tests/test_compareR.py
@@ -505,7 +505,7 @@ def test_full_lasso_tall():
         X *= np.sqrt(n)
         L = lasso_full.gaussian(X, y, lam)
         L.fit()
-        if len(L.active) > 0:
+        if len(L.active) > 2:
             S = L.summary(compute_intervals=False, dispersion=sigma**2)
             numpy2ri.activate()
 
@@ -551,7 +551,7 @@ def test_full_lasso_tall_logistic():
         X *= np.sqrt(n)
         L = lasso_full.logistic(X, y, lam)
         L.fit()
-        if len(L.active) > 0:
+        if len(L.active) > 2:
             S = L.summary(compute_intervals=False)
             numpy2ri.activate()
 
@@ -585,7 +585,7 @@ def test_full_lasso_tall_logistic():
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_full_lasso_wide():
-    n, p, s = 30, 60, 15
+    n, p, s = 100, 200, 15
 
     while True:
         X, y, _, _, sigma = gaussian_instance(n=n, p=p, s=s, equicorrelated=False, signal=4)
@@ -595,7 +595,7 @@ def test_full_lasso_wide():
         L = lasso_full.gaussian(X, y, lam)
         L.fit()
 
-        if len(L.active) > 0:
+        if len(L.active) > 2:
             S = L.summary(compute_intervals=False, dispersion=sigma**2)
             numpy2ri.activate()
 
@@ -623,9 +623,8 @@ def test_full_lasso_wide():
             pvalues = rpy.r('pvalues')
             active_set = rpy.r('active_vars')
 
-            import sys
-            sys.stderr.write(repr(pvalues))
-            sys.stderr.write(repr(S['pval']))
+            print(pvalues)
+            print(np.asarray(S['pval']))
 
             nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
             numpy2ri.deactivate()
@@ -633,7 +632,7 @@ def test_full_lasso_wide():
 
 @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test")
 def test_full_lasso_wide_logistic():
-    n, p, s = 30, 60, 15
+    n, p, s = 100, 200, 15
 
     while True:
         X, y, _, _ = logistic_instance(n=n, p=p, s=s, equicorrelated=False, signal=10)
@@ -643,7 +642,7 @@ def test_full_lasso_wide_logistic():
         L = lasso_full.logistic(X, y, lam)
         L.fit()
 
-        if len(L.active) > 0:
+        if len(L.active) > 2:
             S = L.summary(compute_intervals=False, dispersion=1.)
             numpy2ri.activate()
 
@@ -670,9 +669,8 @@ def test_full_lasso_wide_logistic():
             pvalues = rpy.r('pvalues')
             active_set = rpy.r('active_vars')
 
-            import sys
-            sys.stderr.write(repr(pvalues))
-            sys.stderr.write(repr(S['pval']))
+            print(pvalues)
+            print(np.asarray(S['pval']))
 
             nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999)
             numpy2ri.deactivate()
diff --git a/selection/randomized/selective_MLE_utils.pyx b/selection/randomized/selective_MLE_utils.pyx
index 25c1be2de..719174773 100644
--- a/selection/randomized/selective_MLE_utils.pyx
+++ b/selection/randomized/selective_MLE_utils.pyx
@@ -21,6 +21,22 @@ cdef extern from "randomized_lasso.h":
                          double value_tol,                   # Tolerance for convergence based on value
                          double initial_step)                # Initial stepsize 
 
+    double barrier_solve_affine(double *gradient,                   # Gradient vector
+                                double *opt_variable,               # Optimization variable
+                                double *opt_proposed,               # New value of optimization variable
+                                double *conjugate_arg,              # Argument to conjugate of Gaussian
+                                double *precision,                  # Precision matrix of Gaussian
+                                double *scaling,                    # Diagonal scaling matrix for log barrier
+                                double *linear_term,                # Matrix A in constraint Au \leq b
+                                double *offset,                     # Offset b in constraint Au \leq b
+                                double *affine_term,                # Should be equal to b - A.dot(opt_variable)    
+                                int ndim,                           # Dimension of conjugate_arg, precision
+                                int ncon,                           # Number of constraints
+                                int max_iter,                       # Maximum number of iterations
+                                int min_iter,                       # Minimum number of iterations
+                                double value_tol,                   # Tolerance for convergence based on value
+                                double initial_step);               # Initial step size
+
 def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient vector
                    np.ndarray[DTYPE_float_t, ndim=1] opt_variable,  # Optimization variable
                    np.ndarray[DTYPE_float_t, ndim=1] opt_proposed,  # New value of optimization variable
@@ -50,6 +66,44 @@ def barrier_solve_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient v
     hess = np.linalg.inv(precision + np.diag(barrier_hessian(opt_variable, scaling)))
     return value, opt_variable, hess
 
+def barrier_solve_affine_(np.ndarray[DTYPE_float_t, ndim=1] gradient ,     # Gradient vector
+                          np.ndarray[DTYPE_float_t, ndim=1] opt_variable,  # Optimization variable
+                          np.ndarray[DTYPE_float_t, ndim=1] opt_proposed,  # New value of optimization variable
+                          np.ndarray[DTYPE_float_t, ndim=1] conjugate_arg, # Argument to conjugate of Gaussian
+                          np.ndarray[DTYPE_float_t, ndim=2] precision,     # Precision matrix of Gaussian
+                          np.ndarray[DTYPE_float_t, ndim=1] scaling,       # Diagonal scaling matrix for log barrier
+                          np.ndarray[DTYPE_float_t, ndim=2] linear_term,   # Linear part of affine constraint: A
+                          np.ndarray[DTYPE_float_t, ndim=1] offset,        # Offset part of affine constraint: b
+                          np.ndarray[DTYPE_float_t, ndim=1] affine_term,   # b - A.dot(opt)
+                          double initial_step,                             # Initial step size
+                          int max_iter=1000,                               # Maximum number of iterations
+                          int min_iter=50,                                 # Minimum number of iterations
+                          double value_tol=1.e-8):                         # Tolerance for convergence based on value
+    """Run the C routine `barrier_solve_affine` for the constraint A u <= b; return (value, opt_variable, hess)."""
+    ndim = precision.shape[0]   # Dimension of conjugate_arg, precision
+    ncon = linear_term.shape[0] # Number of constraints (rows of A)
+
+    # NOTE(review): raw `.data` pointers are handed to C with no contiguity / memory-order check here;
+    # presumably callers must pass contiguous float arrays in the layout the C code expects -- confirm.
+    value = barrier_solve_affine(<double *>gradient.data,
+                                  <double *>opt_variable.data,
+                                  <double *>opt_proposed.data,
+                                  <double *>conjugate_arg.data,
+                                  <double *>precision.data,
+                                  <double *>scaling.data,
+                                  <double *>linear_term.data,
+                                  <double *>offset.data,
+                                  <double *>affine_term.data,
+                                  ndim,
+                                  ncon,
+                                  max_iter,
+                                  min_iter,
+                                  value_tol,
+                                  initial_step)
+
+    final_affine = offset - linear_term.dot(opt_variable)  # b - A.dot(opt_variable); assumes the C routine updated opt_variable in place -- TODO confirm
+    barrier_hessian = lambda u, v: (-1./((v + u)**2.) + 1./(u**2.))  # second derivative in u of log(1 + v/u)
+    hess = np.linalg.inv(precision + linear_term.T.dot(np.diag(barrier_hessian(final_affine, scaling))).dot(linear_term))  # inverse of precision + A^T diag(.) A
+    return value, opt_variable, hess  # opt_variable is the very array object passed in by the caller
+
 def solve_barrier_nonneg(conjugate_arg,
                          precision,
                          feasible_point,
@@ -73,3 +127,32 @@ def solve_barrier_nonneg(conjugate_arg,
                           max_iter=max_iter,
                           min_iter=min_iter,
                           value_tol=tol)
+
+def solve_barrier_affine(conjugate_arg,
+                         precision,
+                         feasible_point,
+                         linear_term,
+                         offset,
+                         step=1,
+                         max_iter=1000,
+                         min_iter=50,
+                         tol=1.e-8):
+    """Allocate work buffers and call `barrier_solve_affine_` for the constraint A u <= b (A=linear_term, b=offset)."""
+    gradient = np.zeros_like(conjugate_arg)
+    opt_variable = np.asarray(feasible_point)  # NOTE: no copy is made when feasible_point is already an ndarray
+    opt_proposed = opt_variable.copy()
+    affine_term = offset - linear_term.dot(opt_variable)  # b - A.dot(opt): the wrapper's 9th positional argument, previously omitted
+    scaling = np.sqrt(np.diag(linear_term.dot(precision).dot(linear_term.T)))  # one scale per constraint row
+    return barrier_solve_affine_(gradient,  # returns (value, opt_variable, hess)
+                                 opt_variable,
+                                 opt_proposed,
+                                 conjugate_arg,
+                                 precision,
+                                 scaling,
+                                 linear_term,
+                                 offset,
+                                 affine_term,
+                                 step,
+                                 max_iter=max_iter,
+                                 min_iter=min_iter,
+                                 value_tol=tol)
diff --git a/selection/randomized/tests/test_selective_MLE.py b/selection/randomized/tests/test_selective_MLE.py
index 6e2f38b09..c67a2731a 100644
--- a/selection/randomized/tests/test_selective_MLE.py
+++ b/selection/randomized/tests/test_selective_MLE.py
@@ -2,7 +2,7 @@
 import functools
 
 from ...tests.decorators import set_seed_iftrue
-from ..selective_MLE_utils import barrier_solve_
+from ..selective_MLE_utils import barrier_solve_, barrier_solve_affine_
 
 from .test_selective_MLE_onedim import solve_barrier_nonneg
 
@@ -34,3 +34,42 @@ def test_C_solver():
     np.testing.assert_allclose(hess1, hess2, atol=1.e-4, rtol=1.e-4)
     assert (np.fabs(val1 - val2) < 1.e-4 * np.fabs(val1))
 
+@set_seed_iftrue(True)
+def test_affine_solver():
+    """Check that the affine barrier solver with A = -I, b = 0 agrees with the nonnegative solver."""
+
+    X = np.random.standard_normal((10, 5))
+    precision = X.T.dot(X) / 10
+    conjugate_arg = np.random.standard_normal(5)
+    scaling = np.sqrt(np.diag(precision))
+
+    # Both wrappers return the very `opt_variable` array they were handed, so each solver
+    # gets its own buffers; sharing one array would make soln1 and soln2 the same object.
+    grad1, opt_val1, opt_proposed1 = np.ones((3, 5))
+    val1, soln1, hess1 = barrier_solve_(grad1,
+                                        opt_val1,
+                                        opt_proposed1,
+                                        conjugate_arg,
+                                        precision,
+                                        scaling,
+                                        1.,
+                                        value_tol=1.e-12)
+
+    # affine_term starts at b - A.dot(opt) = 0 + opt = ones, in a buffer separate from opt_val2
+    grad2, opt_val2, opt_proposed2, affine_term = np.ones((4, 5))
+    val2, soln2, hess2 = barrier_solve_affine_(grad2,
+                                               opt_val2,
+                                               opt_proposed2,
+                                               conjugate_arg,
+                                               precision,
+                                               scaling,
+                                               -np.identity(5),
+                                               np.zeros(5),
+                                               affine_term,
+                                               1.,
+                                               value_tol=1.e-12)
+
+    np.testing.assert_allclose(soln1, soln2, atol=1.e-4, rtol=1.e-4)
+    np.testing.assert_allclose(hess1, hess2, atol=1.e-4, rtol=1.e-4)
+    assert (np.fabs(val1 - val2) < 1.e-4 * np.fabs(val1))
+
diff --git a/selection/randomized/tests/test_slope.py b/selection/randomized/tests/test_slope.py
index 13725fa21..f8c1a983b 100644
--- a/selection/randomized/tests/test_slope.py
+++ b/selection/randomized/tests/test_slope.py
@@ -13,6 +13,8 @@
 import regreg.api as rr
 
 from selection.randomized.slope import slope
+from statsmodels.distributions import ECDF
+
 import matplotlib.pyplot as plt
 
 def test_slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma = None):
@@ -101,65 +103,66 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rh
 
     print("relative difference in solns", np.linalg.norm(soln-r_beta)/np.linalg.norm(r_beta))
 
-#compare_outputs_SLOPE_weights()
-
-def test0_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
-                     randomizer_scale= np.sqrt(0.25),
-                     solve_args={'tol':1.e-12, 'min_its':50}):
-
-    inst = gaussian_instance
-    signal = np.sqrt(signal_fac * 2. * np.log(p))
-    X, Y, beta = inst(n=n,
-                      p=p,
-                      signal=signal,
-                      s=s,
-                      equicorrelated=False,
-                      rho=rho,
-                      sigma=sigma,
-                      random_signs=True)[:3]
-
-    sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))
-    r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X,
-                                                      Y,
-                                                      W=None,
-                                                      normalize=True,
-                                                      choice_weights="gaussian",
-                                                      sigma=sigma_)
-
-    pen = slope(r_sigma * r_lambda_seq, lagrange=1.)
-
-    loglike = rr.glm.gaussian(X, Y, coef=1., quadratic=None)
-    _initial_omega = randomizer_scale * sigma_* np.random.standard_normal(p)
-    quad = rr.identity_quadratic(0, 0, -_initial_omega, 0)
-    problem = rr.simple_problem(loglike, pen)
-    initial_soln = problem.solve(quad, **solve_args)
-    initial_subgrad = -(loglike.smooth_objective(initial_soln, 'grad') + quad.objective(initial_soln, 'grad'))
-
-    indices = np.argsort(-np.abs(initial_soln))
-    sorted_soln = initial_soln[indices]
-
-    cur_indx_array = []
-    cur_indx_array.append(0)
-    cur_indx = 0
-    pointer = 0
-    signs_cluster = []
-    for j in range(p-1):
-        if np.abs(sorted_soln[j+1]) != np.abs(sorted_soln[cur_indx]):
-            cur_indx_array.append(j+1)
-            cur_indx = j+1
-            sign_vec = np.zeros(p)
-            sign_vec[np.arange(j+1-cur_indx_array[pointer]) + cur_indx_array[pointer]] = \
-                np.sign(initial_soln[indices[np.arange(j+1-cur_indx_array[pointer]) + cur_indx_array[pointer]]])
-            signs_cluster.append(sign_vec)
-            pointer = pointer + 1
-            if sorted_soln[j+1]== 0:
-                break
-
-    signs_cluster = np.asarray(signs_cluster).T
-    X_clustered = X[:, indices].dot(signs_cluster)
-    print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
-
-def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0., randomizer_scale= np.sqrt(0.5)):
+# #compare_outputs_SLOPE_weights()
+
+# def test0_randomized_slope(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35,
+#                      randomizer_scale= np.sqrt(0.25),
+#                      solve_args={'tol':1.e-12, 'min_its':50}):
+
+#     inst = gaussian_instance
+#     signal = np.sqrt(signal_fac * 2. * np.log(p))
+#     X, Y, beta = inst(n=n,
+#                       p=p,
+#                       signal=signal,
+#                       s=s,
+#                       equicorrelated=False,
+#                       rho=rho,
+#                       sigma=sigma,
+#                       random_signs=True)[:3]
+
+#     sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))
+#     r_beta, r_E, r_lambda_seq, r_sigma = test_slope_R(X,
+#                                                       Y,
+#                                                       W=None,
+#                                                       normalize=True,
+#                                                       choice_weights="gaussian",
+#                                                       sigma=sigma_)
+
+#     pen = slope(r_sigma * r_lambda_seq, lagrange=1.)
+
+#     loglike = rr.glm.gaussian(X, Y, coef=1., quadratic=None)
+#     _initial_omega = randomizer_scale * sigma_* np.random.standard_normal(p)
+#     quad = rr.identity_quadratic(0, 0, -_initial_omega, 0)
+#     problem = rr.simple_problem(loglike, pen)
+#     initial_soln = problem.solve(quad, **solve_args)
+#     initial_subgrad = -(loglike.smooth_objective(initial_soln, 'grad') + quad.objective(initial_soln, 'grad'))
+
+#     indices = np.argsort(-np.abs(initial_soln))
+#     sorted_soln = initial_soln[indices]
+
+#     cur_indx_array = []
+#     cur_indx_array.append(0)
+#     cur_indx = 0
+#     pointer = 0
+#     signs_cluster = []
+#     for j in range(p-1):
+#         if np.abs(sorted_soln[j+1]) != np.abs(sorted_soln[cur_indx]):
+#             cur_indx_array.append(j+1)
+#             cur_indx = j+1
+#             sign_vec = np.zeros(p)
+#             sign_vec[np.arange(j+1-cur_indx_array[pointer]) + cur_indx_array[pointer]] = \
+#                 np.sign(initial_soln[indices[np.arange(j+1-cur_indx_array[pointer]) + cur_indx_array[pointer]]])
+#             signs_cluster.append(sign_vec)
+#             pointer = pointer + 1
+#             if sorted_soln[j+1]== 0:
+#                 break
+
+#     signs_cluster = np.asarray(signs_cluster).T
+#     X_clustered = X[:, indices].dot(signs_cluster)
+#     print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
+
+def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0., randomizer_scale= np.sqrt(0.5),
+                          use_MLE=False):
 
     while True:
         inst = gaussian_instance
@@ -190,41 +193,40 @@ def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0., ra
         nonzero = signs != 0
         print("dimensions", n, p, nonzero.sum())
         if nonzero.sum() > 0:
-            estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=sigma_)
-            print("estimate", estimate, pval, intervals)
-
             beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
+            if use_MLE:
+                estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=sigma_)
+                print("estimate", estimate, pval, intervals)
+            else:
+                _, pval, intervals = conv.summary(target="selected", dispersion=sigma_, compute_intervals=True)
             coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
             break
 
-    if True:
-        return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
+    print(beta_target)
+    return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
 
 def main(nsim=100):
 
     P0, PA, cover, length_int = [], [], [], []
-    #from statsmodels.distributions import ECDF
-
+    
     for i in range(nsim):
         p0, pA, cover_, intervals = test_randomized_slope()
 
         cover.extend(cover_)
         P0.extend(p0)
         PA.extend(pA)
-        print(np.mean(cover),'null pvalue + power')
-
-    #     if i % 3 == 0 and i > 0:
-    #         U = np.linspace(0, 1, 101)
-    #         plt.clf()
-    #         if len(P0) > 0:
-    #             plt.plot(U, ECDF(P0)(U))
-    #         if len(PA) > 0:
-    #             plt.plot(U, ECDF(PA)(U), 'r')
-    #         plt.plot([0, 1], [0, 1], 'k--')
-    #         plt.savefig("/Users/snigdhapanigrahi/Desktop/plot.pdf")
-    # plt.show()
-
-main()
+        print('coverage', np.mean(cover))
+
+        if i % 3 == 0 and i > 0:
+            U = np.linspace(0, 1, 101)
+            plt.clf()
+            if len(P0) > 0:
+                plt.plot(U, ECDF(P0)(U))
+            if len(PA) > 0:
+                plt.plot(U, ECDF(PA)(U), 'r')
+            plt.plot([0, 1], [0, 1], 'k--')
+            plt.draw()
+
 
 
 

From 9b2f212bc5fc5d24c251498f12a9fff8927bf0aa Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Fri, 27 Apr 2018 16:03:20 -0700
Subject: [PATCH 607/617] updated of C software for barrier affine

---
 C-software | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/C-software b/C-software
index 92d2f9c4a..6947acd27 160000
--- a/C-software
+++ b/C-software
@@ -1 +1 @@
-Subproject commit 92d2f9c4ac67aabfab39e67961f7fef3f03611d5
+Subproject commit 6947acd27a894a25b28f02bbe7cd6a2127b9db05

From 14ca8d979755587810ada80711180be8a3fe129d Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 27 Apr 2018 22:54:53 -0700
Subject: [PATCH 608/617] BH is not working yet

---
 selection/randomized/marginal_screening.py    | 25 ++++++++++++-------
 .../randomized/tests/test_selectiveMLE_BH.py  |  3 ++-
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/selection/randomized/marginal_screening.py b/selection/randomized/marginal_screening.py
index f5fdd9b8d..851c75766 100644
--- a/selection/randomized/marginal_screening.py
+++ b/selection/randomized/marginal_screening.py
@@ -25,6 +25,9 @@ def BH_selection(p_values, level):
 
     active = np.zeros(m, np.bool)
     active[E_sel] = 1
+
+    #print("check ordering", ((np.sort(p_values[np.sort(not_sel)])
+    #                          - ((order_sig+1 +np.arange(m-active.sum())+1) * level) /(2.* m))>=0.).sum()+ active.sum())
     return order_sig+1, active, np.argsort(p_values[np.sort(not_sel)])
 
 class BH():
@@ -68,27 +71,31 @@ def fit(self, perturb=None):
 
         self.boundary = np.fabs(randomized_score) > self.BH_cutoff
         self.interior = ~self.boundary
-        active_signs = np.sign(randomized_score[self.boundary])
-        signs = np.sign(randomized_score)
+        active_signs = np.sign(randomized_score)
 
-        self.selection_variable = {'sign': signs.copy(),
+        self.selection_variable = {'sign': active_signs.copy(),
                                    'variables': self.boundary.copy()}
 
         threshold = np.zeros(p)
         threshold[self.boundary] = self.BH_cutoff[self.boundary]
-        cut_off_vector = ndist.ppf(1. - ((K+np.arange(self.interior.sum())+1) * self.level) /(2.*p))
-        (threshold[self.interior])[sort_notsel_pvals] = (self.sigma_hat[self.interior])[sort_notsel_pvals] * cut_off_vector
+        cut_off_vector = ndist.ppf(1. - ((K+np.arange(self.interior.sum())+1) * self.level)/float(2.* p))
+
+        indices_interior = np.asarray([u for u in range(p) if self.interior[u]])
+        threshold[indices_interior[sort_notsel_pvals]] = (self.sigma_hat[self.interior])[sort_notsel_pvals] * cut_off_vector
+
         self.threshold = threshold
 
         self.observed_opt_state = self._initial_omega[self.boundary] - self.observed_score[self.boundary] - \
-                                  np.diag(active_signs).dot(self.threshold[self.boundary])
+                                  np.diag(active_signs[self.boundary]).dot(self.threshold[self.boundary])
         self.num_opt_var = self.observed_opt_state.shape[0]
 
         opt_linear = np.zeros((p, self.num_opt_var))
-        opt_linear[self.boundary, :] = np.diag(active_signs)
+        opt_linear[self.boundary, :] = np.diag(active_signs[self.boundary])
         opt_offset = np.zeros(p)
-        opt_offset[self.boundary] = active_signs * self.threshold[self.boundary]
-        opt_offset[self.interior] = self._initial_omega[self.interior] - self.observed_score[self.interior]
+        opt_offset[self.boundary] = active_signs[self.boundary] * self.threshold[self.boundary]
+        opt_offset[self.interior] = randomized_score[self.interior]
+
+        print("check", (np.abs(opt_offset[self.interior])< threshold[self.interior]).sum(), self.interior.sum())
         self.opt_transform = (opt_linear, opt_offset)
 
         cov, prec = self.randomizer.cov_prec
diff --git a/selection/randomized/tests/test_selectiveMLE_BH.py b/selection/randomized/tests/test_selectiveMLE_BH.py
index 6fb07771a..57fb4eb58 100644
--- a/selection/randomized/tests/test_selectiveMLE_BH.py
+++ b/selection/randomized/tests/test_selectiveMLE_BH.py
@@ -42,7 +42,8 @@ def test_selected_targets(n=500, p=100, signal_fac=1.6, s=5, sigma=3, rho=0.4, r
 
     estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=dispersion)
 
-    coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1])
+    beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
+    coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
     print("coverage for selected target", coverage.sum()/float(nonzero.sum()))
     return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals
 

From 47c2c279c29997eec75fee2d047d3ff58ef6d1ea Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Fri, 27 Apr 2018 23:08:59 -0700
Subject: [PATCH 609/617] commit changes in branch before switch

---
 selection/randomized/tests/test_selectiveMLE_BH.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/selection/randomized/tests/test_selectiveMLE_BH.py b/selection/randomized/tests/test_selectiveMLE_BH.py
index 57fb4eb58..dd17b8867 100644
--- a/selection/randomized/tests/test_selectiveMLE_BH.py
+++ b/selection/randomized/tests/test_selectiveMLE_BH.py
@@ -43,11 +43,12 @@ def test_selected_targets(n=500, p=100, signal_fac=1.6, s=5, sigma=3, rho=0.4, r
     estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=dispersion)
 
     beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
+    print("beta_target and intervals", beta_target, intervals)
     coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
     print("coverage for selected target", coverage.sum()/float(nonzero.sum()))
     return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals
 
-def main(nsim=100):
+def main(nsim=500):
 
     P0, PA, cover, length_int = [], [], [], []
     for i in range(nsim):

From 90af11b059950eccd04d7cdd3f7baf4c1e61bc2d Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 1 May 2018 16:30:51 -0700
Subject: [PATCH 610/617] running slope with affine constraints, barrier code
 written in python

---
 selection/randomized/query.py            | 75 ++++++++++++++++++++++--
 selection/randomized/tests/test_slope.py | 28 ++++-----
 2 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 6ee0ed7a8..5fbb8b5d2 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -501,10 +501,11 @@ def selective_MLE(self,
         conjugate_arg = prec_opt.dot(self.affine_con.mean)
 
         init_soln = feasible_point
-        val, soln, hess = solve_barrier_nonneg(conjugate_arg,
-                                               prec_opt,
-                                               init_soln,
-                                               **solve_args)
+        val, soln, hess = _solve_barrier_affine(conjugate_arg,
+                                                prec_opt,
+                                                self.affine_con,
+                                                init_soln,
+                                                **solve_args)
 
         final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(self.affine_con.mean - soln)))
         ind_unbiased_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(self.affine_con.mean
@@ -735,4 +736,70 @@ def naive_pvalues(diag_cov, observed, parameter):
         pvalues[j] = 2 * min(pval, 1-pval)
     return pvalues
 
+def _solve_barrier_affine(conjugate_arg,
+                          precision,
+                          constraints,
+                          feasible_point=None,
+                          step=1,
+                          nstep=1000,
+                          tol=1.e-8):
+    """Minimize -u.dot(conjugate_arg) + u.dot(precision).dot(u)/2 plus a log barrier.
+
+    The barrier keeps constraints.offset - constraints.linear_part.dot(u) > 0. Returns the
+    final iterate, its objective value and inv(precision + barrier Hessian), ordered as in
+    the return statement; raises ValueError when step halving finds no usable proposal.
+    """
+    con_linear = constraints.linear_part
+    con_offset = constraints.offset
+    scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T)))
+    # NOTE: this default has one entry per constraint row; it only fits u if con_linear is square
+    if feasible_point is None:
+        feasible_point = 1. / scaling
+    objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. \
+                          + np.log(1.+ 1./((con_offset-con_linear.dot(u))/ scaling)).sum()
+    grad = lambda u: -conjugate_arg + precision.dot(u) -con_linear.T.dot(1./(scaling + con_offset-con_linear.dot(u)) -
+                                                                       1./(con_offset-con_linear.dot(u)))
+    barrier_hessian = lambda u: con_linear.T.dot(np.diag(-1./((scaling + con_offset-con_linear.dot(u))**2.)
+                                                 + 1./((con_offset-con_linear.dot(u))**2.))).dot(con_linear)
+
+    current = feasible_point
+    current_value = np.inf
+
+    for itercount in range(nstep):
+        newton_step = grad(current)  # plain gradient direction, despite the name
+        # make sure proposal is feasible
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * newton_step
+            if np.all(con_offset-con_linear.dot(proposal) > 0):
+                break
+            step *= 0.5
+            if count >= 40:
+                raise ValueError('not finding a feasible point')
+        # make sure proposal is a descent; a NaN objective never satisfies `<=`, so bound the search
+        count = 0
+        while True:
+            count += 1
+            proposal = current - step * newton_step
+            proposed_value = objective(proposal)
+            if proposed_value <= current_value:
+                break
+            step *= 0.5
+            if count >= 40 and np.isnan(proposed_value):
+                raise ValueError('objective is NaN, not finding a descent step')
+        # stop if relative decrease is small
+        if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value):
+            current = proposal
+            current_value = proposed_value
+            break
+        current = proposal
+        current_value = proposed_value
+
+        if itercount % 4 == 0:
+            step *= 2
+
+    hess = np.linalg.inv(precision + barrier_hessian(current))
+    return current, current_value, hess
+
 
diff --git a/selection/randomized/tests/test_slope.py b/selection/randomized/tests/test_slope.py
index f8c1a983b..ea7648e5d 100644
--- a/selection/randomized/tests/test_slope.py
+++ b/selection/randomized/tests/test_slope.py
@@ -202,8 +202,9 @@ def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0., ra
             coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])
             break
 
-    print(beta_target)
-    return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
+    if True:
+        #print(beta_target)
+        return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals
 
 def main(nsim=100):
 
@@ -217,17 +218,16 @@ def main(nsim=100):
         PA.extend(pA)
         print('coverage', np.mean(cover))
 
-        if i % 3 == 0 and i > 0:
-            U = np.linspace(0, 1, 101)
-            plt.clf()
-            if len(P0) > 0:
-                plt.plot(U, ECDF(P0)(U))
-            if len(PA) > 0:
-                plt.plot(U, ECDF(PA)(U), 'r')
-            plt.plot([0, 1], [0, 1], 'k--')
-            plt.draw()
-
-
-
+        # if i % 3 == 0 and i > 0:
+        #     U = np.linspace(0, 1, 101)
+        #     plt.clf()
+        #     if len(P0) > 0:
+        #         plt.plot(U, ECDF(P0)(U))
+        #     if len(PA) > 0:
+        #         plt.plot(U, ECDF(PA)(U), 'r')
+        #     plt.plot([0, 1], [0, 1], 'k--')
+        #     plt.draw()
+
+main()
 
 

From db64db1d45268fef6a2c477b95a0575cf0d36e9c Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 1 May 2018 16:36:33 -0700
Subject: [PATCH 611/617] coverage for SLOPE looks good

---
 selection/randomized/tests/test_slope.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/selection/randomized/tests/test_slope.py b/selection/randomized/tests/test_slope.py
index ea7648e5d..b2920258f 100644
--- a/selection/randomized/tests/test_slope.py
+++ b/selection/randomized/tests/test_slope.py
@@ -161,7 +161,7 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rh
 #     X_clustered = X[:, indices].dot(signs_cluster)
 #     print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
 
-def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0., randomizer_scale= np.sqrt(0.5),
+def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.5),
                           use_MLE=False):
 
     while True:
@@ -181,7 +181,7 @@ def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0., ra
                                                           Y,
                                                           W=None,
                                                           normalize=True,
-                                                          choice_weights="bhq", #put gaussian
+                                                          choice_weights="gaussian", #put gaussian
                                                           sigma=sigma_)
 
         conv = slope.gaussian(X,

From 071d9282abaabab13e509830d24be5d2902296c3 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 1 May 2018 18:14:55 -0700
Subject: [PATCH 612/617] rearranged the terms returned by barrier_affine

---
 selection/randomized/query.py            |  3 ++-
 selection/randomized/slope.py            |  9 ++-------
 selection/randomized/tests/test_slope.py | 12 +++++++-----
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 5fbb8b5d2..5dee5448e 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -501,6 +501,7 @@ def selective_MLE(self,
         conjugate_arg = prec_opt.dot(self.affine_con.mean)
 
         init_soln = feasible_point
+        print("check query", self.affine_con.linear_part.dot(init_soln)-self.affine_con.offset)
         val, soln, hess = _solve_barrier_affine(conjugate_arg,
                                                 prec_opt,
                                                 self.affine_con,
@@ -800,6 +801,6 @@ def _solve_barrier_affine(conjugate_arg,
             step *= 2
 
     hess = np.linalg.inv(precision + barrier_hessian(current))
-    return current, current_value, hess
+    return current_value, current, hess
 
 
diff --git a/selection/randomized/slope.py b/selection/randomized/slope.py
index 8540b4175..d6205d09d 100644
--- a/selection/randomized/slope.py
+++ b/selection/randomized/slope.py
@@ -92,8 +92,6 @@ def fit(self,
         active_signs = np.sign(self.initial_soln)
         active = self._active = active_signs != 0
 
-        print("check active terms", active.sum())
-
         self._overall = overall = active> 0
         self._inactive = inactive = ~self._overall
 
@@ -109,6 +107,7 @@ def fit(self,
         sorted_soln = self.initial_soln[indices]
         initial_scalings = np.sort(np.unique(np.fabs(self.initial_soln[active])))[::-1]
         self.observed_opt_state = initial_scalings
+        print("self.observed_opt_state", self.observed_opt_state)
 
         self._unpenalized = np.zeros(p, np.bool)
 
@@ -154,9 +153,6 @@ def fit(self,
         cov, prec = self.randomizer.cov_prec
         opt_linear, opt_offset = self.opt_transform
 
-        print("check if correct", np.allclose(self.observed_score_state + opt_offset + opt_linear.dot(initial_scalings),
-                                              self._initial_omega, rtol=1e-05, atol=1e-08))
-
         cond_precision = opt_linear.T.dot(opt_linear) * prec
         cond_cov = np.linalg.inv(cond_precision)
         logdens_linear = cond_cov.dot(opt_linear.T) * prec
@@ -183,8 +179,7 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
         A_scaling = np.vstack([A_scaling_0, A_scaling_1])
         b_scaling = np.zeros(2*self.num_opt_var-1)
 
-        # A_scaling = -np.identity(self.num_opt_var)
-        # b_scaling = np.zeros(self.num_opt_var)
+        #print("check", (A_scaling.dot(self.observed_opt_state)-b_scaling <= 0).sum(), b_scaling.shape[0])
 
         affine_con = constraints(A_scaling,
                                  b_scaling,
diff --git a/selection/randomized/tests/test_slope.py b/selection/randomized/tests/test_slope.py
index b2920258f..ff7b2a596 100644
--- a/selection/randomized/tests/test_slope.py
+++ b/selection/randomized/tests/test_slope.py
@@ -161,8 +161,8 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rh
 #     X_clustered = X[:, indices].dot(signs_cluster)
 #     print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
 
-def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.5),
-                          use_MLE=False):
+def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.25),
+                          target = "selected", use_MLE=True):
 
     while True:
         inst = gaussian_instance
@@ -193,10 +193,12 @@ def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0.35,
         nonzero = signs != 0
         print("dimensions", n, p, nonzero.sum())
         if nonzero.sum() > 0:
-            beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
+            if target == "selected":
+                beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))
+            else:
+                beta_target = beta[nonzero]
             if use_MLE:
-                estimate, _, _, pval, intervals, _ = conv.selective_MLE(target="selected", dispersion=sigma_)
-                print("estimate", estimate, pval, intervals)
+                estimate, _, _, pval, intervals, _ = conv.selective_MLE(target=target, dispersion=sigma_)
             else:
                 _, pval, intervals = conv.summary(target="selected", dispersion=sigma_, compute_intervals=True)
             coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])

From 38e152c399fe2b17e701771802ca1ca6a4357d58 Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 1 May 2018 18:17:29 -0700
Subject: [PATCH 613/617] removed some unnecessary print checks

---
 selection/randomized/query.py            | 1 -
 selection/randomized/slope.py            | 4 ----
 selection/randomized/tests/test_slope.py | 4 ++--
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/selection/randomized/query.py b/selection/randomized/query.py
index 5dee5448e..65c1c314b 100644
--- a/selection/randomized/query.py
+++ b/selection/randomized/query.py
@@ -501,7 +501,6 @@ def selective_MLE(self,
         conjugate_arg = prec_opt.dot(self.affine_con.mean)
 
         init_soln = feasible_point
-        print("check query", self.affine_con.linear_part.dot(init_soln)-self.affine_con.offset)
         val, soln, hess = _solve_barrier_affine(conjugate_arg,
                                                 prec_opt,
                                                 self.affine_con,
diff --git a/selection/randomized/slope.py b/selection/randomized/slope.py
index d6205d09d..540d58884 100644
--- a/selection/randomized/slope.py
+++ b/selection/randomized/slope.py
@@ -107,8 +107,6 @@ def fit(self,
         sorted_soln = self.initial_soln[indices]
         initial_scalings = np.sort(np.unique(np.fabs(self.initial_soln[active])))[::-1]
         self.observed_opt_state = initial_scalings
-        print("self.observed_opt_state", self.observed_opt_state)
-
         self._unpenalized = np.zeros(p, np.bool)
 
         _beta_unpenalized = restricted_estimator(self.loglike, self._overall, solve_args=solve_args)
@@ -179,8 +177,6 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
         A_scaling = np.vstack([A_scaling_0, A_scaling_1])
         b_scaling = np.zeros(2*self.num_opt_var-1)
 
-        #print("check", (A_scaling.dot(self.observed_opt_state)-b_scaling <= 0).sum(), b_scaling.shape[0])
-
         affine_con = constraints(A_scaling,
                                  b_scaling,
                                  mean=cond_mean,
diff --git a/selection/randomized/tests/test_slope.py b/selection/randomized/tests/test_slope.py
index ff7b2a596..60de0a730 100644
--- a/selection/randomized/tests/test_slope.py
+++ b/selection/randomized/tests/test_slope.py
@@ -161,8 +161,8 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rh
 #     X_clustered = X[:, indices].dot(signs_cluster)
 #     print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
 
-def test_randomized_slope(n=500, p=50, signal_fac=1.5, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.25),
-                          target = "selected", use_MLE=True):
+def test_randomized_slope(n=500, p=100, signal_fac=1.5, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.25),
+                          target = "full", use_MLE=True):
 
     while True:
         inst = gaussian_instance

From 5e5c17c1537e7f78b80b32ede126ec305d275fff Mon Sep 17 00:00:00 2001
From: Snigdha Panigrahi <>
Date: Tue, 1 May 2018 18:41:51 -0700
Subject: [PATCH 614/617] push changes to SLOPE

---
 selection/randomized/slope.py            | 92 ++++++++++++------------
 selection/randomized/tests/test_slope.py |  2 +-
 2 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/selection/randomized/slope.py b/selection/randomized/slope.py
index 540d58884..57f166e03 100644
--- a/selection/randomized/slope.py
+++ b/selection/randomized/slope.py
@@ -144,51 +144,55 @@ def fit(self,
                     break
 
         signs_cluster = np.asarray(signs_cluster).T
-        X_clustered = X[:, indices].dot(signs_cluster)
-        _opt_linear_term = X.T.dot(X_clustered)
-        self.opt_transform = (_opt_linear_term, self.initial_subgrad)
+        if signs_cluster.size == 0:
+            return active_signs
 
-        cov, prec = self.randomizer.cov_prec
-        opt_linear, opt_offset = self.opt_transform
-
-        cond_precision = opt_linear.T.dot(opt_linear) * prec
-        cond_cov = np.linalg.inv(cond_precision)
-        logdens_linear = cond_cov.dot(opt_linear.T) * prec
-        cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
-
-        logdens_transform = (logdens_linear, opt_offset)
-
-        def log_density(logdens_linear, offset, cond_prec, score, opt):
-            if score.ndim == 1:
-                mean_term = logdens_linear.dot(score.T + offset).T
-            else:
-                mean_term = logdens_linear.dot(score.T + offset[:, None]).T
-            arg = opt + mean_term
-            return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
-
-        log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
-
-        # now make the constraints
-
-        A_scaling_0 = -np.identity(self.num_opt_var)
-        A_scaling_1 = -np.identity(self.num_opt_var)[:(self.num_opt_var-1), :]
-        for k in range(A_scaling_1.shape[0]):
-           A_scaling_1[k,k+1]= 1
-        A_scaling = np.vstack([A_scaling_0, A_scaling_1])
-        b_scaling = np.zeros(2*self.num_opt_var-1)
-
-        affine_con = constraints(A_scaling,
-                                 b_scaling,
-                                 mean=cond_mean,
-                                 covariance=cond_cov)
-
-        self.sampler = affine_gaussian_sampler(affine_con,
-                                               self.observed_opt_state,
-                                               self.observed_score_state,
-                                               log_density,
-                                               logdens_transform,
-                                               selection_info=self.selection_variable)
-        return active_signs
+        else:
+            X_clustered = X[:, indices].dot(signs_cluster)
+            _opt_linear_term = X.T.dot(X_clustered)
+            self.opt_transform = (_opt_linear_term, self.initial_subgrad)
+
+            cov, prec = self.randomizer.cov_prec
+            opt_linear, opt_offset = self.opt_transform
+
+            cond_precision = opt_linear.T.dot(opt_linear) * prec
+            cond_cov = np.linalg.inv(cond_precision)
+            logdens_linear = cond_cov.dot(opt_linear.T) * prec
+            cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
+
+            logdens_transform = (logdens_linear, opt_offset)
+
+            def log_density(logdens_linear, offset, cond_prec, score, opt):
+                if score.ndim == 1:
+                    mean_term = logdens_linear.dot(score.T + offset).T
+                else:
+                    mean_term = logdens_linear.dot(score.T + offset[:, None]).T
+                arg = opt + mean_term
+                return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+
+            log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
+
+            # now make the constraints
+
+            A_scaling_0 = -np.identity(self.num_opt_var)
+            A_scaling_1 = -np.identity(self.num_opt_var)[:(self.num_opt_var - 1), :]
+            for k in range(A_scaling_1.shape[0]):
+                A_scaling_1[k, k + 1] = 1
+            A_scaling = np.vstack([A_scaling_0, A_scaling_1])
+            b_scaling = np.zeros(2 * self.num_opt_var - 1)
+
+            affine_con = constraints(A_scaling,
+                                     b_scaling,
+                                     mean=cond_mean,
+                                     covariance=cond_cov)
+
+            self.sampler = affine_gaussian_sampler(affine_con,
+                                                   self.observed_opt_state,
+                                                   self.observed_score_state,
+                                                   log_density,
+                                                   logdens_transform,
+                                                   selection_info=self.selection_variable)
+            return active_signs
 
     # Targets of inference
     # and covariance with score representation
diff --git a/selection/randomized/tests/test_slope.py b/selection/randomized/tests/test_slope.py
index 60de0a730..925dc78f4 100644
--- a/selection/randomized/tests/test_slope.py
+++ b/selection/randomized/tests/test_slope.py
@@ -161,7 +161,7 @@ def compare_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rh
 #     X_clustered = X[:, indices].dot(signs_cluster)
 #     print("start indices of clusters", indices, cur_indx_array, signs_cluster.shape, X_clustered.shape)
 
-def test_randomized_slope(n=500, p=100, signal_fac=1.5, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.25),
+def test_randomized_slope(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.25),
                           target = "full", use_MLE=True):
 
     while True:

From df66abc7d8568247aa618d414338c4049376d97e Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 2 May 2018 11:16:13 -0700
Subject: [PATCH 615/617] minor edit

---
 selection/randomized/marginal_screening.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/selection/randomized/marginal_screening.py b/selection/randomized/marginal_screening.py
index 851c75766..ab8200493 100644
--- a/selection/randomized/marginal_screening.py
+++ b/selection/randomized/marginal_screening.py
@@ -19,18 +19,16 @@ def BH_selection(p_values, level):
     p_sorted = np.sort(p_values)
     indices = np.arange(m)
     indices_order = np.argsort(p_values)
-    order_sig = np.max(indices[p_sorted - np.true_divide(level * (np.arange(m) + 1.), m) <= 0])
+    order_sig = np.max(indices[p_sorted - level * (np.arange(m) + 1.) / m <= 0])
     E_sel = indices_order[:(order_sig+1)]
     not_sel =indices_order[(order_sig+1):]
 
     active = np.zeros(m, np.bool)
     active[E_sel] = 1
 
-    #print("check ordering", ((np.sort(p_values[np.sort(not_sel)])
-    #                          - ((order_sig+1 +np.arange(m-active.sum())+1) * level) /(2.* m))>=0.).sum()+ active.sum())
     return order_sig+1, active, np.argsort(p_values[np.sort(not_sel)])
 
-class BH():
+class BH(object):
 
     def __init__(self,
                  X,
@@ -41,7 +39,7 @@ def __init__(self,
                  perturb=None):
 
         observed_score = -X.T.dot(Y)
-        self.nfeature =  p = observed_score.shape[0]
+        self.nfeature = p = observed_score.shape[0]
         self.sigma_hat = sigma_hat
 
         self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)

From 66294b4e888055f63150bc3b9dbd17a5f16a37fa Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 2 May 2018 11:59:23 -0700
Subject: [PATCH 616/617] BF: wrong C file in selective_MLE_utils setup

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 4ea768a38..c8fc1e0ec 100755
--- a/setup.py
+++ b/setup.py
@@ -60,7 +60,7 @@
 
 EXTS.append(Extension('selection.randomized.selective_MLE_utils',
                       ['selection/randomized/selective_MLE_utils.pyx',
-                       'C-software/src/randomized_lasso.c'],
+                       'C-software/src/selective_mle.c'],
                       libraries=['m'],
                       include_dirs=['C-software/src']))
 

From 895c0086bca9bb26818d724c03d709477f6f3125 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 2 May 2018 12:09:34 -0700
Subject: [PATCH 617/617] moved general LASSO to sandbox, now highdim is just
 lasso

---
 .../tests/test_inferential_metrics.py         |   10 +-
 selection/randomized/convenience.py           |    2 +-
 selection/randomized/lasso.py                 | 1200 +---------------
 selection/randomized/modelQ.py                |    1 -
 selection/randomized/sandbox/general_lasso.py | 1218 +++++++++++++++++
 selection/randomized/slope.py                 |    6 +-
 selection/randomized/tests/test_full_lasso.py |    4 +-
 .../randomized/tests/test_highdim_lasso.py    |    8 +-
 selection/randomized/tests/test_modelQ.py     |    4 +-
 .../tests/test_selective_MLE_high.py          |    6 +-
 .../tests/test_selective_MLE_onedim.py        |   14 +-
 11 files changed, 1251 insertions(+), 1222 deletions(-)
 create mode 100644 selection/randomized/sandbox/general_lasso.py

diff --git a/selection/adjusted_MLE/tests/test_inferential_metrics.py b/selection/adjusted_MLE/tests/test_inferential_metrics.py
index 33ad55b31..fcf6b01fc 100644
--- a/selection/adjusted_MLE/tests/test_inferential_metrics.py
+++ b/selection/adjusted_MLE/tests/test_inferential_metrics.py
@@ -4,8 +4,6 @@
 import rpy2.robjects.numpy2ri
 rpy2.robjects.numpy2ri.activate()
 
-import selection.randomized.lasso as L; reload(L)
-from selection.randomized.lasso import highdim
 from selection.algorithms.lasso import lasso
 from scipy.stats import norm as ndist
 
@@ -189,10 +187,10 @@ def comparison_risk_inference_selected(n=500, p=100, nval=500, rho=0.35, s=5, be
         err = np.zeros(tune_num)
         for k in range(tune_num):
             W = lam_seq[k] * np.ones(p)
-            conv = highdim.gaussian(X,
-                                    y,
-                                    W,
-                                    randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_)
+            conv = lasso.gaussian(X,
+                                  y,
+                                  W,
+                                  randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_)
             signs = conv.fit()
             nonzero = signs != 0
             if tuning == "selective_MLE":
diff --git a/selection/randomized/convenience.py b/selection/randomized/convenience.py
index 2c5515f67..ef95051a1 100644
--- a/selection/randomized/convenience.py
+++ b/selection/randomized/convenience.py
@@ -13,7 +13,7 @@
 from .randomization import randomization
 from .query import multiple_queries
 
-from .lasso import highdim as lasso
+from .lasso import lasso
 
 class step(lasso):
 
diff --git a/selection/randomized/lasso.py b/selection/randomized/lasso.py
index 436b7c90b..6edb5237e 100644
--- a/selection/randomized/lasso.py
+++ b/selection/randomized/lasso.py
@@ -30,1197 +30,11 @@
                   glm_parametric_covariance)
 from ..algorithms.debiased_lasso import debiasing_matrix
 
-
-class lasso_view(query):
-    def __init__(self,
-                 loss,
-                 epsilon,
-                 penalty,
-                 randomization,
-                 perturb=None,
-                 solve_args={'min_its': 50, 'tol': 1.e-10}):
-        """
-        Fits the logistic regression to a candidate active set, without penalty.
-        Calls the method bootstrap_covariance() to bootstrap the covariance matrix.
-        Computes $\bar{\beta}_E$ which is the restricted
-        M-estimator (i.e. subject to the constraint $\beta_{-E}=0$).
-        Parameters:
-        -----------
-        active: np.bool
-            The active set from fitting the logistic lasso
-        solve_args: dict
-            Arguments to be passed to regreg solver.
-        Returns:
-        --------
-        None
-        Notes:
-        ------
-        Sets self._beta_unpenalized which will be used in the covariance matrix calculation.
-        Also computes Hessian of loss at restricted M-estimator as well as the bootstrap covariance.
-        """
-
-        query.__init__(self, randomization)
-
-        (self.loss,
-         self.epsilon,
-         self.penalty,
-         self.randomization) = (loss,
-                                epsilon,
-                                penalty,
-                                randomization)
-
-    # Methods needed for subclassing a query
-
-    def solve(self, nboot=2000,
-              solve_args={'min_its': 20, 'tol': 1.e-10},
-              perturb=None):
-
-        self.randomize(perturb=perturb)
-
-        (loss,
-         randomized_loss,
-         epsilon,
-         penalty,
-         randomization) = (self.loss,
-                           self.randomized_loss,
-                           self.epsilon,
-                           self.penalty,
-                           self.randomization)
-
-        # initial solution
-
-        p = penalty.shape[0]
-
-        problem = rr.simple_problem(randomized_loss, penalty)
-        self.initial_soln = problem.solve(**solve_args)
-
-        # find the active groups and their direction vectors
-        # as well as unpenalized groups
-
-        active_signs = np.sign(self.initial_soln)
-        active = self._active = active_signs != 0
-
-        if isinstance(penalty, rr.l1norm):
-            self._lagrange = penalty.lagrange * np.ones(p)
-            unpenalized = np.zeros(p, np.bool)
-        elif isinstance(penalty, rr.weighted_l1norm):
-            self._lagrange = penalty.weights
-            unpenalized = self._lagrange == 0
-        else:
-            raise ValueError('penalty must be `l1norm` or `weighted_l1norm`')
-
-        active *= ~unpenalized
-
-        # solve the restricted problem
-
-        self._overall = (active + unpenalized) > 0
-        self._inactive = ~self._overall
-        self._unpenalized = unpenalized
-
-        _active_signs = active_signs.copy()
-        _active_signs[unpenalized] = np.nan  # don't release sign of unpenalized variables
-        self.selection_variable = {'sign': _active_signs,
-                                   'variables': self._overall}
-
-        # initial state for opt variables
-
-        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
-                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad'))
-        # the quadratic of a smooth_atom is not included in computing the smooth_objective
-        self.initial_subgrad = initial_subgrad
-
-        initial_scalings = np.fabs(self.initial_soln[active])
-        initial_unpenalized = self.initial_soln[self._unpenalized]
-
-        self.observed_opt_state = np.concatenate([initial_scalings,
-                                                  initial_unpenalized,
-                                                  self.initial_subgrad[self._inactive]], axis=0)
-
-        # set the _solved bit
-
-        self._solved = True
-
-        # Now setup the pieces for linear decomposition
-
-        (loss,
-         epsilon,
-         penalty,
-         initial_soln,
-         overall,
-         inactive,
-         unpenalized) = (self.loss,
-                         self.epsilon,
-                         self.penalty,
-                         self.initial_soln,
-                         self._overall,
-                         self._inactive,
-                         self._unpenalized)
-
-        # we are implicitly assuming that
-        # loss is a pairs model
-
-        _beta_unpenalized = restricted_estimator(loss, overall, solve_args=solve_args)
-
-        beta_bar = np.zeros(p)
-        beta_bar[overall] = _beta_unpenalized
-        self._beta_full = beta_bar
-
-        # observed state for score in internal coordinates
-
-        self.observed_internal_state = np.hstack([_beta_unpenalized,
-                                                  -loss.smooth_objective(beta_bar, 'grad')[inactive]])
-
-        # form linear part
-
-        self.num_opt_var = self.observed_opt_state.shape[0]
-
-        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
-        # E for active
-        # U for unpenalized
-        # -E for inactive
-
-        _opt_linear_term = np.zeros((p, p))
-        _score_linear_term = np.zeros((p, p))
-
-        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
-
-        est_slice = slice(0, overall.sum())
-        X, y = loss.data
-        W = self.loss.saturated_loss.hessian(X.dot(beta_bar))
-        _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
-        _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None])
-
-        _score_linear_term[:, est_slice] = -np.hstack([_hessian_active, _hessian_unpen])
-
-        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
-
-        null_idx = np.arange(overall.sum(), p)
-        inactive_idx = np.nonzero(inactive)[0]
-        for _i, _n in zip(inactive_idx, null_idx):
-            _score_linear_term[_i, _n] = -1
-
-        # c_E piece
-
-        def signed_basis_vector(p, j, s):
-            v = np.zeros(p)
-            v[j] = s
-            return v
-
-        active_directions = np.array([signed_basis_vector(p, j, active_signs[j]) for j in np.nonzero(active)[0]]).T
-
-        scaling_slice = slice(0, active.sum())
-        if np.sum(active) == 0:
-            _opt_hessian = 0
-        else:
-            _opt_hessian = _hessian_active * active_signs[None, active] + epsilon * active_directions
-        _opt_linear_term[:, scaling_slice] = _opt_hessian
-
-        # beta_U piece
-
-        unpenalized_slice = slice(active.sum(), active.sum() + unpenalized.sum())
-        unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
-        if unpenalized.sum():
-            _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
-                                                      + epsilon * unpenalized_directions)
-
-            # subgrad piece
-
-        subgrad_idx = range(active.sum() + unpenalized.sum(), active.sum() + inactive.sum() + unpenalized.sum())
-        subgrad_slice = slice(active.sum() + unpenalized.sum(), active.sum() + inactive.sum() + unpenalized.sum())
-        for _i, _s in zip(inactive_idx, subgrad_idx):
-            _opt_linear_term[_i, _s] = 1
-
-        # form affine part
-
-        _opt_affine_term = np.zeros(p)
-        idx = 0
-        _opt_affine_term[active] = active_signs[active] * self._lagrange[active]
-
-        # two transforms that encode score and optimization
-        # variable roles
-
-        self.opt_transform = (_opt_linear_term, _opt_affine_term)
-        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
-
-        # everything now expressed in observed_score_state
-
-        self.observed_score_state = _score_linear_term.dot(self.observed_internal_state)
-
-        # now store everything needed for the projections
-        # the projection acts only on the optimization
-        # variables
-
-        # we form a dual group lasso object
-        # to do the projection
-
-
-        self._setup = True
-        self.subgrad_slice = subgrad_slice
-        self.scaling_slice = scaling_slice
-        self.unpenalized_slice = unpenalized_slice
-        self.ndim = loss.shape[0]
-
-        self.nboot = nboot
-
-    def get_sampler(self):
-        # setup the default optimization sampler
-
-        if not hasattr(self, "_sampler"):
-
-            penalty, inactive = self.penalty, self._inactive
-            inactive_lagrange = self.penalty.weights[inactive]
-
-            if not hasattr(self.randomization, "cov_prec"):  # means randomization is not Gaussian
-
-                dual = rr.weighted_supnorm(1. / inactive_lagrange, bound=1.)
-
-                def projection(dual, subgrad_slice, scaling_slice, opt_state):
-                    """
-                    Full projection for Langevin.
-                    The state here will be only the state of the optimization variables.
-                    """
-
-                    new_state = opt_state.copy()  # not really necessary to copy
-                    new_state[scaling_slice] = np.maximum(opt_state[scaling_slice], 0)
-                    new_state[subgrad_slice] = dual.bound_prox(opt_state[subgrad_slice])
-                    return new_state
-
-                projection = functools.partial(projection, dual, self.subgrad_slice, self.scaling_slice)
-
-                def grad_log_density(query,
-                                     rand_gradient,
-                                     score_state,
-                                     opt_state):
-                    full_state = score_state + reconstruct_opt(query.opt_transform, opt_state)
-                    return opt_linear.T.dot(rand_gradient(full_state).T)
-
-                grad_log_density = functools.partial(grad_log_density, self, self.randomization.gradient)
-
-                def log_density(query,
-                                opt_linear,
-                                rand_log_density,
-                                score_state,
-                                opt_state):
-                    full_state = score_state + reconstruct_opt(query.opt_transform, opt_state)
-                    return rand_log_density(full_state)
-
-                log_density = functools.partial(log_density, self, self.randomization.log_density)
-
-                self._sampler = langevin_sampler(self.observed_opt_state,
-                                                 self.observed_score_state,
-                                                 self.score_transform,
-                                                 self.opt_transform,
-                                                 projection,
-                                                 grad_log_density,
-                                                 log_density)
-            else:
-
-                # compute implied mean and covariance
-
-                cov, prec = self.randomization.cov_prec
-                prec_array = len(np.asarray(prec).shape) == 2
-                opt_linear, opt_offset = self.opt_transform
-
-                if prec_array:
-                    cond_precision = opt_linear.T.dot(prec.dot(opt_linear))
-                    cond_cov = np.linalg.inv(cond_precision)
-                    logdens_linear = cond_cov.dot(opt_linear.T.dot(prec))
-                else:
-                    cond_precision = opt_linear.T.dot(opt_linear) * prec
-                    cond_cov = np.linalg.inv(cond_precision)
-                    logdens_linear = cond_cov.dot(opt_linear.T) * prec
-
-                cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
-
-                # need a log_density function
-                # the conditional density of opt variables
-                # given the score
-
-                def log_density(logdens_linear, offset, cond_prec, score, opt):
-                    if score.ndim == 1:
-                        mean_term = logdens_linear.dot(score.T + offset).T
-                    else:
-                        mean_term = logdens_linear.dot(score.T + offset[:, None]).T
-                    arg = opt + mean_term
-                    return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
-
-                log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
-
-                # now make the constraints
-
-                # scaling constraints
-
-                I = np.identity(cond_cov.shape[0])
-                A_scaling = -I[self.scaling_slice]
-                b_scaling = np.zeros(A_scaling.shape[0])
-
-                A_subgrad = np.vstack([I[self.subgrad_slice],
-                                       -I[self.subgrad_slice]])
-                b_subgrad = np.hstack([inactive_lagrange,
-                                       inactive_lagrange])
-
-                linear_term = np.vstack([A_scaling, A_subgrad])
-                offset = np.hstack([b_scaling, b_subgrad])
-
-                affine_con = constraints(linear_term,
-                                         offset,
-                                         mean=cond_mean,
-                                         covariance=cond_cov)
-
-                logdens_transform = (logdens_linear, opt_offset)
-
-                self._sampler = affine_gaussian_sampler(affine_con,
-                                                        self.observed_opt_state,
-                                                        self.observed_score_state,
-                                                        log_density,
-                                                        logdens_transform,
-                                                        selection_info=self.selection_variable)  # should be signs and the subgradients we've conditioned on
-
-        return self._sampler
-
-    sampler = property(get_sampler, query.set_sampler)
-
-    def decompose_subgradient(self, condition=None, marginalize=None):
-        """
-        ADD DOCSTRING
-        condition and marginalize should be disjoint
-        """
-
-        p = self.penalty.shape[0]
-        condition_inactive = np.zeros(p, dtype=np.bool)
-
-        if condition is None:
-            condition = np.zeros(p, dtype=np.bool)
-
-        if marginalize is None:
-            marginalize = np.zeros(p, dtype=np.bool)
-            marginalize[self._overall] = 0
-
-        if np.any(condition * marginalize):
-            raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
-
-        if not self._setup:
-            raise ValueError('setup_sampler should be called before using this function')
-
-        _inactive = self._inactive
-
-        limits_marginal = np.zeros_like(_inactive, np.float)
-
-        condition_inactive = _inactive * condition
-        moving_inactive = _inactive * ~(marginalize + condition)
-        margin_inactive = _inactive * marginalize
-
-        limits_marginal = self._lagrange
-        if np.asarray(self._lagrange).shape in [(), (1,)]:
-            limits_marginal = np.zeros_like(_inactive) * self._lagrange
-
-        opt_linear, opt_offset = self.opt_transform
-
-        new_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
-                                                     self._unpenalized.sum() +
-                                                     moving_inactive.sum())))
-        new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
-        new_linear[:, self.unpenalized_slice] = opt_linear[:, self.unpenalized_slice]
-
-        inactive_moving_idx = np.nonzero(moving_inactive)[0]
-        subgrad_idx = range(self._active.sum() + self._unpenalized.sum(),
-                            self._active.sum() + self._unpenalized.sum() +
-                            moving_inactive.sum())
-        for _i, _s in zip(inactive_moving_idx, subgrad_idx):
-            new_linear[_i, _s] = 1.
-
-        observed_opt_state = self.observed_opt_state[:(self._active.sum() +
-                                                       self._unpenalized.sum() +
-                                                       moving_inactive.sum())]
-        observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive]
-
-        condition_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
-                                                           self._unpenalized.sum() +
-                                                           condition_inactive.sum())))
-
-        new_offset = opt_offset + 0.
-        new_offset[condition_inactive] += self.initial_subgrad[condition_inactive]
-        new_opt_transform = (new_linear, new_offset)
-
-        if not hasattr(self.randomization, "cov_prec") or marginalize.sum():  # use Langevin -- not gaussian
-
-            def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive):
-                return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
-                                  _cdf(full_state_plus) - _cdf(full_state_minus)))[margin_inactive]
-
-            def new_grad_log_density(query,
-                                     limits_marginal,
-                                     margin_inactive,
-                                     _cdf,
-                                     _pdf,
-                                     new_opt_transform,
-                                     deriv_log_dens,
-                                     score_state,
-                                     opt_state):
-
-                full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)
-
-                p = query.penalty.shape[0]
-                weights = np.zeros(p)
-
-                if margin_inactive.sum() > 0:
-                    full_state_plus = full_state + limits_marginal * margin_inactive
-                    full_state_minus = full_state - limits_marginal * margin_inactive
-                    weights[margin_inactive] = _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive)
-                weights[~margin_inactive] = deriv_log_dens(full_state)[~margin_inactive]
-                return -opt_linear.T.dot(weights)
-
-            new_grad_log_density = functools.partial(new_grad_log_density,
-                                                     self,
-                                                     limits_marginal,
-                                                     margin_inactive,
-                                                     self.randomization._cdf,
-                                                     self.randomization._pdf,
-                                                     new_opt_transform,
-                                                     self.randomization._derivative_log_density)
-
-            def new_log_density(query,
-                                limits_marginal,
-                                margin_inactive,
-                                _cdf,
-                                _pdf,
-                                new_opt_transform,
-                                log_dens,
-                                score_state,
-                                opt_state):
-
-                full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)
-
-                full_state = np.atleast_2d(full_state)
-                p = query.penalty.shape[0]
-                logdens = np.zeros(full_state.shape[0])
-
-                if margin_inactive.sum() > 0:
-                    full_state_plus = full_state + limits_marginal * margin_inactive
-                    full_state_minus = full_state - limits_marginal * margin_inactive
-                    logdens += np.sum(np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:, margin_inactive],
-                                      axis=1)
-
-                logdens += log_dens(full_state[:, ~margin_inactive])
-
-                return np.squeeze(logdens)  # should this be negative to match the gradient log density?
-
-            new_log_density = functools.partial(new_log_density,
-                                                self,
-                                                limits_marginal,
-                                                margin_inactive,
-                                                self.randomization._cdf,
-                                                self.randomization._pdf,
-                                                new_opt_transform,
-                                                self.randomization._log_density)
-
-            new_lagrange = self.penalty.weights[moving_inactive]
-            new_dual = rr.weighted_l1norm(new_lagrange, lagrange=1.).conjugate
-
-            def new_projection(dual,
-                               noverall,
-                               opt_state):
-                new_state = opt_state.copy()
-                new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
-                new_state[noverall:] = dual.bound_prox(opt_state[noverall:])
-                return new_state
-
-            new_projection = functools.partial(new_projection,
-                                               new_dual,
-                                               self._overall.sum())
-
-            new_selection_variable = copy(self.selection_variable)
-            new_selection_variable['subgradient'] = self.observed_opt_state[condition_inactive]
-
-            self.sampler = langevin_sampler(observed_opt_state,
-                                            self.observed_score_state,
-                                            self.score_transform,
-                                            new_opt_transform,
-                                            new_projection,
-                                            new_grad_log_density,
-                                            new_log_density,
-                                            selection_info=(self, new_selection_variable))
-        else:
-
-            cov, prec = self.randomization.cov_prec
-            prec_array = len(np.asarray(prec).shape) == 2
-
-            if prec_array:
-                cond_precision = new_linear.T.dot(prec.dot(new_linear))
-                cond_cov = np.linalg.inv(cond_precision)
-                logdens_linear = cond_cov.dot(new_linear.T.dot(prec))
-            else:
-                cond_precision = new_linear.T.dot(new_linear) * prec
-                cond_cov = np.linalg.inv(cond_precision)
-                logdens_linear = cond_cov.dot(new_linear.T) * prec
-
-            cond_mean = -logdens_linear.dot(self.observed_score_state + new_offset)
-
-            def log_density(logdens_linear, offset, cond_prec, score, opt):
-                if score.ndim == 1:
-                    mean_term = logdens_linear.dot(score.T + offset).T
-                else:
-                    mean_term = logdens_linear.dot(score.T + offset[:, None]).T
-                arg = opt + mean_term
-                return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
-
-            log_density = functools.partial(log_density, logdens_linear, new_offset, cond_precision)
-
-            # now make the constraints
-
-            # scaling constraints
-
-            # the scalings are first set of opt variables
-            # then unpenalized
-            # then the subgradients
-
-            I = np.identity(cond_cov.shape[0])
-            A_scaling = -I[self.scaling_slice]
-            b_scaling = np.zeros(A_scaling.shape[0])
-
-            A_subgrad = np.vstack([I[self._overall.sum():],
-                                   -I[self._overall.sum():]])
-
-            inactive_lagrange = self.penalty.weights[moving_inactive]
-            b_subgrad = np.hstack([inactive_lagrange,
-                                   inactive_lagrange])
-
-            linear_term = np.vstack([A_scaling, A_subgrad])
-            offset = np.hstack([b_scaling, b_subgrad])
-
-            affine_con = constraints(linear_term,
-                                     offset,
-                                     mean=cond_mean,
-                                     covariance=cond_cov)
-
-            logdens_transform = (logdens_linear, new_offset)
-            self._sampler = affine_gaussian_sampler(affine_con,
-                                                    observed_opt_state,
-                                                    self.observed_score_state,
-                                                    log_density,
-                                                    logdens_transform,
-                                                    selection_info=self.selection_variable)  # should be signs and the subgradients we've conditioned on
-
-
-class glm_lasso(lasso_view):
-    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}):
-        bootstrap_score = pairs_bootstrap_glm(self.loss,
-                                              self.selection_variable['variables'],
-                                              beta_full=self._beta_full,
-                                              inactive=~self.selection_variable['variables'])[0]
-
-        return bootstrap_score
-
-
-class glm_lasso_parametric(lasso_view):
-    # this setup_sampler returns only the active set
-
-    def setup_sampler(self):
-        return self.selection_variable['variables']
-
-
-class fixedX_lasso(lasso_view):
-    def __init__(self, X, Y, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
-        loss = glm.gaussian(X, Y)
-        lasso_view.__init__(self,
-                            loss,
-                            epsilon,
-                            penalty,
-                            randomization,
-                            solve_args=solve_args)
-
-    def setup_sampler(self):
-        X, Y = self.loss.data
-
-        bootstrap_score = resid_bootstrap(self.loss,
-                                          self.selection_variable['variables'],
-                                          ~self.selection_variable['variables'])[0]
-        return bootstrap_score
-
-
-##### The class for users
-
-class lasso(object):
-    r"""
-    A class for the LASSO for post-selection inference.
-    The problem solved is
-    .. math::
-        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 +
-            \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
-    where $\lambda$ is `lam`, $\omega$ is a randomization generated below
-    and the last term is a small ridge penalty.
-    """
-
-    def __init__(self,
-                 loglike,
-                 feature_weights,
-                 ridge_term,
-                 randomizer_scale,
-                 randomizer='gaussian',
-                 parametric_cov_estimator=False,
-                 perturb=None):
-        r"""
-        Create a new post-selection object for the LASSO problem
-        Parameters
-        ----------
-        loglike : `regreg.smooth.glm.glm`
-            A (negative) log-likelihood as implemented in `regreg`.
-        feature_weights : np.ndarray
-            Feature weights for L-1 penalty. If a float,
-            it is brodcast to all features.
-        ridge_term : float
-            How big a ridge term to add?
-        randomizer_scale : float
-            Scale for IID components of randomization.
-        randomizer : str (optional)
-            One of ['laplace', 'logistic', 'gaussian']
-        """
-
-        self.loglike = loglike
-        self.nfeature = p = self.loglike.shape[0]
-
-        if np.asarray(feature_weights).shape == ():
-            feature_weights = np.ones(loglike.shape) * feature_weights
-        self.feature_weights = np.asarray(feature_weights)
-
-        self.parametric_cov_estimator = parametric_cov_estimator
-
-        if randomizer == 'laplace':
-            self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
-        elif randomizer == 'gaussian':
-            self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
-        elif randomizer == 'logistic':
-            self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
-
-        self.ridge_term = ridge_term
-
-        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
-
-        self._initial_omega = perturb
-
-    def fit(self,
-            solve_args={'tol': 1.e-12, 'min_its': 50},
-            perturb=None,
-            nboot=1000):
-        """
-        Fit the randomized lasso using `regreg`.
-        Parameters
-        ----------
-        solve_args : keyword args
-             Passed to `regreg.problems.simple_problem.solve`.
-        Returns
-        -------
-        signs : np.float
-             Support and non-zero signs of randomized lasso solution.
-
-        """
-
-        if perturb is not None:
-            self._initial_omega = perturb
-
-        p = self.nfeature
-        if self.parametric_cov_estimator == True:
-            self._view = glm_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
-        else:
-            self._view = glm_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
-        self._view.solve(nboot=nboot, perturb=self._initial_omega, solve_args=solve_args)
-
-        self.signs = np.sign(self._view.initial_soln)
-        self.selection_variable = self._view.selection_variable
-        return self.signs
-
-    def decompose_subgradient(self,
-                              condition=None,
-                              marginalize=None):
-        """
-        Marginalize over some if inactive part of subgradient
-        if applicable.
-        Parameters
-        ----------
-        condition : np.bool
-             Which groups' subgradients should we condition on.
-        marginalize : np.bool
-             Which groups' subgradients should we marginalize over.
-        Returns
-        -------
-        None
-        """
-
-        if not hasattr(self, "_view"):
-            raise ValueError("fit method should be run first")
-        self._view.decompose_subgradient(condition=condition,
-                                         marginalize=marginalize)
-
-    def summary(self,
-                selected_features,
-                parameter=None,
-                level=0.9,
-                ndraw=10000,
-                burnin=2000,
-                compute_intervals=False,
-                bootstrap_sampler=False,
-                subset=None):
-        """
-        Produce p-values and confidence intervals for targets
-        of model including selected features
-        Parameters
-        ----------
-        selected_features : np.bool
-            Binary encoding of which features to use in final
-            model and targets.
-        parameter : np.array
-            Hypothesized value for parameter -- defaults to 0.
-        level : float
-            Confidence level.
-        ndraw : int (optional)
-            Defaults to 1000.
-        burnin : int (optional)
-            Defaults to 1000.
-        bootstrap : bool
-            Use wild bootstrap instead of Gaussian plugin.
-        """
-        if not hasattr(self, "_view"):
-            raise ValueError('run `fit` method before producing summary.')
-
-        if parameter is None:
-            parameter = np.zeros(self.loglike.shape[0])
-
-        if np.asarray(selected_features).dtype != np.bool:
-            raise ValueError('selected_features should be a boolean array')
-
-        unpenalized_mle = restricted_estimator(self.loglike, selected_features)
-
-        if self.parametric_cov_estimator == False:
-            n = self.loglike.data[0].shape[0]
-            form_covariances = glm_nonparametric_bootstrap(n, n)
-            boot_target, boot_target_observed = pairs_bootstrap_glm(self.loglike, selected_features, inactive=None)
-            target_info = boot_target
-        else:
-            target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
-            form_covariances = glm_parametric_covariance(self.loglike)
-
-        opt_samplers = []
-        for q in [self._view]:
-            cov_info = q.setup_sampler()
-            if self.parametric_cov_estimator == False:
-                target_cov, score_cov = form_covariances(target_info,
-                                                         cross_terms=[cov_info],
-                                                         nsample=q.nboot)
-            else:
-                target_cov, score_cov = form_covariances(target_info,
-                                                         cross_terms=[cov_info])
-            opt_samplers.append(q.sampler)
-
-        opt_samples = [opt_sampler.sample(ndraw,
-                                          burnin) for opt_sampler in opt_samplers]
-
-        if subset is not None:
-            target_cov = target_cov[subset][:, subset]
-            score_cov = score_cov[subset]
-            unpenalized_mle = unpenalized_mle[subset]
-
-        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter,
-                                                     sample=opt_samples[0])
-        if not np.all(parameter == 0):
-            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov,
-                                                          parameter=np.zeros_like(parameter), sample=opt_samples[0])
-        else:
-            pvalues = pivots
-
-        intervals = None
-        if compute_intervals:
-            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov,
-                                                             sample=opt_samples[0])
-
-        return pivots, pvalues, intervals
-
-    @staticmethod
-    def gaussian(X,
-                 Y,
-                 feature_weights,
-                 sigma=1.,
-                 parametric_cov_estimator=False,
-                 quadratic=None,
-                 ridge_term=None,
-                 randomizer_scale=None,
-                 randomizer='gaussian',
-                 perturb=None):
-        r"""
-        Squared-error LASSO with feature weights.
-        Objective function (before randomizer) is
-        $$
-        \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-        where $\lambda$ is `feature_weights`. The ridge term
-        is determined by the Hessian and `np.std(Y)` (scaled by $\sqrt{n/(n-1)}$) by default,
-        as is the randomizer scale.
-        Parameters
-        ----------
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-        Y : ndarray
-            Shape (n,) -- the response.
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized
-            features are handled by setting those entries of
-            `feature_weights` to 0. If `feature_weights` is
-            a float, then all parameters are penalized equally.
-        sigma : float (optional)
-            Noise variance. Set to 1 if `covariance_estimator` is not None.
-            This scales the loglikelihood by `sigma**(-2)`.
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic
-            coefficient to 0.
-        ridge_term : float
-            How big a ridge term to add?
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-        Returns
-        -------
-        L : `selection.randomized.convenience.lasso`
-
-        """
-
-        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic)
-        n, p = X.shape
-
-        mean_diag = np.mean((X ** 2).sum(0))
-        if ridge_term is None:
-            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
-
-        return lasso(loglike,
-                     np.asarray(feature_weights) / sigma ** 2,
-                     ridge_term,
-                     randomizer_scale,
-                     randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator,
-                     perturb=perturb)
-
-    @staticmethod
-    def logistic(X,
-                 successes,
-                 feature_weights,
-                 trials=None,
-                 parametric_cov_estimator=False,
-                 quadratic=None,
-                 ridge_term=None,
-                 randomizer='gaussian',
-                 randomizer_scale=None,
-                 perturb=None):
-        r"""
-        Logistic LASSO with feature weights.
-        Objective function is
-        $$
-        \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-        where $\ell$ is the negative of the logistic
-        log-likelihood (half the logistic deviance)
-        and $\lambda$ is `feature_weights`.
-        Parameters
-        ----------
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-        successes : ndarray
-            Shape (n,) -- response vector. An integer number of successes.
-            For data that is proportions, multiply the proportions
-            by the number of trials first.
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized
-            features are handled by setting those entries of
-            `feature_weights` to 0. If `feature_weights` is
-            a float, then all parameters are penalized equally.
-        trials : ndarray (optional)
-            Number of trials per response, defaults to
-            ones the same shape as Y.
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic
-            coefficient to 0.
-        ridge_term : float
-            How big a ridge term to add?
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-        Returns
-        -------
-        L : `selection.randomized.convenience.lasso`
-
-        """
-        n, p = X.shape
-
-        loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
-
-        mean_diag = np.mean((X ** 2).sum(0))
-
-        if ridge_term is None:
-            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5
-
-        return lasso(loglike, feature_weights,
-                     ridge_term,
-                     randomizer_scale,
-                     parametric_cov_estimator=parametric_cov_estimator,
-                     randomizer=randomizer,
-                     perturb=perturb)
-
-    @staticmethod
-    def coxph(X,
-              times,
-              status,
-              feature_weights,
-              parametric_cov_estimator=False,
-              quadratic=None,
-              ridge_term=None,
-              randomizer='gaussian',
-              randomizer_scale=None,
-              perturb=None):
-        r"""
-        Cox proportional hazards LASSO with feature weights.
-        Objective function is
-        $$
-        \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-        where $\ell^{\text{Cox}}$ is the
-        negative of the log of the Cox partial
-        likelihood and $\lambda$ is `feature_weights`.
-        Uses Efron's tie breaking method.
-        Parameters
-        ----------
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-        times : ndarray
-            Shape (n,) -- the survival times.
-        status : ndarray
-            Shape (n,) -- the censoring status.
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized
-            features are handled by setting those entries of
-            `feature_weights` to 0. If `feature_weights` is
-            a float, then all parameters are penalized equally.
-        covariance_estimator : optional
-            If None, use the parameteric
-            covariance estimate of the selected model.
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic
-            coefficient to 0.
-        ridge_term : float
-            How big a ridge term to add?
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-        Returns
-        -------
-        L : `selection.randomized.convenience.lasso`
-
-        """
-        loglike = coxph_obj(X, times, status, quadratic=quadratic)
-
-        # scale for randomization seems kind of meaningless here...
-
-        mean_diag = np.mean((X ** 2).sum(0))
-
-        if ridge_term is None:
-            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
-
-        return lasso(loglike,
-                     feature_weights,
-                     ridge_term,
-                     randomizer_scale,
-                     randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator,
-                     perturb=perturb)
-
-    @staticmethod
-    def poisson(X,
-                counts,
-                feature_weights,
-                parametric_cov_estimator=False,
-                quadratic=None,
-                ridge_term=None,
-                randomizer_scale=None,
-                randomizer='gaussian',
-                perturb=None):
-        r"""
-        Poisson log-linear LASSO with feature weights.
-        Objective function is
-        $$
-        \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-        where $\ell^{\text{Poisson}}$ is the negative
-        of the log of the Poisson likelihood (half the deviance)
-        and $\lambda$ is `feature_weights`.
-        Parameters
-        ----------
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-        counts : ndarray
-            Shape (n,) -- the response.
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized
-            features are handled by setting those entries of
-            `feature_weights` to 0. If `feature_weights` is
-            a float, then all parameters are penalized equally.
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic
-            coefficient to 0.
-        ridge_term : float
-            How big a ridge term to add?
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-        randomizer : str
-            One of ['laplace', 'logistic', 'gaussian']
-        Returns
-        -------
-        L : `selection.randomized.convenience.lasso`
-
-        """
-        n, p = X.shape
-        loglike = rr.glm.poisson(X, counts, quadratic=quadratic)
-
-        # scale for randomizer seems kind of meaningless here...
-
-        mean_diag = np.mean((X ** 2).sum(0))
-
-        if ridge_term is None:
-            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n - 1)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.))
-
-        return lasso(loglike,
-                     feature_weights,
-                     ridge_term,
-                     randomizer_scale,
-                     randomizer=randomizer,
-                     parametric_cov_estimator=parametric_cov_estimator,
-                     perturb=perturb)
-
-    @staticmethod
-    def sqrt_lasso(X,
-                   Y,
-                   feature_weights,
-                   quadratic=None,
-                   parametric_cov_estimator=False,
-                   sigma_estimate='truncated',
-                   solve_args={'min_its': 200},
-                   randomizer_scale=None,
-                   perturb=None):
-        r"""
-        Use sqrt-LASSO to choose variables.
-        Objective function is
-        $$
-        \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
-        $$
-        where $\lambda$ is `feature_weights`. After solving the problem
-        treat as if `gaussian` with implied variance and choice of
-        multiplier. See arxiv.org/abs/1504.08031 for details.
-        Parameters
-        ----------
-        X : ndarray
-            Shape (n,p) -- the design matrix.
-        Y : ndarray
-            Shape (n,) -- the response.
-        feature_weights: [float, sequence]
-            Penalty weights. An intercept, or other unpenalized
-            features are handled by setting those entries of
-            `feature_weights` to 0. If `feature_weights` is
-            a float, then all parameters are penalized equally.
-        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
-            An optional quadratic term to be added to the objective.
-            Can also be a linear term by setting quadratic
-            coefficient to 0.
-        covariance : str
-            One of 'parametric' or 'sandwich'. Method
-            used to estimate covariance for inference
-            in second stage.
-        sigma_estimate : str
-            One of 'truncated' or 'OLS'. Method
-            used to estimate $\sigma$ when using
-            parametric covariance.
-        solve_args : dict
-            Arguments passed to solver.
-        ridge_term : float
-            How big a ridge term to add?
-        randomizer_scale : float
-            Scale for IID components of randomizer.
-        Returns
-        -------
-        L : `selection.randomized.convenience.lasso`
-
-        Notes
-        -----
-        Unlike other variants of LASSO, this
-        solves the problem on construction as the active
-        set is needed to find equivalent gaussian LASSO.
-        Assumes parametric model is correct for inference,
-        i.e. does not accept a covariance estimator.
-        """
-
-        n, p = X.shape
-
-        if np.asarray(feature_weights).shape == ():
-            feature_weights = np.ones(loglike.shape) * feature_weights
-
-        mean_diag = np.mean((X ** 2).sum(0))
-        if ridge_term is None:
-            ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1)
-
-        if randomizer_scale is None:
-            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.sqrt(n / (n - 1.))
-
-        if perturb is None:
-            perturb = np.random.standard_normal(p) * randomizer_scale
-
-        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0)  # a ridge + linear term
-
-        if quadratic is not None:
-            totalQ = randomQ + quadratic
-        else:
-            totalQ = randomQ
-
-        soln, sqrt_loss = solve_sqrt_lasso(X,
-                                           Y,
-                                           weights=feature_weights,
-                                           quadratic=totalQ,
-                                           solve_args=solve_args,
-                                           force_fat=True)
-
-        denom = np.linalg.norm(Y - X.dot(soln))
-
-        loglike = rr.glm.gaussian(X, Y)
-
-        raise NotImplementedError(
-            'lasso_view needs to be modified so that the initial randomization can be set at construction time')
-
-        return lasso(loglike,
-                     np.asarray(feature_weights) * denom,
-                     ridge_term * denom,
-                     randomizer_scale * denom,
-                     randomizer='gaussian',
-                     parametric_cov_estimator=parametric_cov_estimator,
-                     perturb=perturb)
-
-
 #### High dimensional version
 #### - parametric covariance
 #### - Gaussian randomization
 
-class highdim(lasso):
+class lasso(object):
     r"""
     A class for the randomized LASSO for post-selection inference.
     The problem solved is
@@ -1751,7 +565,7 @@ def gaussian(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
 
-        return highdim(loglike, np.asarray(feature_weights) / sigma ** 2,
+        return lasso(loglike, np.asarray(feature_weights) / sigma ** 2,
                        ridge_term, randomizer_scale)
 
     @staticmethod
@@ -1813,7 +627,7 @@ def logistic(X,
         if randomizer_scale is None:
             randomizer_scale = np.sqrt(mean_diag) * 0.5
 
-        return highdim(loglike, np.asarray(feature_weights),
+        return lasso(loglike, np.asarray(feature_weights),
                        ridge_term, randomizer_scale)
 
     @staticmethod
@@ -2035,10 +849,10 @@ def sqrt_lasso(X,
         denom = np.linalg.norm(Y - X.dot(soln))
         loglike = rr.glm.gaussian(X, Y)
 
-        obj = highdim(loglike, np.asarray(feature_weights) * denom,
-                      ridge_term * denom,
-                      randomizer_scale * denom,
-                      perturb=perturb * denom)
+        obj = lasso(loglike, np.asarray(feature_weights) * denom,
+                    ridge_term * denom,
+                    randomizer_scale * denom,
+                    perturb=perturb * denom)
         obj._sqrt_soln = soln
 
         return obj
diff --git a/selection/randomized/modelQ.py b/selection/randomized/modelQ.py
index e194e6d54..bde0f7a62 100644
--- a/selection/randomized/modelQ.py
+++ b/selection/randomized/modelQ.py
@@ -5,7 +5,6 @@
 from ..constraints.affine import constraints
 
 from .query import affine_gaussian_sampler
-from .lasso import highdim
 from .randomization import randomization
 
 class modelQ(object):
diff --git a/selection/randomized/sandbox/general_lasso.py b/selection/randomized/sandbox/general_lasso.py
new file mode 100644
index 000000000..4b4d83382
--- /dev/null
+++ b/selection/randomized/sandbox/general_lasso.py
@@ -0,0 +1,1218 @@
+from __future__ import print_function
+import functools
+from copy import copy
+
+import numpy as np
+from scipy.stats import norm as ndist
+
+import functools
+from copy import copy
+
+import numpy as np
+from scipy.stats import norm as ndist
+
+import regreg.api as rr
+import regreg.affine as ra
+
+from ..constraints.affine import constraints
+from ..algorithms.sqrt_lasso import solve_sqrt_lasso, choose_lambda
+
+from .query import (query,
+                    multiple_queries,
+                    langevin_sampler,
+                    affine_gaussian_sampler)
+
+from .reconstruction import reconstruct_opt
+from .randomization import randomization
+from .base import restricted_estimator
+from .glm import (pairs_bootstrap_glm,
+                  glm_nonparametric_bootstrap,
+                  glm_parametric_covariance)
+from ..algorithms.debiased_lasso import debiasing_matrix
+
+
+class lasso_view(query):
+    def __init__(self,
+                 loss,
+                 epsilon,
+                 penalty,
+                 randomization,
+                 perturb=None,
+                 solve_args={'min_its': 50, 'tol': 1.e-10}):
+        """
+        Store the ingredients of a randomized LASSO query.
+        No optimization is performed here; the problem is solved
+        later by the `solve` method.
+
+        Parameters
+        ----------
+        loss : smooth loss (e.g. a `regreg` GLM loss)
+            Loss whose penalized, randomized version is minimized.
+        epsilon : float
+            Coefficient of the ridge term added to the objective.
+        penalty : `rr.l1norm` or `rr.weighted_l1norm`
+            Sparsity penalty; `solve` rejects any other type.
+        randomization : randomization object
+            Passed on to `query.__init__`.
+        perturb, solve_args : optional
+            Accepted but not used by the constructor; pass them
+            to `solve` instead.
+        """
+
+        query.__init__(self, randomization)
+
+        (self.loss,
+         self.epsilon,
+         self.penalty,
+         self.randomization) = (loss,
+                                epsilon,
+                                penalty,
+                                randomization)
+
+    # Methods needed for subclassing a query
+
+    def solve(self, nboot=2000,
+              solve_args={'min_its': 20, 'tol': 1.e-10},
+              perturb=None):
+
+        self.randomize(perturb=perturb)
+
+        (loss,
+         randomized_loss,
+         epsilon,
+         penalty,
+         randomization) = (self.loss,
+                           self.randomized_loss,
+                           self.epsilon,
+                           self.penalty,
+                           self.randomization)
+
+        # initial solution
+
+        p = penalty.shape[0]
+
+        problem = rr.simple_problem(randomized_loss, penalty)
+        self.initial_soln = problem.solve(**solve_args)
+
+        # find the active groups and their direction vectors
+        # as well as unpenalized groups
+
+        active_signs = np.sign(self.initial_soln)
+        active = self._active = active_signs != 0
+
+        if isinstance(penalty, rr.l1norm):
+            self._lagrange = penalty.lagrange * np.ones(p)
+            unpenalized = np.zeros(p, np.bool)
+        elif isinstance(penalty, rr.weighted_l1norm):
+            self._lagrange = penalty.weights
+            unpenalized = self._lagrange == 0
+        else:
+            raise ValueError('penalty must be `l1norm` or `weighted_l1norm`')
+
+        active *= ~unpenalized
+
+        # solve the restricted problem
+
+        self._overall = (active + unpenalized) > 0
+        self._inactive = ~self._overall
+        self._unpenalized = unpenalized
+
+        _active_signs = active_signs.copy()
+        _active_signs[unpenalized] = np.nan  # don't release sign of unpenalized variables
+        self.selection_variable = {'sign': _active_signs,
+                                   'variables': self._overall}
+
+        # initial state for opt variables
+
+        initial_subgrad = -(self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
+                            self.randomized_loss.quadratic.objective(self.initial_soln, 'grad'))
+        # the quadratic of a smooth_atom is not included in computing the smooth_objective
+        self.initial_subgrad = initial_subgrad
+
+        initial_scalings = np.fabs(self.initial_soln[active])
+        initial_unpenalized = self.initial_soln[self._unpenalized]
+
+        self.observed_opt_state = np.concatenate([initial_scalings,
+                                                  initial_unpenalized,
+                                                  self.initial_subgrad[self._inactive]], axis=0)
+
+        # set the _solved bit
+
+        self._solved = True
+
+        # Now setup the pieces for linear decomposition
+
+        (loss,
+         epsilon,
+         penalty,
+         initial_soln,
+         overall,
+         inactive,
+         unpenalized) = (self.loss,
+                         self.epsilon,
+                         self.penalty,
+                         self.initial_soln,
+                         self._overall,
+                         self._inactive,
+                         self._unpenalized)
+
+        # we are implicitly assuming that
+        # loss is a pairs model
+
+        _beta_unpenalized = restricted_estimator(loss, overall, solve_args=solve_args)
+
+        beta_bar = np.zeros(p)
+        beta_bar[overall] = _beta_unpenalized
+        self._beta_full = beta_bar
+
+        # observed state for score in internal coordinates
+
+        self.observed_internal_state = np.hstack([_beta_unpenalized,
+                                                  -loss.smooth_objective(beta_bar, 'grad')[inactive]])
+
+        # form linear part
+
+        self.num_opt_var = self.observed_opt_state.shape[0]
+
+        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
+        # E for active
+        # U for unpenalized
+        # -E for inactive
+
+        _opt_linear_term = np.zeros((p, p))
+        _score_linear_term = np.zeros((p, p))
+
+        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator
+
+        est_slice = slice(0, overall.sum())
+        X, y = loss.data
+        W = self.loss.saturated_loss.hessian(X.dot(beta_bar))
+        _hessian_active = np.dot(X.T, X[:, active] * W[:, None])
+        _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None])
+
+        _score_linear_term[:, est_slice] = -np.hstack([_hessian_active, _hessian_unpen])
+
+        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution
+
+        null_idx = np.arange(overall.sum(), p)
+        inactive_idx = np.nonzero(inactive)[0]
+        for _i, _n in zip(inactive_idx, null_idx):
+            _score_linear_term[_i, _n] = -1
+
+        # c_E piece
+
+        def signed_basis_vector(p, j, s):
+            v = np.zeros(p)
+            v[j] = s
+            return v
+
+        active_directions = np.array([signed_basis_vector(p, j, active_signs[j]) for j in np.nonzero(active)[0]]).T
+
+        scaling_slice = slice(0, active.sum())
+        if np.sum(active) == 0:
+            _opt_hessian = 0
+        else:
+            _opt_hessian = _hessian_active * active_signs[None, active] + epsilon * active_directions
+        _opt_linear_term[:, scaling_slice] = _opt_hessian
+
+        # beta_U piece
+
+        unpenalized_slice = slice(active.sum(), active.sum() + unpenalized.sum())
+        unpenalized_directions = np.array([signed_basis_vector(p, j, 1) for j in np.nonzero(unpenalized)[0]]).T
+        if unpenalized.sum():
+            _opt_linear_term[:, unpenalized_slice] = (_hessian_unpen
+                                                      + epsilon * unpenalized_directions)
+
+            # subgrad piece
+
+        subgrad_idx = range(active.sum() + unpenalized.sum(), active.sum() + inactive.sum() + unpenalized.sum())
+        subgrad_slice = slice(active.sum() + unpenalized.sum(), active.sum() + inactive.sum() + unpenalized.sum())
+        for _i, _s in zip(inactive_idx, subgrad_idx):
+            _opt_linear_term[_i, _s] = 1
+
+        # form affine part
+
+        _opt_affine_term = np.zeros(p)
+        idx = 0
+        _opt_affine_term[active] = active_signs[active] * self._lagrange[active]
+
+        # two transforms that encode score and optimization
+        # variable roles
+
+        self.opt_transform = (_opt_linear_term, _opt_affine_term)
+        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))
+
+        # everything now expressed in observed_score_state
+
+        self.observed_score_state = _score_linear_term.dot(self.observed_internal_state)
+
+        # now store everything needed for the projections
+        # the projection acts only on the optimization
+        # variables
+
+        # we form a dual group lasso object
+        # to do the projection
+
+
+        self._setup = True
+        self.subgrad_slice = subgrad_slice
+        self.scaling_slice = scaling_slice
+        self.unpenalized_slice = unpenalized_slice
+        self.ndim = loss.shape[0]
+
+        self.nboot = nboot
+
+    def get_sampler(self):
+        # setup the default optimization sampler
+
+        if not hasattr(self, "_sampler"):
+
+            penalty, inactive = self.penalty, self._inactive
+            inactive_lagrange = self.penalty.weights[inactive]
+
+            if not hasattr(self.randomization, "cov_prec"):  # means randomization is not Gaussian
+
+                dual = rr.weighted_supnorm(1. / inactive_lagrange, bound=1.)
+
+                def projection(dual, subgrad_slice, scaling_slice, opt_state):
+                    """
+                    Full projection for Langevin.
+                    The state here will be only the state of the optimization variables.
+                    """
+
+                    new_state = opt_state.copy()  # not really necessary to copy
+                    new_state[scaling_slice] = np.maximum(opt_state[scaling_slice], 0)
+                    new_state[subgrad_slice] = dual.bound_prox(opt_state[subgrad_slice])
+                    return new_state
+
+                projection = functools.partial(projection, dual, self.subgrad_slice, self.scaling_slice)
+
+                def grad_log_density(query,
+                                     rand_gradient,
+                                     score_state,
+                                     opt_state):
+                    full_state = score_state + reconstruct_opt(query.opt_transform, opt_state)
+                    return opt_linear.T.dot(rand_gradient(full_state).T)
+
+                grad_log_density = functools.partial(grad_log_density, self, self.randomization.gradient)
+
+                def log_density(query,
+                                opt_linear,
+                                rand_log_density,
+                                score_state,
+                                opt_state):
+                    full_state = score_state + reconstruct_opt(query.opt_transform, opt_state)
+                    return rand_log_density(full_state)
+
+                log_density = functools.partial(log_density, self, self.randomization.log_density)
+
+                self._sampler = langevin_sampler(self.observed_opt_state,
+                                                 self.observed_score_state,
+                                                 self.score_transform,
+                                                 self.opt_transform,
+                                                 projection,
+                                                 grad_log_density,
+                                                 log_density)
+            else:
+
+                # compute implied mean and covariance
+
+                cov, prec = self.randomization.cov_prec
+                prec_array = len(np.asarray(prec).shape) == 2
+                opt_linear, opt_offset = self.opt_transform
+
+                if prec_array:
+                    cond_precision = opt_linear.T.dot(prec.dot(opt_linear))
+                    cond_cov = np.linalg.inv(cond_precision)
+                    logdens_linear = cond_cov.dot(opt_linear.T.dot(prec))
+                else:
+                    cond_precision = opt_linear.T.dot(opt_linear) * prec
+                    cond_cov = np.linalg.inv(cond_precision)
+                    logdens_linear = cond_cov.dot(opt_linear.T) * prec
+
+                cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset)
+
+                # need a log_density function
+                # the conditional density of opt variables
+                # given the score
+
+                def log_density(logdens_linear, offset, cond_prec, score, opt):
+                    if score.ndim == 1:
+                        mean_term = logdens_linear.dot(score.T + offset).T
+                    else:
+                        mean_term = logdens_linear.dot(score.T + offset[:, None]).T
+                    arg = opt + mean_term
+                    return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+
+                log_density = functools.partial(log_density, logdens_linear, opt_offset, cond_precision)
+
+                # now make the constraints
+
+                # scaling constraints
+
+                I = np.identity(cond_cov.shape[0])
+                A_scaling = -I[self.scaling_slice]
+                b_scaling = np.zeros(A_scaling.shape[0])
+
+                A_subgrad = np.vstack([I[self.subgrad_slice],
+                                       -I[self.subgrad_slice]])
+                b_subgrad = np.hstack([inactive_lagrange,
+                                       inactive_lagrange])
+
+                linear_term = np.vstack([A_scaling, A_subgrad])
+                offset = np.hstack([b_scaling, b_subgrad])
+
+                affine_con = constraints(linear_term,
+                                         offset,
+                                         mean=cond_mean,
+                                         covariance=cond_cov)
+
+                logdens_transform = (logdens_linear, opt_offset)
+
+                self._sampler = affine_gaussian_sampler(affine_con,
+                                                        self.observed_opt_state,
+                                                        self.observed_score_state,
+                                                        log_density,
+                                                        logdens_transform,
+                                                        selection_info=self.selection_variable)  # should be signs and the subgradients we've conditioned on
+
+        return self._sampler
+
+    sampler = property(get_sampler, query.set_sampler)
+
+    def decompose_subgradient(self, condition=None, marginalize=None):
+        """
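+        Split the inactive subgradient coordinates into conditioned, marginalized and
+        free parts and rebuild the sampler; `condition` and `marginalize` must be disjoint.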
+        """
+
+        p = self.penalty.shape[0]
+        condition_inactive = np.zeros(p, dtype=np.bool)
+
+        if condition is None:
+            condition = np.zeros(p, dtype=np.bool)
+
+        if marginalize is None:
+            marginalize = np.zeros(p, dtype=np.bool)
+            marginalize[self._overall] = 0
+
+        if np.any(condition * marginalize):
+            raise ValueError("cannot simultaneously condition and marginalize over a group's subgradient")
+
+        if not self._setup:
+            raise ValueError('setup_sampler should be called before using this function')
+
+        _inactive = self._inactive
+
+        limits_marginal = np.zeros_like(_inactive, np.float)
+
+        condition_inactive = _inactive * condition
+        moving_inactive = _inactive * ~(marginalize + condition)
+        margin_inactive = _inactive * marginalize
+
+        limits_marginal = self._lagrange
+        if np.asarray(self._lagrange).shape in [(), (1,)]:
+            limits_marginal = np.zeros_like(_inactive) * self._lagrange
+
+        opt_linear, opt_offset = self.opt_transform
+
+        new_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
+                                                     self._unpenalized.sum() +
+                                                     moving_inactive.sum())))
+        new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
+        new_linear[:, self.unpenalized_slice] = opt_linear[:, self.unpenalized_slice]
+
+        inactive_moving_idx = np.nonzero(moving_inactive)[0]
+        subgrad_idx = range(self._active.sum() + self._unpenalized.sum(),
+                            self._active.sum() + self._unpenalized.sum() +
+                            moving_inactive.sum())
+        for _i, _s in zip(inactive_moving_idx, subgrad_idx):
+            new_linear[_i, _s] = 1.
+
+        observed_opt_state = self.observed_opt_state[:(self._active.sum() +
+                                                       self._unpenalized.sum() +
+                                                       moving_inactive.sum())]
+        observed_opt_state[subgrad_idx] = self.initial_subgrad[moving_inactive]
+
+        condition_linear = np.zeros((opt_linear.shape[0], (self._active.sum() +
+                                                           self._unpenalized.sum() +
+                                                           condition_inactive.sum())))
+
+        new_offset = opt_offset + 0.
+        new_offset[condition_inactive] += self.initial_subgrad[condition_inactive]
+        new_opt_transform = (new_linear, new_offset)
+
+        if not hasattr(self.randomization, "cov_prec") or marginalize.sum():  # use Langevin -- not gaussian
+
+            def _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive):
+                return (np.divide(_pdf(full_state_plus) - _pdf(full_state_minus),
+                                  _cdf(full_state_plus) - _cdf(full_state_minus)))[margin_inactive]
+
+            def new_grad_log_density(query,
+                                     limits_marginal,
+                                     margin_inactive,
+                                     _cdf,
+                                     _pdf,
+                                     new_opt_transform,
+                                     deriv_log_dens,
+                                     score_state,
+                                     opt_state):
+
+                full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)
+
+                p = query.penalty.shape[0]
+                weights = np.zeros(p)
+
+                if margin_inactive.sum() > 0:
+                    full_state_plus = full_state + limits_marginal * margin_inactive
+                    full_state_minus = full_state - limits_marginal * margin_inactive
+                    weights[margin_inactive] = _fraction(_cdf, _pdf, full_state_plus, full_state_minus, margin_inactive)
+                weights[~margin_inactive] = deriv_log_dens(full_state)[~margin_inactive]
+                return -opt_linear.T.dot(weights)
+
+            new_grad_log_density = functools.partial(new_grad_log_density,
+                                                     self,
+                                                     limits_marginal,
+                                                     margin_inactive,
+                                                     self.randomization._cdf,
+                                                     self.randomization._pdf,
+                                                     new_opt_transform,
+                                                     self.randomization._derivative_log_density)
+
+            def new_log_density(query,
+                                limits_marginal,
+                                margin_inactive,
+                                _cdf,
+                                _pdf,
+                                new_opt_transform,
+                                log_dens,
+                                score_state,
+                                opt_state):
+
+                full_state = score_state + reconstruct_opt(new_opt_transform, opt_state)
+
+                full_state = np.atleast_2d(full_state)
+                p = query.penalty.shape[0]
+                logdens = np.zeros(full_state.shape[0])
+
+                if margin_inactive.sum() > 0:
+                    full_state_plus = full_state + limits_marginal * margin_inactive
+                    full_state_minus = full_state - limits_marginal * margin_inactive
+                    logdens += np.sum(np.log(_cdf(full_state_plus) - _cdf(full_state_minus))[:, margin_inactive],
+                                      axis=1)
+
+                logdens += log_dens(full_state[:, ~margin_inactive])
+
+                return np.squeeze(logdens)  # should this be negative to match the gradient log density?
+
+            new_log_density = functools.partial(new_log_density,
+                                                self,
+                                                limits_marginal,
+                                                margin_inactive,
+                                                self.randomization._cdf,
+                                                self.randomization._pdf,
+                                                new_opt_transform,
+                                                self.randomization._log_density)
+
+            new_lagrange = self.penalty.weights[moving_inactive]
+            new_dual = rr.weighted_l1norm(new_lagrange, lagrange=1.).conjugate
+
+            def new_projection(dual,
+                               noverall,
+                               opt_state):
+                new_state = opt_state.copy()
+                new_state[self.scaling_slice] = np.maximum(opt_state[self.scaling_slice], 0)
+                new_state[noverall:] = dual.bound_prox(opt_state[noverall:])
+                return new_state
+
+            new_projection = functools.partial(new_projection,
+                                               new_dual,
+                                               self._overall.sum())
+
+            new_selection_variable = copy(self.selection_variable)
+            new_selection_variable['subgradient'] = self.observed_opt_state[condition_inactive]
+
+            self.sampler = langevin_sampler(observed_opt_state,
+                                            self.observed_score_state,
+                                            self.score_transform,
+                                            new_opt_transform,
+                                            new_projection,
+                                            new_grad_log_density,
+                                            new_log_density,
+                                            selection_info=(self, new_selection_variable))
+        else:
+
+            cov, prec = self.randomization.cov_prec
+            prec_array = len(np.asarray(prec).shape) == 2
+
+            if prec_array:
+                cond_precision = new_linear.T.dot(prec.dot(new_linear))
+                cond_cov = np.linalg.inv(cond_precision)
+                logdens_linear = cond_cov.dot(new_linear.T.dot(prec))
+            else:
+                cond_precision = new_linear.T.dot(new_linear) * prec
+                cond_cov = np.linalg.inv(cond_precision)
+                logdens_linear = cond_cov.dot(new_linear.T) * prec
+
+            cond_mean = -logdens_linear.dot(self.observed_score_state + new_offset)
+
+            def log_density(logdens_linear, offset, cond_prec, score, opt):
+                if score.ndim == 1:
+                    mean_term = logdens_linear.dot(score.T + offset).T
+                else:
+                    mean_term = logdens_linear.dot(score.T + offset[:, None]).T
+                arg = opt + mean_term
+                return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1)
+
+            log_density = functools.partial(log_density, logdens_linear, new_offset, cond_precision)
+
+            # now make the constraints
+
+            # scaling constraints
+
+            # the scalings are first set of opt variables
+            # then unpenalized
+            # then the subgradients
+
+            I = np.identity(cond_cov.shape[0])
+            A_scaling = -I[self.scaling_slice]
+            b_scaling = np.zeros(A_scaling.shape[0])
+
+            A_subgrad = np.vstack([I[self._overall.sum():],
+                                   -I[self._overall.sum():]])
+
+            inactive_lagrange = self.penalty.weights[moving_inactive]
+            b_subgrad = np.hstack([inactive_lagrange,
+                                   inactive_lagrange])
+
+            linear_term = np.vstack([A_scaling, A_subgrad])
+            offset = np.hstack([b_scaling, b_subgrad])
+
+            affine_con = constraints(linear_term,
+                                     offset,
+                                     mean=cond_mean,
+                                     covariance=cond_cov)
+
+            logdens_transform = (logdens_linear, new_offset)
+            self._sampler = affine_gaussian_sampler(affine_con,
+                                                    observed_opt_state,
+                                                    self.observed_score_state,
+                                                    log_density,
+                                                    logdens_transform,
+                                                    selection_info=self.selection_variable)  # should be signs and the subgradients we've conditioned on
+
+
+class glm_lasso(lasso_view):
+    def setup_sampler(self, scaling=1., solve_args={'min_its': 50, 'tol': 1.e-10}):
+        bootstrap_score = pairs_bootstrap_glm(self.loss,
+                                              self.selection_variable['variables'],
+                                              beta_full=self._beta_full,
+                                              inactive=~self.selection_variable['variables'])[0]
+
+        return bootstrap_score
+
+
+class glm_lasso_parametric(lasso_view):
+    # this setup_sampler returns only the active set
+
+    def setup_sampler(self):
+        return self.selection_variable['variables']
+
+
+class fixedX_lasso(lasso_view):
+    def __init__(self, X, Y, epsilon, penalty, randomization, solve_args={'min_its': 50, 'tol': 1.e-10}):
+        loss = glm.gaussian(X, Y)
+        lasso_view.__init__(self,
+                            loss,
+                            epsilon,
+                            penalty,
+                            randomization,
+                            solve_args=solve_args)
+
+    def setup_sampler(self):
+        X, Y = self.loss.data
+
+        bootstrap_score = resid_bootstrap(self.loss,
+                                          self.selection_variable['variables'],
+                                          ~self.selection_variable['variables'])[0]
+        return bootstrap_score
+
+
+##### The class for users
+
+class lasso(object):
+    r"""
+    A class for the LASSO for post-selection inference.
+    The problem solved is
+    .. math::
+        \text{minimize}_{\beta} \frac{1}{2n} \|y-X\beta\|^2_2 +
+            \lambda \|\beta\|_1 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2
+    where $\lambda$ is `feature_weights`, $\omega$ is a randomization generated below
+    and the last term is a small ridge penalty.
+    """
+
+    def __init__(self,
+                 loglike,
+                 feature_weights,
+                 ridge_term,
+                 randomizer_scale,
+                 randomizer='gaussian',
+                 parametric_cov_estimator=False,
+                 perturb=None):
+        r"""
+        Create a new post-selection object for the LASSO problem
+        Parameters
+        ----------
+        loglike : `regreg.smooth.glm.glm`
+            A (negative) log-likelihood as implemented in `regreg`.
+        feature_weights : np.ndarray
+            Feature weights for L-1 penalty. If a float,
+            it is broadcast to all features.
+        ridge_term : float
+            How big a ridge term to add?
+        randomizer_scale : float
+            Scale for IID components of randomization.
+        randomizer : str (optional)
+            One of ['laplace', 'logistic', 'gaussian']
+        """
+
+        self.loglike = loglike
+        self.nfeature = p = self.loglike.shape[0]
+
+        if np.asarray(feature_weights).shape == ():
+            feature_weights = np.ones(loglike.shape) * feature_weights
+        self.feature_weights = np.asarray(feature_weights)
+
+        self.parametric_cov_estimator = parametric_cov_estimator
+
+        if randomizer == 'laplace':
+            self.randomizer = randomization.laplace((p,), scale=randomizer_scale)
+        elif randomizer == 'gaussian':
+            self.randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)
+        elif randomizer == 'logistic':
+            self.randomizer = randomization.logistic((p,), scale=randomizer_scale)
+
+        self.ridge_term = ridge_term
+
+        self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.)
+
+        self._initial_omega = perturb
+
+    def fit(self,
+            solve_args={'tol': 1.e-12, 'min_its': 50},
+            perturb=None,
+            nboot=1000):
+        """
+        Fit the randomized lasso using `regreg`.
+        Parameters
+        ----------
+        solve_args : keyword args
+             Passed to `regreg.problems.simple_problem.solve`.
+        Returns
+        -------
+        signs : np.float
+             Support and non-zero signs of randomized lasso solution.
+
+        """
+
+        if perturb is not None:
+            self._initial_omega = perturb
+
+        p = self.nfeature
+        if self.parametric_cov_estimator == True:
+            self._view = glm_lasso_parametric(self.loglike, self.ridge_term, self.penalty, self.randomizer)
+        else:
+            self._view = glm_lasso(self.loglike, self.ridge_term, self.penalty, self.randomizer)
+        self._view.solve(nboot=nboot, perturb=self._initial_omega, solve_args=solve_args)
+
+        self.signs = np.sign(self._view.initial_soln)
+        self.selection_variable = self._view.selection_variable
+        return self.signs
+
+    def decompose_subgradient(self,
+                              condition=None,
+                              marginalize=None):
+        """
+        Condition on or marginalize over part of the inactive subgradient,
+        if applicable.
+        Parameters
+        ----------
+        condition : np.bool
+             Which groups' subgradients should we condition on.
+        marginalize : np.bool
+             Which groups' subgradients should we marginalize over.
+        Returns
+        -------
+        None
+        """
+
+        if not hasattr(self, "_view"):
+            raise ValueError("fit method should be run first")
+        self._view.decompose_subgradient(condition=condition,
+                                         marginalize=marginalize)
+
+    def summary(self,
+                selected_features,
+                parameter=None,
+                level=0.9,
+                ndraw=10000,
+                burnin=2000,
+                compute_intervals=False,
+                bootstrap_sampler=False,
+                subset=None):
+        """
+        Produce p-values and confidence intervals for targets
+        of model including selected features
+        Parameters
+        ----------
+        selected_features : np.bool
+            Binary encoding of which features to use in final
+            model and targets.
+        parameter : np.array
+            Hypothesized value for parameter -- defaults to 0.
+        level : float
+            Confidence level.
+        ndraw : int (optional)
+            Defaults to 10000.
+        burnin : int (optional)
+            Defaults to 2000.
+        bootstrap_sampler : bool
+            Use wild bootstrap instead of Gaussian plugin.
+        """
+        if not hasattr(self, "_view"):
+            raise ValueError('run `fit` method before producing summary.')
+
+        if parameter is None:
+            parameter = np.zeros(self.loglike.shape[0])
+
+        if np.asarray(selected_features).dtype != np.bool:
+            raise ValueError('selected_features should be a boolean array')
+
+        unpenalized_mle = restricted_estimator(self.loglike, selected_features)
+
+        if self.parametric_cov_estimator == False:
+            n = self.loglike.data[0].shape[0]
+            form_covariances = glm_nonparametric_bootstrap(n, n)
+            boot_target, boot_target_observed = pairs_bootstrap_glm(self.loglike, selected_features, inactive=None)
+            target_info = boot_target
+        else:
+            target_info = (selected_features, np.identity(unpenalized_mle.shape[0]))
+            form_covariances = glm_parametric_covariance(self.loglike)
+
+        opt_samplers = []
+        for q in [self._view]:
+            cov_info = q.setup_sampler()
+            if self.parametric_cov_estimator == False:
+                target_cov, score_cov = form_covariances(target_info,
+                                                         cross_terms=[cov_info],
+                                                         nsample=q.nboot)
+            else:
+                target_cov, score_cov = form_covariances(target_info,
+                                                         cross_terms=[cov_info])
+            opt_samplers.append(q.sampler)
+
+        opt_samples = [opt_sampler.sample(ndraw,
+                                          burnin) for opt_sampler in opt_samplers]
+
+        if subset is not None:
+            target_cov = target_cov[subset][:, subset]
+            score_cov = score_cov[subset]
+            unpenalized_mle = unpenalized_mle[subset]
+
+        pivots = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov, parameter=parameter,
+                                                     sample=opt_samples[0])
+        if not np.all(parameter == 0):
+            pvalues = opt_samplers[0].coefficient_pvalues(unpenalized_mle, target_cov, score_cov,
+                                                          parameter=np.zeros_like(parameter), sample=opt_samples[0])
+        else:
+            pvalues = pivots
+
+        intervals = None
+        if compute_intervals:
+            intervals = opt_samplers[0].confidence_intervals(unpenalized_mle, target_cov, score_cov,
+                                                             sample=opt_samples[0])
+
+        return pivots, pvalues, intervals
+
+    @staticmethod
+    def gaussian(X,
+                 Y,
+                 feature_weights,
+                 sigma=1.,
+                 parametric_cov_estimator=False,
+                 quadratic=None,
+                 ridge_term=None,
+                 randomizer_scale=None,
+                 randomizer='gaussian',
+                 perturb=None):
+        r"""
+        Squared-error LASSO with feature weights.
+        Objective function (before randomizer) is
+        $$
+        \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+        where $\lambda$ is `feature_weights`. The ridge term
+        is determined by the Hessian and `np.std(Y)` (scaled by $\sqrt{n/(n-1)}$) by default,
+        as is the randomizer scale.
+        Parameters
+        ----------
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+        Y : ndarray
+            Shape (n,) -- the response.
+        feature_weights: [float, sequence]
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
+            a float, then all parameters are penalized equally.
+        sigma : float (optional)
+            Noise standard deviation. Set to 1 if `covariance_estimator` is not None.
+            This scales the loglikelihood by `sigma**(-2)`.
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic
+            coefficient to 0.
+        ridge_term : float
+            How big a ridge term to add?
+        randomizer_scale : float
+            Scale for IID components of randomizer.
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+        Returns
+        -------
+        L : `selection.randomized.convenience.lasso`
+
+        """
+
+        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic)
+        n, p = X.shape
+
+        mean_diag = np.mean((X ** 2).sum(0))
+        if ridge_term is None:
+            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)
+
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))
+
+        return lasso(loglike,
+                     np.asarray(feature_weights) / sigma ** 2,
+                     ridge_term,
+                     randomizer_scale,
+                     randomizer=randomizer,
+                     parametric_cov_estimator=parametric_cov_estimator,
+                     perturb=perturb)
+
+    @staticmethod
+    def logistic(X,
+                 successes,
+                 feature_weights,
+                 trials=None,
+                 parametric_cov_estimator=False,
+                 quadratic=None,
+                 ridge_term=None,
+                 randomizer='gaussian',
+                 randomizer_scale=None,
+                 perturb=None):
+        r"""
+        Logistic LASSO with feature weights.
+
+        Objective function is
+        $$
+        \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+        where $\ell$ is the negative of the logistic log-likelihood
+        (half the logistic deviance) and $\lambda$ is `feature_weights`.
+
+        Parameters
+        ----------
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+        successes : ndarray
+            Shape (n,) -- response vector. An integer number of successes.
+            For proportions, multiply by the number of trials first.
+        feature_weights : [float, sequence]
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
+            a float, then all parameters are penalized equally.
+        trials : ndarray (optional)
+            Number of trials per response, defaults to ones shaped like `successes`.
+        parametric_cov_estimator : bool
+            Forwarded unchanged to the `lasso` constructor.
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic coefficient to 0.
+        ridge_term : float
+            Ridge term. If None, uses
+            `std(successes) * sqrt(mean_diag) / sqrt(n - 1)` where
+            `mean_diag` is the mean squared column norm of `X`.
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+        randomizer_scale : float
+            Scale for IID components of randomizer. If None, uses
+            `0.5 * sqrt(mean_diag)`.
+        perturb : optional
+            Forwarded unchanged to the `lasso` constructor.
+
+        Returns
+        -------
+        L : `selection.randomized.convenience.lasso`
+        """
+        n, p = X.shape
+        loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)
+        mean_diag = np.mean((X ** 2).sum(0))
+
+        if ridge_term is None:
+            # the response in this method is `successes`; there is no `Y` in scope
+            ridge_term = np.std(successes) * np.sqrt(mean_diag) / np.sqrt(n - 1)
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5
+
+        return lasso(loglike, feature_weights, ridge_term, randomizer_scale,
+                     parametric_cov_estimator=parametric_cov_estimator,
+                     randomizer=randomizer, perturb=perturb)
+
+    @staticmethod
+    def coxph(X,
+              times,
+              status,
+              feature_weights,
+              parametric_cov_estimator=False,
+              quadratic=None,
+              ridge_term=None,
+              randomizer='gaussian',
+              randomizer_scale=None,
+              perturb=None):
+        r"""
+        Cox proportional hazards LASSO with feature weights.
+
+        Objective function is
+        $$
+        \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+        where $\ell^{\text{Cox}}$ is the negative of the log of the
+        Cox partial likelihood and $\lambda$ is `feature_weights`.
+        Uses Efron's tie breaking method.
+
+        Parameters
+        ----------
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+        times : ndarray
+            Shape (n,) -- the survival times.
+        status : ndarray
+            Shape (n,) -- the censoring status.
+        feature_weights : [float, sequence]
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. If `feature_weights` is
+            a float, then all parameters are penalized equally.
+        parametric_cov_estimator : bool
+            Forwarded unchanged to the `lasso` constructor.
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic coefficient to 0.
+        ridge_term : float
+            Ridge term. If None, uses
+            `std(times) * sqrt(mean_diag) / sqrt(n - 1)` where
+            `mean_diag` is the mean squared column norm of `X`.
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+        randomizer_scale : float
+            Scale for IID components of randomizer. If None, uses
+            `0.5 * sqrt(mean_diag) * std(times) * sqrt(n / (n - 1))`.
+        perturb : optional
+            Forwarded unchanged to the `lasso` constructor.
+
+        Returns
+        -------
+        L : `selection.randomized.convenience.lasso`
+        """
+        n, p = X.shape  # `n` is needed by both default scales below
+        loglike = coxph_obj(X, times, status, quadratic=quadratic)
+
+        # scale for randomization seems kind of meaningless here...
+        mean_diag = np.mean((X ** 2).sum(0))
+
+        if ridge_term is None:
+            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1)
+        if randomizer_scale is None:
+            # there is no `Y` in this method; scale by `times`, as the ridge term does
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(times) * np.sqrt(n / (n - 1.))
+
+        return lasso(loglike, feature_weights, ridge_term, randomizer_scale,
+                     randomizer=randomizer, perturb=perturb,
+                     parametric_cov_estimator=parametric_cov_estimator)
+
+    @staticmethod
+    def poisson(X,
+                counts,
+                feature_weights,
+                parametric_cov_estimator=False,
+                quadratic=None,
+                ridge_term=None,
+                randomizer_scale=None,
+                randomizer='gaussian',
+                perturb=None):
+        r"""
+        Construct a `lasso` instance for a Poisson log-linear model.
+
+        The penalized objective is
+        $$
+        \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+        with $\ell^{\text{Poisson}}$ the negative Poisson log-likelihood
+        (half the deviance) and $\lambda$ given by `feature_weights`.
+
+        Parameters
+        ----------
+        X : ndarray
+            Design matrix of shape (n,p).
+        counts : ndarray
+            Response of shape (n,).
+        feature_weights : [float, sequence]
+            Penalty weights; a float penalizes all parameters equally.
+            Set entries to 0 for an intercept or other unpenalized features.
+        parametric_cov_estimator : bool
+            Passed through to the `lasso` constructor.
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            Optional quadratic term added to the objective; a zero
+            quadratic coefficient makes it a purely linear term.
+        ridge_term : float
+            Size of the ridge term. When None it is set to
+            `std(counts) * sqrt(mean_diag) / sqrt(n - 1)`, with
+            `mean_diag` the mean squared column norm of `X`.
+        randomizer_scale : float
+            Scale for IID components of randomizer. When None it is set
+            to `0.5 * sqrt(mean_diag) * std(counts) * sqrt(n / (n - 1))`.
+        randomizer : str
+            One of ['laplace', 'logistic', 'gaussian']
+        perturb : optional
+            Passed through to the `lasso` constructor.
+
+        Returns
+        -------
+        L : `selection.randomized.convenience.lasso`
+        """
+        n_obs, _ = X.shape
+        poisson_loss = rr.glm.poisson(X, counts, quadratic=quadratic)
+
+        # scale for randomizer seems kind of meaningless here...
+        col_energy = np.mean((X ** 2).sum(0))
+
+        if ridge_term is None:
+            ridge_term = np.std(counts) * np.sqrt(col_energy) / np.sqrt(n_obs - 1)
+        if randomizer_scale is None:
+            randomizer_scale = (np.sqrt(col_energy) * 0.5 * np.std(counts)
+                                * np.sqrt(n_obs / (n_obs - 1.)))
+
+        return lasso(poisson_loss, feature_weights, ridge_term, randomizer_scale,
+                     randomizer=randomizer, perturb=perturb,
+                     parametric_cov_estimator=parametric_cov_estimator)
+
+    @staticmethod
+    def sqrt_lasso(X,
+                   Y,
+                   feature_weights,
+                   quadratic=None,
+                   parametric_cov_estimator=False,
+                   sigma_estimate='truncated',
+                   solve_args=None,
+                   randomizer_scale=None,
+                   perturb=None,
+                   ridge_term=None):
+        r"""
+        Use sqrt-LASSO to choose variables.
+
+        Objective function is
+        $$
+        \beta \mapsto \|Y-X\beta\|_2 + \sum_{i=1}^p \lambda_i |\beta_i|
+        $$
+        where $\lambda$ is `feature_weights`. After solving, treat as if `gaussian`
+        with implied variance and multiplier; see arxiv.org/abs/1504.08031.
+
+        Parameters
+        ----------
+        X : ndarray
+            Shape (n,p) -- the design matrix.
+        Y : ndarray
+            Shape (n,) -- the response.
+        feature_weights : [float, sequence]
+            Penalty weights. An intercept, or other unpenalized
+            features are handled by setting those entries of
+            `feature_weights` to 0. A float is expanded to `p` equal weights.
+        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
+            An optional quadratic term to be added to the objective.
+            Can also be a linear term by setting quadratic coefficient to 0.
+        parametric_cov_estimator : bool
+            Forwarded unchanged to the `lasso` constructor.
+        sigma_estimate : str
+            One of 'truncated' or 'OLS'. Not read by the current body.
+        solve_args : dict
+            Arguments passed to solver. Defaults to `{'min_its': 200}`.
+        randomizer_scale : float
+            Scale for IID components of randomizer. If None, uses
+            `0.5 * sqrt(mean_diag) * sqrt(n / (n - 1))`.
+        perturb : ndarray (optional)
+            Enters as linear term `-perturb`; default is `p` scaled IID normals.
+        ridge_term : float
+            Ridge term. If None, uses `sqrt(mean_diag) / sqrt(n - 1)`
+            where `mean_diag` is the mean squared column norm of `X`.
+
+        Returns
+        -------
+        L : `selection.randomized.convenience.lasso`
+
+        Raises
+        ------
+        NotImplementedError
+            Always, after the solve: construction is disabled for now.
+
+        Notes
+        -----
+        Unlike other variants of LASSO, this solves the problem on
+        construction as the active set is needed to find the equivalent
+        gaussian LASSO. Assumes parametric model is correct for inference.
+        """
+        n, p = X.shape
+        if solve_args is None:
+            solve_args = {'min_its': 200}  # fresh dict per call, not a shared default
+
+        if np.asarray(feature_weights).shape == ():
+            # `loglike` is not built yet at this point; use one weight per column of X
+            feature_weights = np.ones(p) * feature_weights
+
+        mean_diag = np.mean((X ** 2).sum(0))
+        if ridge_term is None:
+            ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1)
+        if randomizer_scale is None:
+            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.sqrt(n / (n - 1.))
+        if perturb is None:
+            perturb = np.random.standard_normal(p) * randomizer_scale
+
+        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0)  # a ridge + linear term
+        totalQ = randomQ if quadratic is None else randomQ + quadratic
+
+        soln, sqrt_loss = solve_sqrt_lasso(X, Y,
+                                           weights=feature_weights,
+                                           quadratic=totalQ,
+                                           solve_args=solve_args,
+                                           force_fat=True)
+
+        denom = np.linalg.norm(Y - X.dot(soln))
+        loglike = rr.glm.gaussian(X, Y)
+
+        # Deliberately disabled for now (see the message); the `return` below
+        # stays unreachable until this `raise` is removed.
+        raise NotImplementedError(
+            'lasso_view needs to be modified so that the initial randomization can be set at construction time')
+
+        return lasso(loglike,
+                     np.asarray(feature_weights) * denom,
+                     ridge_term * denom,
+                     randomizer_scale * denom,
+                     randomizer='gaussian',
+                     parametric_cov_estimator=parametric_cov_estimator,
+                     perturb=perturb)
+
+
diff --git a/selection/randomized/slope.py b/selection/randomized/slope.py
index 57f166e03..2a8de12cb 100644
--- a/selection/randomized/slope.py
+++ b/selection/randomized/slope.py
@@ -21,13 +21,13 @@
 
 from .randomization import randomization
 from .base import restricted_estimator
-from .lasso import highdim
+from .lasso import lasso
 from .query import (query,
                     multiple_queries,
                     langevin_sampler,
                     affine_gaussian_sampler)
 
-class slope(highdim):
+class slope(lasso):
 
     def __init__(self,
                  loglike,
@@ -196,7 +196,7 @@ def log_density(logdens_linear, offset, cond_prec, score, opt):
 
     # Targets of inference
     # and covariance with score representation
-    # are same as highdim LASSO
+    # are same as LASSO
 
     @staticmethod
     def gaussian(X,
diff --git a/selection/randomized/tests/test_full_lasso.py b/selection/randomized/tests/test_full_lasso.py
index 4bd633dc6..8b8146a0a 100644
--- a/selection/randomized/tests/test_full_lasso.py
+++ b/selection/randomized/tests/test_full_lasso.py
@@ -2,7 +2,7 @@
 import nose.tools as nt
 
 import selection.randomized.lasso as L; reload(L)
-from selection.randomized.lasso import highdim, lasso
+from selection.randomized.lasso import lasso
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
@@ -11,7 +11,7 @@ def test_full_lasso(n=200, p=30, signal_fac=1.5, s=5, ndraw=5000, burnin=1000, s
     General LASSO -- 
     """
 
-    inst, const = gaussian_instance, highdim.gaussian
+    inst, const = gaussian_instance, lasso.gaussian
     signal = np.sqrt(signal_fac * np.log(p))
     X, Y, beta = inst(n=n,
                       p=p, 
diff --git a/selection/randomized/tests/test_highdim_lasso.py b/selection/randomized/tests/test_highdim_lasso.py
index 5fd3232f7..b6584e287 100644
--- a/selection/randomized/tests/test_highdim_lasso.py
+++ b/selection/randomized/tests/test_highdim_lasso.py
@@ -9,7 +9,7 @@
 from rpy2.robjects import numpy2ri
 rpy.r('library(selectiveInference)')
 
-from ..lasso import highdim 
+from ..lasso import lasso
 from ...tests.instance import gaussian_instance
 from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso
 import matplotlib.pyplot as plt
@@ -19,7 +19,7 @@ def test_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, target='full'
     Compare to R randomized lasso
     """
 
-    inst, const = gaussian_instance, highdim.gaussian
+    inst, const = gaussian_instance, lasso.gaussian
     signal = np.sqrt(signal_fac * np.log(p))
     X, Y, beta = inst(n=n,
                       p=p, 
@@ -60,7 +60,7 @@ def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, s=5, sigma=3, full=Tru
     Compare to R randomized lasso
     """
 
-    inst, const = gaussian_instance, highdim.sqrt_lasso
+    inst, const = gaussian_instance, lasso.sqrt_lasso
     signal = np.sqrt(signal_fac * 2 * np.log(p))
     X, Y, beta = inst(n=n,
                       p=p, 
@@ -131,7 +131,7 @@ def test_compareR(n=200, p=10, signal=np.sqrt(4) * np.sqrt(2 * np.log(10)), s=5,
     Compare to R randomized lasso
     """
 
-    inst, const = gaussian_instance, highdim.gaussian
+    inst, const = gaussian_instance, lasso.gaussian
     X, Y, beta = inst(n=n, p=p, signal=signal, s=s, equicorrelated=False, rho=0.2, sigma=sigma, random_signs=True)[:3]
 
     n, p = X.shape
diff --git a/selection/randomized/tests/test_modelQ.py b/selection/randomized/tests/test_modelQ.py
index a6622fd8a..e88522423 100644
--- a/selection/randomized/tests/test_modelQ.py
+++ b/selection/randomized/tests/test_modelQ.py
@@ -6,7 +6,7 @@
 import regreg.api as rr
 
 from ..modelQ import modelQ
-from ..lasso import highdim
+from ..lasso import lasso
 from ...tests.instance import gaussian_instance
 
 def test_modelQ():
@@ -19,7 +19,7 @@ def test_modelQ():
 
     lagrange = 5. * np.ones(p) * np.sqrt(n)
     perturb = np.random.standard_normal(p) * n
-    LH = highdim.gaussian(X, y, lagrange)
+    LH = lasso.gaussian(X, y, lagrange)
     LH.fit(perturb=perturb, solve_args={'min_its':1000})
 
     LQ = modelQ(X.T.dot(X), X, y, lagrange)
diff --git a/selection/randomized/tests/test_selective_MLE_high.py b/selection/randomized/tests/test_selective_MLE_high.py
index a773d9340..71fff1671 100644
--- a/selection/randomized/tests/test_selective_MLE_high.py
+++ b/selection/randomized/tests/test_selective_MLE_high.py
@@ -5,7 +5,7 @@
 #rpy.r('library(selectiveInference)')
 
 import selection.randomized.lasso as L; reload(L)
-from selection.randomized.lasso import highdim 
+from selection.randomized.lasso import lasso
 from selection.tests.instance import gaussian_instance
 import matplotlib.pyplot as plt
 
@@ -14,7 +14,7 @@ def test_full_targets(n=2000, p=200, signal_fac=0.5, s=5, sigma=3, rho=0.4, rand
     Compare to R randomized lasso
     """
 
-    inst, const = gaussian_instance, highdim.gaussian
+    inst, const = gaussian_instance, lasso.gaussian
     signal = np.sqrt(signal_fac * 2 * np.log(p))
     X, Y, beta = inst(n=n,
                       p=p, 
@@ -57,7 +57,7 @@ def test_selected_targets(n=2000, p=200, signal_fac=1.5, s=5, sigma=3, rho=0.4,
     Compare to R randomized lasso
     """
 
-    inst, const = gaussian_instance, highdim.gaussian
+    inst, const = gaussian_instance, lasso.gaussian
     signal = np.sqrt(signal_fac * 2 * np.log(p))
     X, Y, beta = inst(n=n,
                       p=p, 
diff --git a/selection/randomized/tests/test_selective_MLE_onedim.py b/selection/randomized/tests/test_selective_MLE_onedim.py
index 743781718..970e5dc34 100644
--- a/selection/randomized/tests/test_selective_MLE_onedim.py
+++ b/selection/randomized/tests/test_selective_MLE_onedim.py
@@ -5,7 +5,7 @@
 import matplotlib.pyplot as plt
 import nose.tools as nt
 
-from ..lasso import highdim
+from ..lasso import lasso
 from ...tests.instance import gaussian_instance
 from statsmodels.distributions import ECDF
 
@@ -17,11 +17,11 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=1, randomizer_scale=1):
         X /= np.sqrt((X**2).sum(0))[None, :]
         Y = X.dot(beta) + sigma * np.random.standard_normal(n)
 
-        conv = highdim.gaussian(X,
-                                Y,
-                                W * np.ones(X.shape[1]),
-                                randomizer_scale=randomizer_scale * sigma,
-                                ridge_term=0.)
+        conv = lasso.gaussian(X,
+                              Y,
+                              W * np.ones(X.shape[1]),
+                              randomizer_scale=randomizer_scale * sigma,
+                              ridge_term=0.)
 
         signs = conv.fit()
         nonzero = signs != 0
@@ -257,4 +257,4 @@ def solve_barrier_nonneg(conjugate_arg,
             step *= 2
 
     hess = np.linalg.inv(precision + np.diag(barrier_hessian(current)))
-    return current, current_value, hess
\ No newline at end of file
+    return current, current_value, hess