32 changes: 17 additions & 15 deletions experiments/density_estimation.jl
@@ -31,8 +31,10 @@ Random.seed!(0)
# generate independent samples from Gaussian mixture
function rand_gmm(n)
indicator = rand(n)
out = (indicator .> p).*(sqrt(σ2_1)*randn(n) .+ μ1)
out += (indicator .≤ p).*(sqrt(σ2_2)*randn(n) .+ μ2)
# FIX: make sampling weights consistent with pdf(x) below:
# P(component 1) = p -> (μ1, σ2_1), P(component 2) = 1-p -> (μ2, σ2_2)
out = (indicator .≤ p).*(sqrt(σ2_1)*randn(n) .+ μ1)
out += (indicator .> p).*(sqrt(σ2_2)*randn(n) .+ μ2)
end
# this is our "calibration" data (iid samples from distribution)
x_cal = rand_gmm(n)
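# Quick sanity check (illustrative, not part of the patch): with the corrected
# weights, the empirical mean of a large sample should approach the mixture
# mean p*μ1 + (1-p)*μ2 (p, μ1, μ2 are defined earlier in this file).
using Statistics
@assert isapprox(mean(rand_gmm(10^6)), p*μ1 + (1-p)*μ2; atol=0.05)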
@@ -41,21 +43,26 @@ x_cal = rand_gmm(n)
# the goal is to estimate P(x | x_cal)
# we'll use a semidefinite model
B = Semidefinite(n)
v(x) = k.([x], x_cal)
# FIX: v(x) must be the n-vector v(x) = (k(x, x_i))_i as in the paper.
# Broadcasting the scalar x against x_cal with k.(x, x_cal) yields a plain
# length-n Vector, so v(x)'*B*v(x) below is a scalar quadratic form.
v(x) = k.(x, x_cal)
# Model: P(x | x_cal) = v(x)'*B*v(x)
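# Shape check (illustrative): v should return a plain length-n Vector so that
# the quadratic form above evaluates to a scalar.
@assert v(x_cal[1]) isa AbstractVector && length(v(x_cal[1])) == n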

# maximum likelihood estimate:
# maximize ∏ᵢ P(x_i | x_cal) over B, i.e. minimize the average negative
# log-likelihood of the calibration points:
objective = -1/n * sum([log(v(xi_cal)'*B*v(xi_cal)) for xi_cal in x_cal])
# regularization term
objective += λ*(nuclearnorm(B) + 0.005*tr(B))
z_cal = [v(xi_cal)'*B*v(xi_cal) for xi_cal in x_cal]
# FIX: avoid log(0) by enforcing strict positivity on training evaluations.
ϵ = 1e-9
objective = -1/n * sum(log.(z_cal))
# FIX: elastic-net style regularizer from the paper: nuclear norm plus squared Frobenius norm
objective += λ*(nuclearnorm(B) + 0.01/2 * sumsquares(B))
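# For reference, the full penalized objective is
#   -(1/n) * Σᵢ log(v(x_i)'*B*v(x_i)) + λ*(‖B‖_* + (0.01/2)*‖B‖_F^2);
# since B ⪰ 0, the nuclear norm ‖B‖_* equals tr(B).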

# integral constraint:
# enforces ∫ P(x | x_cal) dx = 1 via tr(B*M) == 1, where
# M[i, j] = ∫ k(x, x_i)*k(x, x_j) dx (Gaussian kernels integrate in closed form)
M = s^2*exp.(-1/(4σ^2)*((x_cal .- x_cal').^2))*sqrt(σ^2*π) # closed form from kernel
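# Numerical spot-check of the closed form (illustrative; assumes the Gaussian
# kernel k(x, y) = s*exp(-(x-y)^2/(2σ^2)) defined earlier, and that QuadGK is
# installed): ∫ k(x,a)*k(x,b) dx = s^2*sqrt(πσ^2)*exp(-(a-b)^2/(4σ^2)).
using QuadGK
let a = x_cal[1], b = x_cal[2]
    I_num, _ = quadgk(x -> k(x, a)*k(x, b), -Inf, Inf)
    @assert isapprox(I_num, M[1, 2]; rtol=1e-6)
end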
constraints = [tr(B*M) == 1]
constraints = [tr(B*M) == 1; z_cal .>= ϵ]  # `;` vcat-flattens the elementwise positivity constraints into one list

# solve!
problem = minimize(objective, constraints)
@@ -64,18 +71,13 @@ solve!(problem, Mosek.Optimizer)
# we have our model!
B_val = evaluate(B)
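# Diagnostic (illustrative): B_val should be numerically PSD; tiny negative
# eigenvalues are solver tolerance, not a modeling error.
using LinearAlgebra
println("min eigenvalue of B: ", minimum(eigvals(Symmetric(B_val))))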

# ## Mode estimation
# x = Variable()
# obj_mode = sum([-(1/(2σ^2))*((x-x_cal[i])^2 + (x-x_cal[j])^2) for i = 1:n, j = 1:n])
# prob_mode = maximize(obj_mode)
# solve!(prob_mode, Mosek.Optimizer)


## plot
pdf_n(x, μ, σ2) = 1/(sqrt(2π*σ2))*exp(-(x-μ)^2 / (2σ2))
pdf(x) = p*pdf_n(x, μ1, σ2_1) + (1-p)*pdf_n(x, μ2, σ2_2)

x_dense = collect(-4:0.01:4)
Plots.plot(x_dense, transpose.(v.(x_dense)) .* [B_val] .* v.(x_dense), label="learned",lw=2)
# FIX: plot the scalar quadratic form f(x) = v(x)'*B*v(x); the old broadcasted
# .* took elementwise (Hadamard) products instead of the matrix quadratic form.
learned = [v(x)'*B_val*v(x) for x in x_dense]
Plots.plot(x_dense, learned, label="learned",lw=2)
Plots.plot!(x_dense, pdf.(x_dense), label="ground truth", ylabel="p(x | cal)",lw=2)
Plots.scatter!(x_cal, 0 .*x_cal, label="training points", markershape=:x, msw=2)
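# Optional check (illustrative): tr(B*M) == 1 normalizes the learned density,
# so a trapezoidal integral over the plotting grid should come out ≈ 1
# (assuming the mass outside [-4, 4] is negligible).
Δx = 0.01
println("∫ learned ≈ ", sum(0.5 .* (learned[1:end-1] .+ learned[2:end])) * Δx)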