diff --git a/experiments/density_estimation.jl b/experiments/density_estimation.jl
index d6163ae..2d0e78a 100644
--- a/experiments/density_estimation.jl
+++ b/experiments/density_estimation.jl
@@ -31,8 +31,10 @@ Random.seed!(0)
 # generate independent samples from Gaussian mixture
 function rand_gmm(n)
     indicator = rand(n)
-    out = (indicator .> p).*(sqrt(σ2_1)*randn(n) .+ μ1)
-    out += (indicator .≤ p).*(sqrt(σ2_2)*randn(n) .+ μ2)
+    # FIX: make sampling weights consistent with pdf(x) below:
+    # P(component 1) = p -> (μ1, σ2_1), P(component 2) = 1-p -> (μ2, σ2_2)
+    out = (indicator .≤ p).*(sqrt(σ2_1)*randn(n) .+ μ1)
+    out += (indicator .> p).*(sqrt(σ2_2)*randn(n) .+ μ2)
 end
 # this is our "calibration" data (iid samples from distribution)
 x_cal = rand_gmm(n)
@@ -41,21 +43,26 @@ x_cal = rand_gmm(n)
 # the goal is to estimate P(x | x_cal)
 # we'll use a semidefinite model
 B = Semidefinite(n)
-v(x) = k.([x], x_cal)
+# FIX: v(x) must be an n-vector v(x) = (k(x, x_i))_i as in the paper.
+# Broadcasting the scalar x over x_cal gives a length-n Vector directly.
+v(x) = k.(x, x_cal)
 # Model: P(x | x_cal) = v(x)'*B*v(x)
 
 # maximum likelihood estimate
 # max P(x | x_cal)
 # take the -log likelihood:
-objective = -1/n * sum([log(v(xi_cal)'*B*v(xi_cal)) for xi_cal in x_cal])
-# regularization term
-objective += λ*(nuclearnorm(B) + 0.005*tr(B))
+z_cal = [v(xi_cal)'*B*v(xi_cal) for xi_cal in x_cal]
+# FIX: avoid log(0) by enforcing strict positivity on the training evaluations (z_cal .>= ϵ below).
+ϵ = 1e-9
+objective = -1/n * sum(log.(z_cal))
+# FIX: elastic-net style regularizer from the paper: nuclear norm + squared Frobenius norm
+objective += λ*(nuclearnorm(B) + 0.01/2 * sumsquares(B))
 
 # integral constraint
 # enforces integral of P(x | x_cal) over x = 1
 # M is just the integral of the product of kernels
 M = s^2*exp.(-1/(4σ^2)*((x_cal .- x_cal').^2))*sqrt(σ^2*π) # closed form from kernel
-constraints = [tr(B*M) == 1]
+constraints = [tr(B*M) == 1; z_cal .>= ϵ]  # ';' (vcat) keeps this a flat Vector of constraints
 
 # solve!
 problem = minimize(objective, constraints)
@@ -64,18 +71,13 @@ solve!(problem, Mosek.Optimizer)
 
 # we have our model!
 B_val = evaluate(B)
 
-# ## Mode estimation
-# x = Variable()
-# obj_mode = sum([-(1/(2σ^2))*((x-x_cal[i])^2 + (x-x_cal[j])^2) for i = 1:n, j = 1:n])
-# prob_mode = maximize(obj_mode)
-# solve!(prob_mode, Mosek.Optimizer)
-
-
 ## plot
 pdf_n(x, μ, σ2) = 1/(sqrt(2π*σ2))*exp(-(x-μ)^2 / (2σ2))
 pdf(x) = p*pdf_n(x, μ1, σ2_1) + (1-p)*pdf_n(x, μ2, σ2_2)
 x_dense = collect(-4:0.01:4)
-Plots.plot(x_dense, transpose.(v.(x_dense)) .* [B_val] .* v.(x_dense), label="learned",lw=2)
+# FIX: plot the scalar quadratic form f(x) = v(x)'*B*v(x) (no elementwise broadcasting).
+learned = [v(x)'*B_val*v(x) for x in x_dense]
+Plots.plot(x_dense, learned, label="learned",lw=2)
 Plots.plot!(x_dense, pdf.(x_dense), label="ground truth", ylabel="p(x | cal)",lw=2)
 Plots.scatter!(x_cal, 0 .*x_cal, label="training points", markershape=:x, msw=2)
\ No newline at end of file
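
---

Reviewer notes:

The hunks reference names defined earlier in the file and untouched by this diff: p, μ1, σ2_1, μ2, σ2_2, s, σ, n, λ, and the kernel k. For context, a minimal preamble this patch assumes is sketched below. The Gaussian kernel form k(x, y) = s*exp(-(x-y)^2/(2σ^2)) is implied by the closed form for M, since ∫ k(x,a)k(x,b) dx = s^2*sqrt(σ^2*π)*exp(-(a-b)^2/(4σ^2)) for that kernel; the numeric values are illustrative assumptions, not taken from the file.

    using Random, LinearAlgebra
    using Convex
    using Mosek, MosekTools  # assumed solver packages; any SDP-capable solver works
    import Plots

    n = 50                # number of calibration samples (assumed)
    p = 0.3               # mixture weight of component 1 (assumed)
    μ1, σ2_1 = -1.0, 0.3  # component 1 mean / variance (assumed)
    μ2, σ2_2 = 1.5, 0.5   # component 2 mean / variance (assumed)
    s, σ = 1.0, 0.5       # kernel scale and bandwidth (assumed)
    λ = 0.01              # regularization weight (assumed)
    k(x, y) = s*exp(-(x - y)^2/(2σ^2))  # Gaussian kernel implied by the closed-form M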
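
To sanity-check the fixes after solve!, the integral constraint should be tight at the optimum, and numerically integrating the learned density over the plotting grid should give ≈ 1 up to whatever mass lies outside [-4, 4]. A sketch, assuming the variables from the patched script are in scope:

    # closed-form check: tr(B*M) == 1 is enforced, so it must hold at the solution
    @assert isapprox(tr(B_val*M), 1.0; atol=1e-6)

    # numeric check: trapezoid rule on x_dense = -4:0.01:4
    # (learned = [v(x)'*B_val*v(x) for x in x_dense] is defined in the plot hunk)
    dx = 0.01
    integral = dx*(sum(learned) - (learned[1] + learned[end])/2)
    println("∫ f(x) dx ≈ ", integral)  # ≈ 1 when little mass lies outside [-4, 4]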
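
The closed form used for M can itself be spot-checked by quadrature. A sketch for one pair of calibration points; QuadGK is an assumed extra dependency (any 1-D quadrature routine works):

    using QuadGK
    a, b = x_cal[1], x_cal[2]
    numeric, _ = quadgk(x -> k(x, a)*k(x, b), -Inf, Inf)
    closed = s^2*sqrt(σ^2*π)*exp(-(a - b)^2/(4σ^2))
    @assert isapprox(numeric, closed; rtol=1e-8)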