diff --git a/src/genfunc.jl b/src/genfunc.jl index bbdc715..855f362 100644 --- a/src/genfunc.jl +++ b/src/genfunc.jl @@ -53,37 +53,6 @@ end # return sum(m.w * sigmoid.(m.α + m.β * x)) # end -function gen_data_GP(n::Int, m::Int; ℓ = 1, n0 = 5, prediction = false, σ = 0.0) - # x = rand(n) * 2 .- 1 - x0 = rand(n0) * 2 .- 1 - K0 = gen_kern(x0) - f = rand(MvNormal(K0)) - x = range(-1, 1, length = n) - K1 = gen_kern(x) - K01 = gen_kern(x0, x) - if !prediction - # if without prediction, return here - # return x0, f - return x, rand(MvNormal(K1 + sqrt(eps()) * 1.0I ), m) - else - # by prediction - iK0 = inv(K0 + σ^2 * 1.0I) - μ = K01' * iK0 * f - Σ = K1 - K01' * iK0 * K01 - println(eigen(Σ).values) - # dist = MvNormal(K) - dist = MvNormal(μ, Symmetric(Σ)) - return x0, f, x, rand(dist, m) - end -end - -# Square Exponential -function sq_exp(x::AbstractVector{T}; ℓ = 1) where T <: AbstractFloat - K = gen_kern(x, ℓ) - ϵ = sqrt(eps()) - return rand(MvNormal(K + ϵ * 1.0I)) -end - """ gp(x; K) diff --git a/src/mono_test.jl b/src/mono_test.jl index 61b11ea..cf42d48 100644 --- a/src/mono_test.jl +++ b/src/mono_test.jl @@ -6,7 +6,11 @@ using LaTeXStrings using RCall +""" + gen_data_bowman() +Generate Curves for Monotonicity Test used in Bowman et al. (1998) +""" function gen_data_bowman(;n = 50, a = 0, σ = 0.1, plt = false, kw...) xs = range(0, 1, length = n) f(x, a) = 1 + x - a * exp(-1/2 * (x-0.5)^2 / 0.1^2) @@ -20,6 +24,11 @@ function gen_data_bowman(;n = 50, a = 0, σ = 0.1, plt = false, kw...) end end +""" + demo_data() + +Generate demo data for illustration. (Figure 6 in the paper) +""" function demo_data(; figfolder = "/tmp" # "../notes" ) figs = Plots.Plot[] @@ -34,6 +43,11 @@ function demo_data(; figfolder = "/tmp" # "../notes" savefig(joinpath(figfolder, "ex-ghosal.pdf")) end +""" + gen_mono_data() + +Generate monotonic curves, used for checking type I error under H0. (Table 4 and Figure 5 in the paper) +""" function gen_mono_data(; n = 100, σ = 0.1, equidistant = false) if equidistant x = range(0, 1, length = n) @@ -48,11 +62,11 @@ function gen_mono_data(; n = 100, σ = 0.1, equidistant = false) return x, m1, m2, m3, m4, m5 end -function gen_desc_data(; n = 100, σ = 0.1) - x, m1, m2, m3, m4, m5 = gen_mono_data(; n = n, σ = σ) - return x, -m1, -m2, -m3, -m4, -m5 -end +""" + gen_data_ghosal() +Generate curves used in Ghosal et al. (2000). +""" function gen_data_ghosal(; n = 100, σ = 0.1, plt = false) xs = rand(n) m10 = zeros(n) @@ -315,54 +329,6 @@ function mono_test_bootstrap_cs(x::AbstractVector{T}, y::AbstractVector{T}; nrep return pval, D end -function mono_test_bootstrap_sup(x::AbstractVector{T}, y::AbstractVector{T}; - nrep = 100, nμ = 10, - nfold = 2, fixJ = true, - nblock = 10, - kw...) where T <: AbstractFloat - n = length(y) - μs0 = 10.0 .^ (-6:0.5:6) - D1, μ0 = cv_mono_decomp_cs(x, y, ss = μs0, one_se_rule = true, fixJ = fixJ, nfold = nfold) - μ1 = D1.μ - J = D1.workspace.J - # μ0 < μ1 - if μ1 == μ0 - μ1 = maximum(μs0) - μ0 = minimum(μs0) - end - # μ0 is without 1se rule - μl = μ0 - 0.75 * (μ1 - μ0) - if μl < 0 - μl = μ0 * 0.25 - end - μr = μ1 #μ1 + 0.75 * (μ1 - μ0) - μs = vcat(range(μl, μr, length = nμ), μ1, μ0) - # μs = range(μ0, μ1, length = nμ) - pval = Float64[] - for (k, μ) in enumerate(μs) - D = mono_decomp_cs(x, y, s = μ, s_is_μ = true, J = J) - error = y - D.yhat - tobs = var(D.γdown) #/ var(y - D.yhat) - ts = zeros(nrep) - c = mean(D.yhat) / 2 - for i = 1:nrep - yi = construct_bootstrap_y(y, error, D.workspace.B, D.γup, c, nblock = nblock) - Di = mono_decomp_cs(x, yi, s = μ, s_is_μ = true, J = J) - # ts[i] = var(Di.γdown) / var(y - Di.yhat) - ts[i] = var(Di.γdown) - end - # pval[k] = sum(ts .> tobs) / nrep - append!(pval, sum(ts .> tobs) / nrep) - end - # return pval - # must include one - # if length(pval) == 0 - # @warn "$ρ is too small, and no mono decomp satisfies the condition" - # end - @debug "pval = $pval" - return [maximum(pval) < 0.05, pval[end] < 0.05, pval[end-1] < 0.05] -end - maxgap(x::AbstractVector{T}) where T <: Real = maximum(x) - minimum(x) ## aim for hete error, but if we can change different md decomposition method such that the md is homo, then no need (paper#12). @@ -416,93 +382,6 @@ function construct_bootstrap_y(y::AbstractVector{T}, e::AbstractVector{T}, B::Ab return construct_bootstrap_y(y, e, B * γ .+ c, nblock = nblock, σe = σe, debias_mean_yi = debias_mean_yi) end -function mono_test_bootstrap_supss(x::AbstractVector{T}, y::AbstractVector{T}; - nrep = 100, nμ = 10, nfold = 2, seed = rand(UInt64), - opstat::Union{String, Function} = var, - md_method = "single_lambda", - tol = 1e-4, - nblock = -1, - use_σ_from_ss = false, - debias_mean_yi = true, - kw... - ) where T <: Real - # for block index - idx = sortperm(x) - x = x[idx] - y = y[idx] - n = length(y) - res, μ0, μs0, errs, σerrs, yhat, yhatnew = cv_mono_decomp_ss(x, y; one_se_rule = true, nfold = nfold, seed = seed, method = md_method, tol = tol, kw...) - σe0 = std(y - yhat) - μ1 = res.μ - # μ0 < μ1 - # μ0 is not with 1se rule - if μ1 == μ0 - μ1 = maximum(μs0) - μ0 = minimum(μs0) - end - μl = μ0 - 0.75 * (μ1 - μ0) - if μl < 0 - # μl = μ0 * 0.25 - μl = min(cbrt(eps()), 0.25μ0) - end - μr = μ1#μ1 + 0.75 * (μ1 - μ0) - μs = vcat(range(μl, μr, length = nμ), μ1, μ0) - @debug "perform monotone test: μ_min = $μ0, μ_1se = $μ1" - @debug "construct composite hypothesis in the range [$μl, $μr]" - # μs = range(μ0, μ1, length = nμ) - pval = Float64[] - for (k, μ) in enumerate(μs) - D = mono_decomp_ss(res.workspace, x, y, res.λ, μ) - error = y - D.yhat - σe = std(error) - ts = zeros(nrep) - c = mean(D.yhat) / 2 - if isa(opstat, Function) - tobs = opstat(D.γdown) - else - tobs = sum((D.γdown .- c).^2) - end - @debug "σe = $σe" - for i = 1:nrep - yi = construct_bootstrap_y(y, error, D.workspace.B, D.γup, c, nblock = nblock, - σe = ifelse(use_σ_from_ss, σe0, σe), - debias_mean_yi = debias_mean_yi) - try - Di = mono_decomp_ss(res.workspace, x, yi, res.λ, μ, strict = true) - if isa(opstat, Function) - ts[i] = opstat(Di.γdown) - else - ts[i] = sum((Di.γdown .- c).^2) - end - catch - @warn "due to error in optimization, assign test statistic as Inf" - # TODO: is Inf reasonable? - # alternatively, exclude Inf when calculting p-value - ts[i] = Inf - end - # ts[i] = var(Di.γdown) / var(y - Di.yhat) - # savefig(scatter(x, yi), "/tmp/toy-$i.png") - # ts[i] = sum((Di.γdown .- c).^2) - end - # if k == 2 - # fig = histogram(ts[.!isinf.(ts)]) - # vline!(fig, [tobs]) - # savefig(fig, "/tmp/mu.png") - # end - # println(ts) - # println(tobs) - # pval[k] = sum(ts .> tobs) / nrep - append!(pval, sum(ts .> tobs) / nrep) - end - # return pval - # must include one - # if length(pval) == 0 - # @warn "$ρ is too small, and no mono decomp satisfies the condition" - # end - @debug "pval = $pval" - return [maximum(pval) < 0.05, pval[end] < 0.05, pval[end-1] < 0.05] -end - """ mono_test_bootstrap_ss(x, y) @@ -540,17 +419,3 @@ function mono_test_bootstrap_ss(x::AbstractVector{T}, y::AbstractVector{T}; nrep pval = (sum(ts .> tobs) + sum(ts .== tobs) * 0.5) / nrep return pval, D end - -function mono_test(x::AbstractVector{T}, y::AbstractVector{T}) where T <: Real - # x, y = gen_data(a = 0.15, σ = 0.025) - res, _ = cv_mono_decomp_cs(x, y, Js = 4:20, ss = 10.0 .^ (-6:0.5:-1), fixJ = false) - # scatter(x, y) - # plot!(x, res.yhat) - c = mean(res.yhat) / 2 - n = length(y) - σhat2 = var(y - res.yhat) / 4n - tobs = sum((res.γdown .- c).^2 / σhat2) - pval = 1 - cdf(Chisq(res.workspace.J), tobs) - # println("pval = $pval, tobs = $tobs, critical = $(quantile(Chisq(J), 0.95))") - return pval < 0.05 -end \ No newline at end of file diff --git a/test/test_genfunc.jl b/test/test_genfunc.jl index 4cc359a..d545558 100644 --- a/test/test_genfunc.jl +++ b/test/test_genfunc.jl @@ -5,7 +5,7 @@ using GaussianProcesses using Statistics @testset "GP" begin - @testset "kernel function" begin + @testset "square exponential kernel function" begin x = randn(1, 3) K = cov(SEIso(0.0, 0.0), x) K2 = zeros(3, 3) diff --git a/test/test_monotest.jl b/test/test_monotest.jl index 2ef8c0d..eeae945 100644 --- a/test/test_monotest.jl +++ b/test/test_monotest.jl @@ -27,6 +27,11 @@ end @test c[100] ≈ 3.05099 atol=1e-5 @test c[200] ≈ 3.07254 atol=1e-5 @test c[500] ≈ 3.10625 atol=1e-5 + @testset "critical value vs pvalue" begin + x = randn(100) + @test !MonotoneDecomposition.ghosal_S1n(x, x, c[100]) + @test MonotoneDecomposition.ghosal_S1n(x, x) >= 0.05 + end end @testset "experiments" begin