Skip to content

Commit

Permalink
add more 2d kde handling functions
Browse files Browse the repository at this point in the history
  • Loading branch information
njericha committed May 8, 2024
1 parent 122d701 commit 30cb2a7
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 5 deletions.
8 changes: 7 additions & 1 deletion Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.0"
manifest_format = "2.0"
project_hash = "49de629289277247d16c6846b32eb296b8facd17"
project_hash = "71f408cd0b90fec1446e7c41f51cc12750942e43"

[[deps.AbstractFFTs]]
deps = ["LinearAlgebra"]
Expand Down Expand Up @@ -1126,6 +1126,12 @@ deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
version = "1.0.3"

[[deps.TSVD]]
deps = ["Adapt", "LinearAlgebra"]
git-tree-sha1 = "c39caef6bae501e5607a6caf68dd9ac6e8addbcb"
uuid = "9449cd9e-2762-5aa3-a617-5413e99d722e"
version = "0.4.4"

[[deps.Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
Expand Down
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e"
Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
3 changes: 3 additions & 0 deletions docs/src/MatrixTensorFactor.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,11 @@ DEFAULT_ALPHA
```@docs
default_bandwidth
make_densities
make_densities2d
standardize_KDEs
standardize_2d_KDEs
filter_inner_percentile
filter_2d_inner_percentile
```

## 2D
Expand Down
2 changes: 1 addition & 1 deletion src/MatrixTensorFactor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export nnmtf_proxgrad_online

export DEFAULT_ALPHA, DEFAULT_N_SAMPLES, MIN_STEP, MAX_STEP # Constants
export IMPLIMENTED_OPTIONS, IMPLIMENTED_NORMALIZATIONS, IMPLIMENTED_PROJECTIONS, IMPLIMENTED_CRITERIA, IMPLIMENTED_STEPSIZES # implimented options
export default_bandwidth, make_densities, standardize_KDEs, filter_inner_percentile # Functions
export default_bandwidth, make_densities, standardize_KDEs, standardize_2d_KDEs, filter_inner_percentile, filter_2d_inner_percentile # Functions
export repeatcoord, kde2d, coordzip # 2d density estimation functions

include("utils.jl")
Expand Down
6 changes: 3 additions & 3 deletions src/densityestimation.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Filters elements so only the ones in the inner P percentile remain.
Filters elements so only the ones in the inner P percentile remain. See [`filter_2d_inner_percentile`](@ref).
"""
function filter_inner_percentile(v, P)
    # Build the membership predicate from `v` once, then keep the elements it accepts.
    keep = _inrange(v, P)
    return filter(keep, v)
end

Expand Down Expand Up @@ -97,7 +97,7 @@ function make_densities(
#for (i, (measurement_values, b)) in enumerate(zip(data, bandwidths))
for (i, measurement_values) in enumerate(data)
# Estimate density based on the inner percentile to ignore outliers
#measurement_values = filter_inner_percentile(measurement_values, inner_percentile)
measurement_values = filter_inner_percentile(measurement_values, inner_percentile)
density_estimates[i] = kde(measurement_values)#, bandwidth=b)
end

Expand All @@ -124,7 +124,7 @@ const DEFAULT_N_SAMPLES = 64::Integer
"""
standardize_KDEs(KDEs::AbstractVector{UnivariateKDE}; n_samples=DEFAULT_N_SAMPLES,)
Resample the densities so they all are smapled from the same domain.
Resample the densities so they all are sampled from the same domain.
"""
function standardize_KDEs(KDEs; n_samples=DEFAULT_N_SAMPLES,)
a = minimum(d -> d.x[begin], KDEs) # smallest left endpoint
Expand Down
64 changes: 64 additions & 0 deletions src/densityestimation2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,70 @@
Holds functions relevant to 2D kernel density estimation
"""

"""
    filter_2d_inner_percentile(vs, P)

Keep only the 2d elements of `vs` that are accepted by the inner-`P`-percentile
predicate built from `vs` itself. See [`filter_inner_percentile`](@ref).
"""
function filter_2d_inner_percentile(vs, P)
    # The predicate is computed from the full collection before any filtering happens.
    is_inlier = _in2drange(vs, P)
    return filter(is_inlier, vs)
end

"""
    _in2drange(vs, P)

Return a predicate `x -> Bool` that is `true` when `x[1]` and `x[2]` both lie inside
the inner `P` percentile of the first and second coordinates of `vs`, respectively.

# Arguments
- `vs`: iterable of elements indexable with `[1]` and `[2]`.
- `P`: size of the inner percentile window, in percent (e.g. `95` keeps the values
  between the 2.5th and 97.5th percentile of each coordinate).

# Returns
A closure over the four quantile bounds; the bounds are inclusive.
"""
function _in2drange(vs, P)
    # Trim (100 - P)/2 percent from each tail so the window is centred.
    p_low = (100 - P) / 2
    p_high = 100 - p_low
    probs = [p_low, p_high] ./ 100 # quantile expects probabilities in [0, 1]

    # Bounds are computed per coordinate, independently of the other coordinate.
    a, b = quantile([v[1] for v in vs], probs)
    c, d = quantile([v[2] for v in vs], probs)

    return x -> (a <= x[1] <= b) && (c <= x[2] <= d)
end

# TODO extend this to an arbitrary number of dimensions

"""
    make_densities2d(data::AbstractVector; inner_percentile=100, bandwidths=nothing)

Similar to [`make_densities`](@ref) but performs the KDE on 2 measurements jointly,
returning a single kernel density estimate for all of `data`.

# Arguments
- `data`: collection of items. Each item is converted with `array` and becomes one
  observation (one row of the matrix handed to `kde`).
  NOTE(review): each item is presumably expected to hold exactly two measurements;
  confirm against the callers.

# Keywords
- `inner_percentile::Integer=100`: must satisfy `0 < inner_percentile <= 100`.
  It is only validated for now; outlier filtering is not applied yet.
- `bandwidths=nothing`: iterable of per-dimension bandwidths, forwarded to `kde` as a
  tuple. When `nothing`, `kde` chooses its own default bandwidth.

# Throws
- `ArgumentError` if `inner_percentile` is outside `(0, 100]`.
"""
function make_densities2d(
    data::AbstractVector{T};
    inner_percentile::Integer=100,
    bandwidths=nothing,
) where T
    # Argument handling: check inner_percentile is a percentile
    (0 < inner_percentile <= 100) || throw(ArgumentError(
        "inner_percentile must be between 0 and 100, got $inner_percentile"))

    # TODO filter outliers with filter_2d_inner_percentile(data, inner_percentile)
    # once the elements of `data` are confirmed to support `x[1]`/`x[2]` indexing.

    # One column per observation, then transpose so every row is an observation.
    observations = reduce(hcat, [array(g) for g in data])'

    if isnothing(bandwidths)
        return kde(observations)
    end
    return kde(observations; bandwidth=tuple(bandwidths...))
end

"""
    standardize_2d_KDEs(KDEs::AbstractVector{BivariateKDE}; n_samples=DEFAULT_N_SAMPLES,)

Resample every density in `KDEs` on one shared grid so they all use the same x and y
coordinates.

The shared grid spans the smallest to the largest endpoint found among the `x` (and,
separately, `y`) fields of the inputs, with `n_samples` evenly spaced points per axis.

# Returns
`(KDEs_new, x_new, y_new)`: the resampled densities and the common x and y ranges.
"""
function standardize_2d_KDEs(KDEs; n_samples=DEFAULT_N_SAMPLES,)
    # Bounding box that encloses the grid of every input KDE
    x_lo = minimum(k -> first(k.x), KDEs) # smallest x endpoint
    x_hi = maximum(k -> last(k.x), KDEs)  # largest x endpoint
    y_lo = minimum(k -> first(k.y), KDEs) # smallest y endpoint
    y_hi = maximum(k -> last(k.y), KDEs)  # largest y endpoint

    x_new = range(x_lo, x_hi; length=n_samples) # shared (larger) x-values range
    y_new = range(y_lo, y_hi; length=n_samples) # shared (larger) y-values range

    # Wrap both ranges in `Ref` so broadcasting runs over `KDEs` only, i.e.
    # KDEs_new[i] = pdf(KDEs[i], x_new, y_new)
    KDEs_new = pdf.(KDEs, Ref(x_new), Ref(y_new))
    return KDEs_new, x_new, y_new
end


"""
repeatcoord(coordinates, values)
Expand Down

0 comments on commit 30cb2a7

Please sign in to comment.