Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
filt perf optimizations (#516)
Browse files Browse the repository at this point in the history
* create `si` only once, and reuse

* no inbounds faster for N < 24

* @generated `_small_filt_fir!`

- use Base.Cartesian.@ntuple

* inbounds if N > 18

major speedups for N > 18, much slower if inbounds for small N

* reorder store in `_filt_fir` and `_filt_iir`

>20% faster for fir

* maybe prevent undefvarerror

no need to assign values to enum

* small things

- eachindex stuff
- `==` to `===`  nothing
- `npairs = max(nz, np)`

* isone, iszero, etc.

* `filt` for `FIRFilter`: remove redundant definitions

* extract type instability from loop

* remove fft conv type instability

- in `unsafe_conv_kern_os!`
- probably no real impact

* correct `filt` arg types

* nicer loop

* inv plan

* cleanup filt_order

* use `muladd`s in filt.jl

* `_prod_freq`

* more `muladd`s

* `fill!`, `reverse`, don't broadcast scalar /

* micro-optimizations, cosmetics

- reduced exceptions in `@code_llvm` for `N < 18`
- `@noinline mul!`, `broadcast`, only for julia > v1.8
- name `SMALL_FILT_VECT_CUTOFF`, selective bounds check

* Compat noinline

* add test for error

(cherry picked from commit 9de4c0c)
wheeheee authored and martinholters committed Sep 13, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 1737340 commit 1e72e21
Showing 12 changed files with 257 additions and 293 deletions.
1 change: 1 addition & 0 deletions src/DSP.jl
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@ module DSP
using FFTW
using LinearAlgebra: mul!, rmul!
using IterTools: subsets
using Compat: Compat

export conv, deconv, filt, filt!, xcorr

20 changes: 9 additions & 11 deletions src/Filters/design.jl
Original file line number Diff line number Diff line change
@@ -86,9 +86,7 @@ function Chebyshev2(::Type{T}, n::Integer, ripple::Real) where {T<:Real}

ε = 1/sqrt(10^(convert(T, ripple)/10)-1)
p = chebyshev_poles(T, n, ε)
for i = 1:length(p)
p[i] = inv(p[i])
end
map!(inv, p, p)

z = zeros(Complex{T}, n-isodd(n))
k = one(T)
@@ -155,7 +153,7 @@ function asne(w::Number, k::Real)
# Eq. (50)
k = abs2(k/(1+sqrt(1-abs2(k))))
# Eq. (56)
w = 2*w/((1+k)*(1+sqrt(1-abs2(kold)*w^2)))
w = 2w / ((1 + k) * (1 + sqrt(muladd(-abs2(kold), w^2, 1))))
end
2*asin(w)/π
end
@@ -355,9 +353,9 @@ function transform_prototype(ftype::Bandpass, proto::ZeroPoleGain{:s})
newz = zeros(TR, 2*nz+np-ncommon)
newp = zeros(TR, 2*np+nz-ncommon)
for (oldc, newc) in ((p, newp), (z, newz))
for i = 1:length(oldc)
for i in eachindex(oldc)
b = oldc[i] * ((ftype.w2 - ftype.w1)/2)
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newc[2i-1] = b + pm
newc[2i] = b - pm
end
@@ -372,25 +370,25 @@ function transform_prototype(ftype::Bandstop, proto::ZeroPoleGain{:s})
k = proto.k
nz = length(z)
np = length(p)
npairs = nz+np-min(nz, np)
npairs = max(nz, np)
TR = Base.promote_eltype(z, p)
newz = Vector{TR}(undef, 2*npairs)
newp = Vector{TR}(undef, 2*npairs)

num = one(eltype(z))
for i = 1:nz
num *= -z[i]
b = (ftype.w2 - ftype.w1)/2/z[i]
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
b = (ftype.w2 - ftype.w1)/2z[i]
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newz[2i-1] = b - pm
newz[2i] = b + pm
end

den = one(eltype(p))
for i = 1:np
den *= -p[i]
b = (ftype.w2 - ftype.w1)/2/p[i]
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
b = (ftype.w2 - ftype.w1)/2p[i]
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newp[2i-1] = b - pm
newp[2i] = b + pm
end
113 changes: 54 additions & 59 deletions src/Filters/filt.jl
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@


## PolynomialRatio
_zerosi(f::PolynomialRatio{:z,T}, x::AbstractArray{S}) where {T,S} =
_zerosi(f::PolynomialRatio{:z,T}, ::AbstractArray{S}) where {T,S} =
zeros(promote_type(T, S), max(-firstindex(f.a), -firstindex(f.b)))

"""
@@ -35,7 +35,7 @@ selected based on the data and filter length.
filt(f::PolynomialRatio{:z}, x, si=_zerosi(f, x)) = filt(coefb(f), coefa(f), x, si)

## SecondOrderSections
_zerosi(f::SecondOrderSections{:z,T,G}, x::AbstractArray{S}) where {T,G,S} =
_zerosi(f::SecondOrderSections{:z,T,G}, ::AbstractArray{S}) where {T,G,S} =
zeros(promote_type(T, G, S), 2, length(f.biquads))

# filt! algorithm (no checking, returns si)
@@ -44,14 +44,14 @@ function _filt!(out::AbstractArray, si::AbstractArray{S,N}, f::SecondOrderSectio
g = f.g
biquads = f.biquads
n = length(biquads)
@inbounds for i = 1:size(x, 1)
@inbounds for i in axes(x, 1)
yi = x[i, col]
for fi = 1:n
biquad = biquads[fi]
xi = yi
yi = si[1, fi] + biquad.b0*xi
si[1, fi] = si[2, fi] + biquad.b1*xi - biquad.a1*yi
si[2, fi] = biquad.b2*xi - biquad.a2*yi
yi = muladd(biquad.b0, xi, si[1, fi])
si[1, fi] = muladd(biquad.a1, -yi, muladd(biquad.b1, xi, si[2, fi]))
si[2, fi] = muladd(biquad.b2, xi, -biquad.a2 * yi)
end
out[i, col] = yi*g
end
@@ -80,17 +80,17 @@ filt(f::SecondOrderSections{:z,T,G}, x::AbstractArray{S}, si=_zerosi(f, x)) wher
filt!(Array{promote_type(T, G, S)}(undef, size(x)), f, x, si)

## Biquad
_zerosi(f::Biquad{:z,T}, x::AbstractArray{S}) where {T,S} =
_zerosi(::Biquad{:z,T}, ::AbstractArray{S}) where {T,S} =
zeros(promote_type(T, S), 2)

# filt! algorithm (no checking, returns si)
function _filt!(out::AbstractArray, si1::Number, si2::Number, f::Biquad{:z},
x::AbstractArray, col::Int)
@inbounds for i = 1:size(x, 1)
@inbounds for i in axes(x, 1)
xi = x[i, col]
yi = si1 + f.b0*xi
si1 = si2 + f.b1*xi - f.a1*yi
si2 = f.b2*xi - f.a2*yi
yi = muladd(f.b0, xi, si1)
si1 = muladd(f.a1, -yi, muladd(f.b1, xi, si2))
si2 = muladd(f.b2, xi, -f.a2 * yi)
out[i, col] = yi
end
(si1, si2)
@@ -105,9 +105,8 @@ function filt!(out::AbstractArray, f::Biquad{:z}, x::AbstractArray,
(size(si, 1) != 2 || (N > 1 && Base.trailingsize(si, 2) != ncols)) &&
error("si must have two rows and 1 or nsignals columns")

initial_si = si
for col = 1:ncols
_filt!(out, initial_si[1, N > 1 ? col : 1], initial_si[2, N > 1 ? col : 1], f, x, col)
_filt!(out, si[1, N > 1 ? col : 1], si[2, N > 1 ? col : 1], f, x, col)
end
out
end
@@ -170,13 +169,13 @@ function filt!(out::AbstractVector, f::DF2TFilter{<:PolynomialRatio,<:Vector}, x
if n == 1
mul!(out, x, b[1])
else
@inbounds for i=1:length(x)
@inbounds for i in eachindex(x, out)
xi = x[i]
val = si[1] + b[1]*xi
val = muladd(b[1], xi, si[1])
for j=2:n-1
si[j-1] = si[j] + b[j]*xi - a[j]*val
si[j-1] = muladd(a[j], -val, muladd(b[j], xi, si[j]))
end
si[n-1] = b[n]*xi - a[n]*val
si[n-1] = muladd(b[n], xi, -a[n] * val)
out[i] = val
end
end
@@ -200,13 +199,13 @@ DF2TFilter(coef::Biquad{:z,T}, state::Vector{S}=zeros(T, 2)) where {T,S} =
function filt!(out::AbstractVector, f::DF2TFilter{<:Biquad,<:Vector}, x::AbstractVector)
length(x) != length(out) && throw(ArgumentError("out size must match x"))
si = f.state
(si[1], si[2]) =_filt!(out, si[1], si[2], f.coef, x, 1)
(si[1], si[2]) = _filt!(out, si[1], si[2], f.coef, x, 1)
out
end

# Variant that allocates the output
filt(f::DF2TFilter{<:FilterCoefficients{:z},S}, x::AbstractVector) where {S<:Array} =
filt!(Vector{eltype(S)}(undef, length(x)), f, x)
filt(f::DF2TFilter{<:FilterCoefficients{:z},<:Array{T}}, x::AbstractVector) where {T} =
filt!(Vector{T}(undef, length(x)), f, x)

# Fall back to SecondOrderSections
DF2TFilter(coef::FilterCoefficients{:z}) = DF2TFilter(convert(SecondOrderSections, coef))
@@ -346,7 +345,7 @@ filtfilt(f::PolynomialRatio{:z}, x) = filtfilt(coefb(f), coefa(f), x)
# response to a step function is steady state.
function filt_stepstate(b::Union{AbstractVector{T}, T}, a::Union{AbstractVector{T}, T}) where T<:Number
scale_factor = a[1]
if scale_factor != 1.0
if !isone(scale_factor)
a = a ./ scale_factor
b = b ./ scale_factor
end
@@ -362,8 +361,8 @@ function filt_stepstate(b::Union{AbstractVector{T}, T}, a::Union{AbstractVector{
as<sz && (a = copyto!(zeros(eltype(a), sz), a))

# construct the companion matrix A and vector B:
A = [-a[2:end] [I; zeros(T, 1, sz-2)]]
B = b[2:end] - a[2:end] * b[1]
A = [-a[2:end] Matrix{T}(I, sz-1, sz-2)]
B = @views @. muladd(a[2:end], -b[1], b[2:end])
# Solve si = A*si + B
# (I - A)*si = B
scale_factor \ (I - A) \ B
@@ -375,18 +374,18 @@ function filt_stepstate(f::SecondOrderSections{:z,T}) where T
y = one(T)
for i = 1:length(biquads)
biquad = biquads[i]
a1, a2, b0, b1, b2 = biquad.a1, biquad.a2, biquad.b0, biquad.b1, biquad.b2

# At steady state, we have:
# y = s1 + b0*x
# s1 = s2 + b1*x - a1*y
# s2 = b2*x - a2*y
# where x is the input and y is the output. Solving these
# equations yields the following.
si[1, i] = (-(biquad.a1 + biquad.a2)*biquad.b0 + biquad.b1 + biquad.b2)/
(1 + biquad.a1 + biquad.a2)*y
si[2, i] = (biquad.a1*biquad.b2 - biquad.a2*(biquad.b0 + biquad.b1) + biquad.b2)/
(1 + biquad.a1 + biquad.a2)*y
y *= (biquad.b0 + biquad.b1 + biquad.b2)/(1 + biquad.a1 + biquad.a2)
den = (1 + a1 + a2)
si[1, i] = muladd((a1 + a2), -b0, b1 + b2) / den * y
si[2, i] = muladd(a1, b2, muladd(-a2, (b0 + b1), b2)) / den * y
y *= (b0 + b1 + b2) / den
end
si
end
@@ -397,8 +396,8 @@ end
Apply filter or filter coefficients `h` along the first dimension
of array `x` using a naïve time-domain algorithm
"""
function tdfilt(h::AbstractVector, x::AbstractArray{T}) where T<:Real
filt!(Array{T}(undef, size(x)), h, ones(eltype(h), 1), x)
function tdfilt(h::AbstractVector{H}, x::AbstractArray{T}) where {H,T<:Real}
filt(h, one(H), x)
end

"""
@@ -407,12 +406,12 @@ end
Like `tdfilt`, but writes the result into array `out`. Output array `out` may
not be an alias of `x`, i.e. filtering may not be done in place.
"""
function tdfilt!(out::AbstractArray, h::AbstractVector, x::AbstractArray)
filt!(out, h, ones(eltype(h), 1), x)
function tdfilt!(out::AbstractArray, h::AbstractVector{H}, x::AbstractArray) where H
filt!(out, h, one(H), x)
end

filt(h::AbstractArray, x::AbstractArray) =
filt!(Array{eltype(x)}(undef, size(x)), h, x)
filt(h::AbstractVector{H}, x::AbstractArray{T}) where {H,T} =
filt!(Array{promote_type(H, T)}(undef, size(x)), h, x)

#
# fftfilt and filt
@@ -447,48 +446,44 @@ end
# Like fftfilt! but does not check if out and x are the same size
function _fftfilt!(
out::AbstractArray{<:Real},
b::AbstractVector{<:Real},
b::AbstractVector{H},
x::AbstractArray{T},
nfft::Integer
) where T<:Real
) where {T<:Real,H<:Real}
nb = length(b)
nx = size(x, 1)
normfactor = 1/nfft
normfactor = nfft
W = promote_type(H, T)

L = min(nx, nfft - (nb - 1))
tmp1 = Vector{T}(undef, nfft)
tmp2 = Vector{Complex{T}}(undef, nfft >> 1 + 1)
tmp1 = Vector{W}(undef, nfft)
tmp2 = Vector{Complex{W}}(undef, nfft >> 1 + 1)

p1 = plan_rfft(tmp1)
p2 = plan_brfft(tmp2, nfft)

# FFT of filter
filterft = similar(tmp2)
tmp1[1:nb] .= b .* normfactor
tmp1[nb+1:end] .= zero(T)
tmp1[1:nb] .= b ./ normfactor
tmp1[nb+1:end] .= zero(W)
mul!(filterft, p1, tmp1)

# FFT of chunks
for colstart = 0:nx:length(x)-1
off = 1
while off <= nx
npadbefore = max(0, nb - off)
xstart = off - nb + npadbefore + 1
n = min(nfft - npadbefore, nx - xstart + 1)
for colstart = 0:nx:length(x)-1, off = 1:L:nx
npadbefore = max(0, nb - off)
xstart = off - nb + npadbefore + 1
n = min(nfft - npadbefore, nx - xstart + 1)

tmp1[1:npadbefore] .= zero(T)
tmp1[npadbefore+n+1:end] .= zero(T)
tmp1[1:npadbefore] .= zero(W)
tmp1[npadbefore+n+1:end] .= zero(W)

copyto!(tmp1, npadbefore+1, x, colstart+xstart, n)
mul!(tmp2, p1, tmp1)
broadcast!(*, tmp2, tmp2, filterft)
mul!(tmp1, p2, tmp2)
copyto!(tmp1, npadbefore+1, x, colstart+xstart, n)
mul!(tmp2, p1, tmp1)
broadcast!(*, tmp2, tmp2, filterft)
mul!(tmp1, p2, tmp2)

# Copy to output
copyto!(out, colstart+off, tmp1, nb, min(L, nx - off + 1))

off += L
end
# Copy to output
copyto!(out, colstart+off, tmp1, nb, min(L, nx - off + 1))
end

out
@@ -524,6 +519,6 @@ function filt_choose_alg!(
end
end

function filt_choose_alg!(out::AbstractArray, b::AbstractArray, x::AbstractArray)
function filt_choose_alg!(out::AbstractArray, b::AbstractVector, x::AbstractArray)
tdfilt!(out, b, x)
end
114 changes: 59 additions & 55 deletions src/Filters/filt_order.jl
Original file line number Diff line number Diff line change
@@ -56,32 +56,34 @@ Source Software, 2018. https://github.com/JuliaNLSolvers/Optim.jl/blob/master/sr
====================================================================#

sort_W((a, b)::Tuple{Real,Real}) = (a > b) ? (b, a) : (a, b)


toprototype(Wp::Real, Ws::Real, ftype::Type{Lowpass}) = Ws / Wp
toprototype(Wp::Real, Ws::Real, ftype::Type{Highpass}) = Wp / Ws
function toprototype(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, ftype::Type{Bandpass})
toprototype(Wp::Real, Ws::Real, ::Type{Lowpass}) = Ws / Wp
toprototype(Wp::Real, Ws::Real, ::Type{Highpass}) = Wp / Ws
function toprototype(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, ::Type{Bandpass})
# bandpass filter must have two corner frequencies we're computing with
Wa = (Ws .^ 2 .- Wp[1] * Wp[2]) ./ (Ws .* (Wp[1] - Wp[2]))
minimum(abs.(Wa))
Wa = muladd.(-Wp[1], Wp[2], Ws .^ 2) ./ (Ws .* (Wp[1] - Wp[2]))
min(abs.(Wa)...)
end
toprototype(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real, ftype::Type{Bandstop}) = butterworth_bsfmin(Wp, Ws, Rp, Rs)
fromprototype(Wp::Real, Wscale::Real, ftype::Type{Lowpass}) = Wp * Wscale
fromprototype(Wp::Real, Wscale::Real, ftype::Type{Highpass}) = Wp / Wscale
toprototype(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real, ::Type{Bandstop}) = butterworth_bsfmin(Wp, Ws, Rp, Rs)
fromprototype(Wp::Real, Wscale::Real, ::Type{Lowpass}) = Wp * Wscale
fromprototype(Wp::Real, Wscale::Real, ::Type{Highpass}) = Wp / Wscale

function fromprototype(Wp::Tuple{Real,Real}, Wscale::Real, ftype::Type{Bandstop})
Wa = zeros(2)
function fromprototype(Wp::Tuple{Real,Real}, Wscale::Real, ::Type{Bandstop})
diff = Wp[2] - Wp[1]
prod = Wp[2] * Wp[1]
Wa[1] = (diff + sqrt(diff^2 + 4 * (Wscale^2) * prod)) / (2 * Wscale)
Wa[2] = (diff - sqrt(diff^2 + 4 * (Wscale^2) * prod)) / (2 * Wscale)
sort!(abs.(Wa))
k = sqrt(muladd(4 * (Wscale^2), prod, diff^2))
den = 2 * Wscale
Wa = (diff + k) / den, (diff - k) / den
sort_W(abs.(Wa))
end

function fromprototype(Wp::Tuple{Real,Real}, Wscale::Real, ftype::Type{Bandpass})
Wsc = [-Wscale, Wscale]
Wa = -Wsc .* (Wp[2] - Wp[1]) ./ 2 .+ sqrt.(Wsc .^ 2 / 4 * (Wp[2] - Wp[1]) .^ 2 .+ Wp[1] * Wp[2])
sort!(abs.(Wa))
function fromprototype(Wp::Tuple{Real,Real}, Wscale::Real, ::Type{Bandpass})
Wsc = (-Wscale, Wscale)
diff = Wp[2] - Wp[1]
prod = Wp[2] * Wp[1]
Wa = muladd.(.-Wsc, diff / 2, sqrt(muladd(Wscale^2 / 4, diff^2, prod)))
sort_W(abs.(Wa))
end

butterworth_order_estimate(Rp::Real, Rs::Real, warp::Real) = (log(db2pow(Rs) - 1) - log(db2pow(Rp) - 1)) / (2 * log(warp))
@@ -94,8 +96,8 @@ function elliptic_order_estimate(Rp::Real, Rs::Real, Wa::Real)
k = Wa^-1
(k^2 < 1) || throw(DomainError(k^2, "Selectivity parameter specifies too narrow of a transition width.")) # check if in-bounds (k₁ << k <~ 1)
(1 - k₁^2 < 1) || throw(DomainError(1 - k₁^2, "Discrimination parameter specifies too deep of a stopband."))
K = tuple(ellipk(k^2), ellipk(1 - k^2)) # define the complementary moduli
K₁ = tuple(ellipk(k₁^2), ellipk(1 - k₁^2))
K = (ellipk(k^2), ellipk(1 - k^2)) # define the complementary moduli
K₁ = (ellipk(k₁^2), ellipk(1 - k₁^2))

# other order approach would be using (k₁'/k₁) / (k′/k).
(K[1] * K₁[2]) / (K[2] * K₁[1])
@@ -116,7 +118,7 @@ function brent(f, x1::T, x2::T) where {T<:AbstractFloat}
k1 = zero(T)

# first interval calculation.
m = a + g * (b - a)
m = muladd(g, (b - a), a)
m1, m2 = m, m
fm = f(m)
fm1, fm2 = fm, fm
@@ -133,7 +135,7 @@ function brent(f, x1::T, x2::T) where {T<:AbstractFloat}
if (abs(k1) > xt) # trial parabolic fit
r = (m - m1) * (fm - fm2)
q = (m - m2) * (fm - fm1)
p = (m - m2) * q - (m - m1) * r
p = muladd((m - m2), q, -(m - m1) * r)
q = 2 * (q - r)
if (q > 0)
p = -p
@@ -196,29 +198,29 @@ end
#
for filt in (:butterworth, :elliptic, :chebyshev)
@eval begin
function $(Symbol(string(filt, "_bsfcost")))(Wx::Real, uselowband::Bool, Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real)
function $(Symbol(filt, :_bsfcost))(Wx::Real, uselowband::Bool, Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real)
# override one of the passband edges with the test frequency.
Wpc = uselowband ? tuple(Wx, Wp[2]) : tuple(Wp[1], Wx)
Wpc = uselowband ? (Wx, Wp[2]) : (Wp[1], Wx)

# get the new warp frequency.
warp = minimum(abs.((Ws .* (Wpc[1] - Wpc[2])) ./ (Ws .^ 2 .- (Wpc[1] * Wpc[2]))))
warp = min(abs.((Ws .* (Wpc[1] - Wpc[2])) ./ muladd.(-Wpc[1], Wpc[2], Ws .^ 2))...)

# use the new frequency to determine the filter order.
$(Symbol(string(filt, "_order_estimate")))(Rp, Rs, warp)
$(Symbol(filt, :_order_estimate))(Rp, Rs, warp)
end

function $(Symbol(string(filt, "_bsfmin")))(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real)
function $(Symbol(filt, :_bsfmin))(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real)
# NOTE: the optimization function will adjust the corner frequencies in Wp, returning a new passband tuple.
Δ = eps(typeof(Wp[1]))^(2 / 3)
C₁(w) = $(Symbol(string(filt, "_bsfcost")))(w, true, Wp, Ws, Rp, Rs)
C₁(w) = $(Symbol(filt, :_bsfcost))(w, true, Wp, Ws, Rp, Rs)
p1 = brent(C₁, Wp[1], Ws[1] - Δ)

C₂(w) = $(Symbol(string(filt, "_bsfcost")))(w, false, tuple(p1, Wp[2]), Ws, Rp, Rs)
C₂(w) = $(Symbol(filt, :_bsfcost))(w, false, (p1, Wp[2]), Ws, Rp, Rs)
p2 = brent(C₂, Ws[2] + Δ, Wp[2])
Wadj = tuple(p1, p2)
Wadj = (p1, p2)

Wa = (Ws .* (Wadj[1] - Wadj[2])) ./ (Ws .^ 2 .- (Wadj[1] * Wadj[2]))
minimum(abs.(Wa)), Wadj
Wa = (Ws .* (p1 - p2)) ./ muladd.(-p1, p2, Ws .^ 2)
min(abs.(Wa)...), Wadj
end
end
end
@@ -237,8 +239,8 @@ domain is `:z`, interpretting the input frequencies as normalized from 0 to 1, w
function buttord(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real; domain::Symbol=:z)

# make sure the band edges are in increasing order.
Wps = (Wp[1] > Wp[2]) ? tuple(Wp[2], Wp[1]) : tuple(Wp[1], Wp[2])
Wss = (Ws[1] > Ws[2]) ? tuple(Ws[2], Ws[1]) : tuple(Ws[1], Ws[2])
Wps = sort_W(Wp)
Wss = sort_W(Ws)

# infer filter type based on ordering of edges.
(Wps[1] < Wss[1]) != (Wps[2] > Wss[2]) && throw(ArgumentError("Pass and stopband edges must be ordered for Bandpass/Bandstop filters."))
@@ -350,7 +352,7 @@ for (fcn, est, filt) in ((:ellipord, :elliptic, "Elliptic (Cauer)"),
end
# Lowpass/Highpass prototype xform is same as Butterworth.
wa = toprototype(Ωp, Ωs, ftype)
N = ceil(Int, $(Symbol(string(est, "_order_estimate")))(Rp, Rs, wa))
N = ceil(Int, $(Symbol(est, :_order_estimate))(Rp, Rs, wa))
if (domain == :z)
ωn = (2 / π) * atan(Ωp)
else
@@ -372,19 +374,19 @@ for (fcn, est, filt) in ((:ellipord, :elliptic, "Elliptic (Cauer)"),
frequencies as normalized from 0 to 1, where 1 corresponds to π radians/sample.
"""
function $fcn(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real; domain::Symbol=:z)
Wps = (Wp[1] > Wp[2]) ? tuple(Wp[2], Wp[1]) : tuple(Wp[1], Wp[2])
Wss = (Ws[1] > Ws[2]) ? tuple(Ws[2], Ws[1]) : tuple(Ws[1], Ws[2])
Wps = sort_W(Wp)
Wss = sort_W(Ws)
(Wps[1] < Wss[1]) != (Wps[2] > Wss[2]) && throw(ArgumentError("Pass and stopband edges must be ordered for Bandpass/Bandstop filters."))
ftype = (Wps[1] < Wss[1]) ? Bandstop : Bandpass
# pre-warp to analog if z-domain.
(Ωp, Ωs) = (domain == :z) ? (tan.(π / 2 .* Wps), tan.(π / 2 .* Wss)) : (Wps, Wss)
if (ftype == Bandpass)
Wa = (Ωs .^ 2 .- (Ωp[1] * Ωp[2])) ./ (Ωs .* (Ωp[1] - Ωp[2]))
Wa = muladd.(-Ωp[1], Ωp[2], Ωs .^ 2) ./ (Ωs .* (Ωp[1] - Ωp[2]))
Ωpadj = Ωp
else
(Wa, Ωpadj) = $(Symbol(string(est, "_bsfmin")))(Ωp, Ωs, Rp, Rs) # check scipy.
(Wa, Ωpadj) = $(Symbol(est, :_bsfmin))(Ωp, Ωs, Rp, Rs) # check scipy.
end
N = ceil(Int, $(Symbol(string(est, "_order_estimate")))(Rp, Rs, minimum(abs.(Wa))))
N = ceil(Int, $(Symbol(est, :_order_estimate))(Rp, Rs, min(abs.(Wa)...)))
ωn = (domain == :z) ? Wps : Ωpadj
N, ωn
end
@@ -431,31 +433,33 @@ order and natural frequencies for an analog filter. The default domain is `:z`,
frequencies as normalized from 0 to 1, where 1 corresponds to π radians/sample.
"""
function cheb2ord(Wp::Tuple{Real,Real}, Ws::Tuple{Real,Real}, Rp::Real, Rs::Real; domain::Symbol=:z)
Wps = (Wp[1] > Wp[2]) ? tuple(Wp[2], Wp[1]) : tuple(Wp[1], Wp[2])
Wss = (Ws[1] > Ws[2]) ? tuple(Ws[2], Ws[1]) : tuple(Ws[1], Ws[2])
Wps = sort_W(Wp)
Wss = sort_W(Ws)
(Wps[1] < Wss[1]) != (Wps[2] > Wss[2]) && throw(ArgumentError("Pass and stopband edges must be ordered for Bandpass/Bandstop filters."))
ftype = (Wps[1] < Wss[1]) ? Bandstop : Bandpass
(Ωp, Ωs) = (domain == :z) ? (tan.(π / 2 .* Wps), tan.(π / 2 .* Wss)) : (Wps, Wss)
if (ftype == Bandpass)
Wa = (Ωs .^ 2 .- (Ωp[1] * Ωp[2])) ./ (Ωs .* (Ωp[1] - Ωp[2]))
prod = Ωp[1] * Ωp[2]
diff = Ωp[1] - Ωp[2]
Wa = muladd.(Ωs, Ωs, -prod) ./ (Ωs .* diff)
else
(Wa, Ωpadj) = chebyshev_bsfmin(Ωp, Ωs, Rp, Rs)
prod = Ωpadj[1] * Ωpadj[2]
diff = Ωpadj[1] - Ωpadj[2]
end
N = ceil(Int, chebyshev_order_estimate(Rp, Rs, minimum(abs.(Wa))))
N = ceil(Int, chebyshev_order_estimate(Rp, Rs, min(abs.(Wa)...)))

# new frequency for stopband spec.
wnew = 1 / cosh(1 / N * acosh((db2pow(Rs) - 1) / (db2pow(Rp) - 1)))

# lowpass prototype to analog filter re-map.
Wna = zeros(2)
if (ftype == Bandpass)
Wna[1] = (Ωp[1] - Ωp[2]) / (2 * wnew) + ((Ωp[2] - Ωp[1])^2 / (4 * wnew^2) + Ωp[1] * Ωp[2])
Wna[2] = (Ωp[1] * Ωp[2]) / Wna[1]
Wna1 = diff / (2 * wnew) + (diff^2 / (4 * wnew^2) + prod)
else
Wna[1] = ((Ωpadj[1] - Ωpadj[2]) * wnew) / 2 + ((Ωpadj[1] - Ωpadj[2])^2 * wnew^2 / 4 + (Ωpadj[1] * Ωpadj[2]))
Wna[2] = (Ωpadj[1] * Ωpadj[2]) / Wna[1]
Wna1 = (diff * wnew) / 2 + (muladd(diff^2, wnew^2 / 4, prod))
end
ωn = (domain == :z) ? tuple((2 / π) * atan(Wna[1]), (2 / π) * atan(Wna[2])) : tuple(Wna[1], Wna[2])
Wna2 = prod / Wna1
ωn = (domain == :z) ? ((2 / π) * atan(Wna1), (2 / π) * atan(Wna2)) : (Wna1, Wna2)
N, ωn
end

@@ -480,9 +484,9 @@ function remezord(Wp::Real, Ws::Real, Rp::Real, Rs::Real)
(0 > Wp > 0.5) || (0 > Ws > 0.5) && throw(ArgumentError("Pass and stopband edges must be greater than DC and less than Nyquist."))
L1, L2 = log10(Rp), log10(Rs)
df = abs(Ws - Wp) # works in HPF case if passband/stopband edges are flipped
A = (5.309e-3 * L1^2) + (7.114e-2 * L1) - 0.4761
B = (2.66e-3 * L1^2) + (0.5941 * L1) + 0.4278
Kf = 0.51244 * (L1 - L2) + 11.01217
D = A * L2 - B
return ceil(Int, ((D - Kf * df^2) / df))
A = muladd(5.309e-3, L1^2, muladd(7.114e-2, L1, -0.4761))
B = muladd(2.66e-3, L1^2, muladd(0.5941, L1, 0.4278))
Kf = muladd(0.51244, (L1 - L2), 11.01217)
D = muladd(A, L2, -B)
return ceil(Int, (muladd(-Kf, df^2, D) / df))
end
42 changes: 23 additions & 19 deletions src/Filters/remez_fir.jl
Original file line number Diff line number Diff line change
@@ -88,7 +88,7 @@ C CODE BANNER
# RemezFilterType:
# Type I and II symmetric linear phase: neg==0 (filter_type==bandpass)
# Type III and IV negative symmetric linear phase: neg==1 (filter_type==hilbert or differentiator)
@enum RemezFilterType filter_type_bandpass=1 filter_type_differentiator=2 filter_type_hilbert=3
@enum RemezFilterType filter_type_bandpass filter_type_differentiator filter_type_hilbert



@@ -212,10 +212,10 @@ function freq_eval(xf, x::AbstractVector, y::AbstractVector, ad::AbstractVector)
d = 0.0
p = 0.0

for j = 1:length(ad)
for j in eachindex(ad)
c = ad[j] / (xf - x[j])
d += c
p += c * y[j]
p = muladd(c, y[j], p)
end

p/d
@@ -439,8 +439,8 @@ function remez(numtaps::Integer, band_defs;

#
# Start next iteration
#
@label L100
#
# @label L100
iext[nzz] = ngrid + 1
niter += 1
if niter > maxiter
@@ -449,7 +449,9 @@ function remez(numtaps::Integer, band_defs;
break
end

x[1:nz] = grid[iext[1:nz]]
for j = 1:nz
x[j] = grid[iext[j]]
end

for j = 1 : nz
ad[j] = lagrange_interp(j, nz, jet, x)
@@ -460,8 +462,8 @@ function remez(numtaps::Integer, band_defs;
k = 1
for j = 1 : nz
l = iext[j]
dnum += ad[j] * des[l]
dden += k * ad[j] / wt[l]
dnum = muladd(ad[j], des[l], dnum)
dden = muladd(k, ad[j] / wt[l], dden)
k = -k
end
dev = dnum / dden
@@ -495,6 +497,8 @@ function remez(numtaps::Integer, band_defs;
nut = -nu
j = 1

local comp

@label L200
j == nzz && (ynz = comp) # equivalent to "if (j == nzz) ynz = comp; end"
j >= nzz && @goto L300
@@ -604,13 +608,13 @@ function remez(numtaps::Integer, band_defs;
iext[nzz-j] = iext[nz-j]
end
iext[1] = k1
@goto L100
continue # @goto L100
@label L350
for j = 1:nz
iext[j] = iext[j+1]
end

@goto L100
continue # @goto L100
@label L370


@@ -669,9 +673,9 @@ function remez(numtaps::Integer, band_defs;
for j = 1 : nfcns
dtemp = 0.0
for k = 1 : nm1
dtemp += a[k+1] * cospi(2 * (j-1) * delf * k)
dtemp = muladd(a[k+1], cospi(2 * (j-1) * delf * k), dtemp)
end
alpha[j] = 2.0 * dtemp + a[1]
alpha[j] = 2dtemp + a[1]
end

for j = 2 : nfcns
@@ -680,7 +684,7 @@ function remez(numtaps::Integer, band_defs;
alpha[1] *= delf

if !full_grid
p[1] = 2.0*alpha[nfcns]*bb+alpha[nm1]
p[1] = muladd(2alpha[nfcns], bb, alpha[nm1])
p[2] = 2.0*aa*alpha[nfcns]
q[1] = alpha[nfcns-2]-alpha[nfcns]
for j = 2 : nm1
@@ -693,12 +697,12 @@ function remez(numtaps::Integer, band_defs;
a[k] = p[k]
p[k] = 2.0 * bb * a[k]
end
p[2] += a[1] * 2.0 *aa
p[2] = muladd(a[1], 2aa, p[2])
for k = 1 : j-1
p[k] += q[k] + aa * a[k+1]
p[k] += muladd(aa, a[k+1], q[k])
end
for k = 3 : j+1
p[k] += aa * a[k-1]
p[k] = muladd(aa, a[k-1], p[k])
end

if j != nm1
@@ -732,7 +736,7 @@ function remez(numtaps::Integer, band_defs;
for j = 2 : nm1
h[j] = 0.25 * (alpha[nz-j] + alpha[nfcns+2-j])
end
h[nfcns] = 0.5*alpha[1] + 0.25*alpha[2]
h[nfcns] = muladd(0.5, alpha[1], 0.25 * alpha[2])
end
else
if nodd
@@ -741,14 +745,14 @@ function remez(numtaps::Integer, band_defs;
for j = 1 : nm1
h[j] = 0.25 * (alpha[nz-j] - alpha[nfcns+3-j])
end
h[nfcns] = 0.5 * alpha[1] - 0.25 * alpha[3]
h[nfcns] = muladd(0.5, alpha[1], -0.25 * alpha[3])
h[nz] = 0.0
else
h[1] = 0.25 * alpha[nfcns]
for j = 2 : nm1
h[j] = 0.25 * (alpha[nz-j] - alpha[nfcns+2-j])
end
h[nfcns] = 0.5 * alpha[1] - 0.25 * alpha[2]
h[nfcns] = muladd(0.5, alpha[1], -0.25 * alpha[2])
end
end

16 changes: 10 additions & 6 deletions src/Filters/response.jl
Original file line number Diff line number Diff line change
@@ -39,13 +39,17 @@ _freq(filter::FilterCoefficients) = x::Number -> _freq(filter, x)

_freq(filter::FilterCoefficients, x::Number) =
_freq(convert(PolynomialRatio, filter), x)
_freq(filter::PolynomialRatio, x::Number) = filter.b(x) ./ filter.a(x)
_freq(filter::ZeroPoleGain, x::Number) =
filter.k * prod([x - z for z in filter.z]) / prod([x - p for p in filter.p])
_freq(filter::PolynomialRatio, x::Number) = filter.b(x) / filter.a(x)

_prod_freq(f, v::Vector{<:Union{Z,Biquad{D,Z}}}, ::Type{T}) where {D,T<:Number,Z<:Number} =
mapreduce(f, Base.mul_prod, v; init=one(promote_type(T, Z)))

_freq(filter::ZeroPoleGain, x::T) where {T<:Number} =
filter.k * _prod_freq(z -> x - z, filter.z, T) / _prod_freq(p -> x - p, filter.p, T)
_freq(filter::Biquad, x::Number) =
((filter.b0*x + filter.b1)*x + filter.b2) / ((x + filter.a1)*x + filter.a2)
_freq(filter::SecondOrderSections, x::Number) =
filter.g * prod([_freq(b, x) for b in filter.biquads])
muladd(muladd(filter.b0, x, filter.b1), x, filter.b2) / muladd((x + filter.a1), x, filter.a2)
_freq(filter::SecondOrderSections, x::T) where {T<:Number} =
filter.g * _prod_freq(b -> _freq(b, x), filter.biquads, T)


"""
57 changes: 9 additions & 48 deletions src/Filters/stream_filt.jl
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@ mutable struct FIRStandard{T} <: FIRKernel{T}
end

function FIRStandard(h::Vector)
h = reverse(h, dims=1)
h = reverse(h)
hLen = length(h)
FIRStandard(h, hLen)
end
@@ -48,7 +48,7 @@ mutable struct FIRDecimator{T} <: FIRKernel{T}
end

function FIRDecimator(h::Vector, decimation::Integer)
h = reverse(h, dims=1)
h = reverse(h)
hLen = length(h)
inputDeficit = 1
FIRDecimator(h, hLen, decimation, inputDeficit)
@@ -268,7 +268,7 @@ end

# For FIRFilter, set history vector to zeros of same type and required length
function reset!(self::FIRFilter)
self.history = zeros(eltype(self.history), self.historyLen)
fill!(self.history, 0)
reset!(self.kernel)
self
end
@@ -320,7 +320,7 @@ function outputlength(inputlength::Integer, ratio::Union{Integer,Rational}, init
ceil(Int, outLen)
end

function outputlength(kernel::FIRStandard, inputlength::Integer)
function outputlength(::FIRStandard, inputlength::Integer)
inputlength
end

@@ -357,7 +357,7 @@ function inputlength(outputlength::Int, ratio::Union{Integer,Rational}, initial
floor(Int, inLen)
end

function inputlength(kernel::FIRStandard, outputlength::Integer)
function inputlength(::FIRStandard, outputlength::Integer)
outputlength
end

@@ -431,16 +431,6 @@ function filt!(buffer::AbstractVector{Tb}, self::FIRFilter{FIRStandard{Th}}, x::
return xLen
end

function filt(self::FIRFilter{FIRStandard{Th}}, x::AbstractVector{Tx}) where {Th,Tx}
bufLen = outputlength(self, length(x))
buffer = Vector{promote_type(Th,Tx)}(undef, bufLen)
samplesWritten = filt!(buffer, self, x)

samplesWritten == bufLen || resize!(buffer, samplesWritten)

return buffer
end


#
# Interpolation
@@ -482,16 +472,6 @@ function filt!(buffer::AbstractVector{Tb}, self::FIRFilter{FIRInterpolator{Th}},
return bufIdx
end

function filt(self::FIRFilter{FIRInterpolator{Th}}, x::AbstractVector{Tx}) where {Th,Tx}
bufLen = outputlength(self, length(x))
buffer = Vector{promote_type(Th,Tx)}(undef, bufLen)
samplesWritten = filt!(buffer, self, x)

samplesWritten == bufLen || resize!(buffer, samplesWritten)

return buffer
end


#
# Rational resampling
@@ -538,15 +518,6 @@ function filt!(buffer::AbstractVector{Tb}, self::FIRFilter{FIRRational{Th}}, x::
return bufIdx
end

function filt(self::FIRFilter{FIRRational{Th}}, x::AbstractVector{Tx}) where {Th,Tx}
bufLen = outputlength(self, length(x))
buffer = Vector{promote_type(Th,Tx)}(undef, bufLen)
samplesWritten = filt!(buffer, self, x)

samplesWritten == bufLen || resize!(buffer, samplesWritten)
return buffer
end


#
# Decimation
@@ -590,24 +561,14 @@ function filt!(buffer::AbstractVector{Tb}, self::FIRFilter{FIRDecimator{Th}}, x:
return bufIdx
end

function filt(self::FIRFilter{FIRDecimator{Th}}, x::AbstractVector{Tx}) where {Th,Tx}
bufLen = outputlength(self, length(x))
buffer = Vector{promote_type(Th,Tx)}(undef, bufLen)
samplesWritten = filt!(buffer, self, x)

samplesWritten == bufLen || resize!(buffer, samplesWritten)

return buffer
end


#
# Arbitrary resampling
#
# Updates FIRArbitrary state. See Section 7.5.1 in [1].
# [1] uses a phase accumilator that increments by Δ (Nϕ/rate)

function update(kernel::FIRArbitrary)
function update!(kernel::FIRArbitrary)
kernel.ϕAccumulator += kernel.Δ

if kernel.ϕAccumulator > kernel.
@@ -657,8 +618,8 @@ function filt!(

# Used to have @inbounds. Restore @inbounds if buffer length
# can be verified prior to access.
buffer[bufIdx] = yLower + yUpper * kernel.α
update(kernel)
buffer[bufIdx] = muladd(yUpper, kernel.α, yLower)
update!(kernel)
end

kernel.inputDeficit = kernel.xIdx - xLen
@@ -667,7 +628,7 @@ function filt!(
return bufIdx
end

function filt(self::FIRFilter{FIRArbitrary{Th}}, x::AbstractVector{Tx}) where {Th,Tx}
function filt(self::FIRFilter{Tk}, x::AbstractVector{Tx}) where {Th,Tx,Tk<:FIRKernel{Th}}
bufLen = outputlength(self, length(x))
buffer = Vector{promote_type(Th,Tx)}(undef, bufLen)
samplesWritten = filt!(buffer, self, x)
133 changes: 66 additions & 67 deletions src/dspbase.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# This file was formerly a part of Julia. License is MIT: https://julialang.org/license

import Base: trailingsize

const SMALL_FILT_CUTOFF = 58

_zerosi(b,a,T) = zeros(promote_type(eltype(b), eltype(a), T), max(length(a), length(b))-1)
@@ -39,38 +37,40 @@ function filt!(out::AbstractArray, b::Union{AbstractVector, Number}, a::Union{Ab
bs = length(b)
sz = max(as, bs)
silen = sz - 1
ncols = trailingsize(x,2)
ncols = size(x, 2)

if size(si, 1) != silen
throw(ArgumentError("initial state vector si must have max(length(a),length(b))-1 rows"))
end
if N > 1 && trailingsize(si,2) != ncols
throw(ArgumentError("initial state vector si must be a vector or have the same number of columns as x"))
elseif N > 1 && size(si, 2) != ncols
throw(ArgumentError("initial state si must be a vector or have the same number of columns as x"))
end

size(x,1) == 0 && return out
sz == 1 && return mul!(out, x, b[1]/a[1]) # Simple scaling without memory
iszero(size(x, 1)) && return out
isone(sz) && return (k = b[1] / a[1]; Compat.@noinline mul!(out, x, k)) # Simple scaling without memory

# Filter coefficient normalization
if a[1] != 1
if !isone(a[1])
norml = a[1]
a = a ./ norml
b = b ./ norml
a = Compat.@noinline broadcast(/, a, norml)
b = Compat.@noinline broadcast(/, b, norml)
end
# Pad the coefficients with zeros if needed
bs<sz && (b = copyto!(zeros(eltype(b), sz), b))
1<as<sz && (a = copyto!(zeros(eltype(a), sz), a))

initial_si = si
for col = 1:ncols
# Reset the filter state
si = initial_si[:, N > 1 ? col : 1]
if as > 1
_filt_iir!(out, b, a, x, si, col)
elseif bs <= SMALL_FILT_CUTOFF
_small_filt_fir!(out, b, x, si, col)
else
_filt_fir!(out, b, x, si, col)
if as == 1 && bs <= SMALL_FILT_CUTOFF
_small_filt_fir!(out, b, x, si, Val(bs))
else
initial_si = si
si = similar(si, axes(si, 1))
for col = 1:ncols
# Reset the filter state
copyto!(si, view(initial_si, :, N > 1 ? col : 1))
if as > 1
_filt_iir!(out, b, a, x, si, col)
else
_filt_fir!(out, b, x, si, col)
end
end
end
return out
@@ -79,75 +79,72 @@ end
# Transposed direct form II
function _filt_iir!(out, b, a, x, si, col)
silen = length(si)
@inbounds for i=1:size(x, 1)
xi = x[i,col]
@inbounds for i in axes(x, 1)
xi = x[i, col]
val = muladd(xi, b[1], si[1])
out[i, col] = val
for j=1:(silen-1)
si[j] = muladd(val, -a[j+1], muladd(xi, b[j+1], si[j+1]))
end
si[silen] = muladd(xi, b[silen+1], -a[silen+1]*val)
out[i,col] = val
end
end

# Transposed direct form II
function _filt_fir!(out, b, x, si, col)
silen = length(si)
@inbounds for i=1:size(x, 1)
xi = x[i,col]
val = muladd(xi, b[1], si[1])
@inbounds for i in axes(x, 1)
xi = x[i, col]
out[i, col] = muladd(xi, b[1], si[1])
for j=1:(silen-1)
si[j] = muladd(xi, b[j+1], si[j+1])
end
si[silen] = b[silen+1]*xi
out[i,col] = val
si[silen] = b[silen+1] * xi
end
end

#
# filt implementation for FIR filters (faster than Base)
#

for n = 2:SMALL_FILT_CUTOFF
silen = n-1
si = [Symbol("si$i") for i = 1:silen]
# Transposed direct form II
@eval function _filt_fir!(out, b::NTuple{$n,T}, x, siarr, col) where {T}
offset = (col - 1) * size(x, 1)

$(Expr(:block, [:(@inbounds $(si[i]) = siarr[$i]) for i = 1:silen]...))
@inbounds for i=1:size(x, 1)
xi = x[i+offset]
val = muladd(xi, b[1], $(si[1]))
$(Expr(:block, [:($(si[j]) = muladd(xi, b[$(j+1)], $(si[j+1]))) for j = 1:(silen-1)]...))
$(si[silen]) = b[$(silen+1)]*xi
out[i+offset] = val
# Transposed direct form II
@generated function _filt_fir!(out, b::NTuple{N,T}, x, siarr, col) where {N,T}
silen = N - 1
si_end = Symbol(:si_, silen)
SMALL_FILT_VECT_CUTOFF = 18
si_check = N > SMALL_FILT_VECT_CUTOFF ? :(nothing) : :(@assert length(siarr) == $silen)

q = quote
$si_check
Base.@nextract $silen si siarr
for i in axes(x, 1)
xi = x[i, col]
val = muladd(xi, b[1], si_1)
Base.@nexprs $(silen-1) j -> (si_j = muladd(xi, b[j+1], si_{j+1}))
$si_end = b[N] * xi
out[i, col] = val
end
end
end

# Convert array filter tap input to tuple for small-filtering
let chain = :(throw(ArgumentError("invalid tuple size")))
for n = SMALL_FILT_CUTOFF:-1:2
chain = quote
if length(h) == $n
_filt_fir!(
out,
($([:(@inbounds(h[$i])) for i = 1:n]...),),
x,
si,
col
)
else
$chain
end
if N > SMALL_FILT_VECT_CUTOFF
loop_args = q.args[6].args[2].args
for i in (2, 10)
loop_args[i] = :(@inbounds $(loop_args[i]))
end
end
q
end

@eval function _small_filt_fir!(
out::AbstractArray, h::AbstractVector{T}, x::AbstractArray, si, col
) where T
$chain
# Convert array filter tap input to tuple for small-filtering
function _small_filt_fir!(
out::AbstractArray, h::AbstractVector, x::AbstractArray,
si::AbstractArray{S,N}, ::Val{bs}) where {S,N,bs}

bs < 2 && throw(ArgumentError("invalid tuple size"))
b = ntuple(j -> @inbounds(h[j]), Val(bs))
for col in axes(x, 2)
v_si = view(si, :, N > 1 ? col : 1)
_filt_fir!(out, b, x, v_si, col)
end
end

@@ -310,7 +307,7 @@ end
buff = similar(u, nffts)

p = plan_fft!(buff)
ip = plan_bfft!(buff)
ip = inv(p).p

buff, buff, p, ip # Only one buffer for complex
end
@@ -525,6 +522,7 @@ function unsafe_conv_kern_os!(out,
lastfull > 1 ? [1:firstfull - 1, lastfull + 1 : nblock] : [1:nblock]
end
all_dims = 1:N
val_dims = ntuple(Val, Val(N))
# Buffer to store ranges of indices for a single region of the perimeter
perimeter_range = Vector{UnitRange{Int}}(undef, N)

@@ -540,15 +538,15 @@ function unsafe_conv_kern_os!(out,
# 2 | Edges of Cube
# 3 | Corners of Cube
#
for n_edges in all_dims
for n_edges in val_dims
unsafe_conv_kern_os_edge!(
# These arrays and buffers will be mutated
out,
tdbuff,
fdbuff,
perimeter_range,
# Number of edge dimensions to pad and convolve
Val(n_edges),
n_edges,
# Data to be convolved
u,
filter_fd,
@@ -628,10 +626,11 @@ function _conv_kern_fft!(out, u, v, su, sv, outsize, nffts)
upad = _zeropad(u, nffts)
vpad = _zeropad(v, nffts)
p! = plan_fft!(upad)
ip! = inv(p!)
p! * upad # Operates in place on upad
p! * vpad
upad .*= vpad
ifft!(upad)
ip! * upad
copyto!(out,
CartesianIndices(out),
upad,
4 changes: 2 additions & 2 deletions test/FilterTestHelpers.jl
Original file line number Diff line number Diff line change
@@ -63,7 +63,7 @@ function zpkfilter_accuracy(f1, f2, accurate_f; relerr=1, compare_gain_at=nothin
z2, p2 = sort(f2.z, lt=lt), sort(f2.p, lt=lt)
accurate_z, accurate_p = sort(accurate_f.z, lt=lt), sort(accurate_f.p, lt=lt)
if !isempty(z1) || !isempty(z2) || !isempty(accurate_z)
if eps != nothing
if eps !== nothing
@test (z1, accurate_z, atol=eps)
@test (z2, accurate_z, atol=eps)
else
@@ -72,7 +72,7 @@ function zpkfilter_accuracy(f1, f2, accurate_f; relerr=1, compare_gain_at=nothin
end
accuracy_check(loss(z1, accurate_z), loss(z2, accurate_z), "z", relerr)
end
if eps != nothing
if eps !== nothing
@test (p1, accurate_p, atol=eps)
@test (p2, accurate_p, atol=eps)
@test (f1.k, accurate_f.k, atol=eps)
24 changes: 11 additions & 13 deletions test/dsp.jl
Original file line number Diff line number Diff line change
@@ -179,16 +179,16 @@ end
end

@testset "Overlap-Save" begin
to_sizetuple = (diml, N) -> ntuple(_ -> diml, N)
function os_test_data(eltype, diml, N)
to_sizetuple(diml, N) = ntuple(_ -> diml, N)
function os_test_data(::Type{T}, diml, N) where T
s = to_sizetuple(diml, N)
arr = rand(eltype, s)
arr = rand(T, s)
s, arr
end
function test_os(eltype, nu, nv, N, nfft)
function test_os(::Type{T}, nu, nv, N, nfft) where T
nffts = to_sizetuple(nfft, N)
su, u = os_test_data(eltype, nu, N)
sv, v = os_test_data(eltype, nv, N)
su, u = os_test_data(T, nu, N)
sv, v = os_test_data(T, nv, N)
sout = su .+ sv .- 1
out = _conv_similar(u, sout, axes(u), axes(v))
unsafe_conv_kern_os!(out, u, v, su, sv, sout, nffts)
@@ -202,13 +202,9 @@ end
nlarge = 128

regular_nsmall = [12, 128]
for numdim in Ns
for elt in eltypes
for nsmall in regular_nsmall
nfft = optimalfftfiltlength(nsmall, nlarge)
test_os(elt, nlarge, nsmall, Val{numdim}(), nfft)
end
end
for numdim in Ns, elt in eltypes, nsmall in regular_nsmall
nfft = optimalfftfiltlength(nsmall, nlarge)
test_os(elt, nlarge, nsmall, Val{numdim}(), nfft)
end

# small = 12, fft = 256 exercises output being smaller than the normal
@@ -266,6 +262,8 @@ end

# Issue #288
@test xcorr(off_a, off_b, padmode = :longest) == OffsetVector(vcat(0, exp), -3:1)

@test_throws ArgumentError xcorr([1], [2]; padmode=:bug)
end

@testset "deconv" begin
20 changes: 10 additions & 10 deletions test/filt_order.jl
Original file line number Diff line number Diff line change
@@ -50,13 +50,13 @@ using DSP, Test
# [n, Wn] = buttord(Wp, Ws, Rp, Rs)
#

(nbp, Wnbp) = buttord(tuple(100 / 500, 200 / 500), tuple(50 / 500, 250 / 500), 3, 40, domain=:z)
(nbp, Wnbp) = buttord((100 / 500, 200 / 500), (50 / 500, 250 / 500), 3, 40, domain=:z)
@test nbp == 8
@test Wnbp[1] 0.195101359239
@test Wnbp[2] 0.408043633382

# s-domain
(nbps, Wnbps) = buttord(tuple(100 / 500, 200 / 500), tuple(50 / 500, 250 / 500), 3, 40, domain=:s)
(nbps, Wnbps) = buttord((100 / 500, 200 / 500), (50 / 500, 250 / 500), 3, 40, domain=:s)
@test nbps == 9
@test Wnbps[1] 0.198730150231
@test Wnbps[2] 0.402555927759
@@ -70,13 +70,13 @@ using DSP, Test

# this test may be more sensitive...MATLAB's implementation of bounded minimization
# will yield different results in comparison to Optim.jl.
(nbs, Wnbs) = buttord(tuple(3200 / 22050, 7800 / 22050), tuple(4800 / 22050, 5600 / 22050), 2, 60, domain=:z)
(nbs, Wnbs) = buttord((3200 / 22050, 7800 / 22050), (4800 / 22050, 5600 / 22050), 2, 60, domain=:z)
@test nbs == 5
@test (Wnbs[1], 0.172660908966, rtol=Δ)
@test (Wnbs[2], 0.314956388749, rtol=Δ)

# s-domain
(nbss, Wnbss) = buttord(tuple(3200 / 22050, 7800 / 22050), tuple(4800 / 22050, 5600 / 22050), 2, 60, domain=:s)
(nbss, Wnbss) = buttord((3200 / 22050, 7800 / 22050), (4800 / 22050, 5600 / 22050), 2, 60, domain=:s)
@test (Wnbss[1], 0.173677826752, rtol=Δ)
@test (Wnbss[2], 0.318267164272, rtol=Δ)

@@ -88,8 +88,8 @@ end
@testset "ellipord" begin

Rp, Rs = 3, 40
Wp = tuple(0.2, 0.7)
Ws = tuple(0.1, 0.8)
Wp = (0.2, 0.7)
Ws = (0.1, 0.8)

# Lowpass
(nl, Wnl) = ellipord(0.1, 0.2, Rp, Rs, domain=:z)
@@ -127,8 +127,8 @@ end

@testset "cheb1ord" begin
Rp, Rs = 2, 70
Wp = tuple(0.2, 0.5)
Ws = tuple(0.1, 0.6)
Wp = (0.2, 0.5)
Ws = (0.1, 0.6)

# Lowpass
(nl, Wnl) = cheb1ord(0.1, 0.21, Rp, Rs, domain=:z)
@@ -165,8 +165,8 @@ end

@testset "cheb2ord" begin
Rp, Rs = 1.2, 80
Wp = tuple(0.22, 0.51)
Ws = tuple(0.14, 0.63)
Wp = (0.22, 0.51)
Ws = (0.14, 0.63)

# Lowpass
(nl, Wnl) = cheb2ord(0.1, 0.21, Rp, Rs, domain=:z)
6 changes: 3 additions & 3 deletions test/filt_stream.jl
Original file line number Diff line number Diff line change
@@ -68,7 +68,7 @@ end
# Single rate filtering
#

function test_singlerate(h, x)
function test_singlerate(h::AbstractVector{T}, x::AbstractVector) where T
xLen = length(x)
hLen = length(h)
pivotPoint = min(rand(50:150), div(xLen, 4))
@@ -81,8 +81,8 @@ function test_singlerate(h, x)
@printfifinteractive( "___] | | \\| |__] |___ |___ | \\ | | | |___\n" )
@printfifinteractive( "\nTesting single-rate fitering, h is %s, x is %s. xLen = %d, hLen = %d", string(eltype(h)), string(eltype(x)), xLen, hLen )

@printfifinteractive( "\n\tBase.filt\n\t\t")
@timeifinteractive naiveResult = filt(h, 1.0, x)
@printfifinteractive( "\n\tfilt\n\t\t")
@timeifinteractive naiveResult = filt(h, one(T), x)

@printfifinteractive( "\n\tDSP.filt( h, x, 1//1 )\n\t\t" )
@timeifinteractive statelesResult = DSP.filt( h, x )

0 comments on commit 1e72e21

Please sign in to comment.