Skip to content

Commit

Permalink
filt perf optimizations (#516)
Browse files Browse the repository at this point in the history
* create `si` only once, and reuse

* no inbounds faster for N < 24

* @generated `_small_filt_fir!`

- use Base.Cartesian.@ntuple

* inbounds if N > 18

major speedups for N > 18, much slower if inbounds for small N

* reorder store in `_filt_fir` and `_filt_iir`

>20% faster for fir

* maybe prevent undefvarerror

no need to assign values to enum

* small things

- eachindex stuff
- `==` to `===`  nothing
- `npairs = max(nz, np)`

* isone, iszero, etc.

* `filt` for `FIRFilter`: remove redundant definitions

* extract type instability from loop

* remove fft conv type instability

- in `unsafe_conv_kern_os!`
- probably no real impact

* correct `filt` arg types

* nicer loop

* inv plan

* cleanup filt_order

* use `muladd`s in filt.jl

* `_prod_freq`

* more `muladd`s

* `fill!`, `reverse`, don't broadcast scalar /

* micro-optimizations, cosmetics

- reduced exceptions in `@code_llvm` for `N < 18`
- `@noinline mul!`, `broadcast`, only for julia > v1.8
- name `SMALL_FILT_VECT_CUTOFF`, selective bounds check

* Compat noinline

* add test for error

(cherry picked from commit 9de4c0c)
wheeheee authored and martinholters committed Sep 13, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 26ca32e commit d2e69ee
Showing 12 changed files with 257 additions and 293 deletions.
1 change: 1 addition & 0 deletions src/DSP.jl
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@ module DSP
using FFTW
using LinearAlgebra: mul!, rmul!
using IterTools: subsets
using Compat: Compat

export conv, deconv, filt, filt!, xcorr

20 changes: 9 additions & 11 deletions src/Filters/design.jl
Original file line number Diff line number Diff line change
@@ -86,9 +86,7 @@ function Chebyshev2(::Type{T}, n::Integer, ripple::Real) where {T<:Real}

ε = 1/sqrt(10^(convert(T, ripple)/10)-1)
p = chebyshev_poles(T, n, ε)
for i = 1:length(p)
p[i] = inv(p[i])
end
map!(inv, p, p)

z = zeros(Complex{T}, n-isodd(n))
k = one(T)
@@ -155,7 +153,7 @@ function asne(w::Number, k::Real)
# Eq. (50)
k = abs2(k/(1+sqrt(1-abs2(k))))
# Eq. (56)
w = 2*w/((1+k)*(1+sqrt(1-abs2(kold)*w^2)))
w = 2w / ((1 + k) * (1 + sqrt(muladd(-abs2(kold), w^2, 1))))
end
2*asin(w)/π
end
@@ -355,9 +353,9 @@ function transform_prototype(ftype::Bandpass, proto::ZeroPoleGain{:s})
newz = zeros(TR, 2*nz+np-ncommon)
newp = zeros(TR, 2*np+nz-ncommon)
for (oldc, newc) in ((p, newp), (z, newz))
for i = 1:length(oldc)
for i in eachindex(oldc)
b = oldc[i] * ((ftype.w2 - ftype.w1)/2)
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newc[2i-1] = b + pm
newc[2i] = b - pm
end
@@ -372,25 +370,25 @@ function transform_prototype(ftype::Bandstop, proto::ZeroPoleGain{:s})
k = proto.k
nz = length(z)
np = length(p)
npairs = nz+np-min(nz, np)
npairs = max(nz, np)
TR = Base.promote_eltype(z, p)
newz = Vector{TR}(undef, 2*npairs)
newp = Vector{TR}(undef, 2*npairs)

num = one(eltype(z))
for i = 1:nz
num *= -z[i]
b = (ftype.w2 - ftype.w1)/2/z[i]
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
b = (ftype.w2 - ftype.w1)/2z[i]
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newz[2i-1] = b - pm
newz[2i] = b + pm
end

den = one(eltype(p))
for i = 1:np
den *= -p[i]
b = (ftype.w2 - ftype.w1)/2/p[i]
pm = sqrt(b^2 - ftype.w2 * ftype.w1)
b = (ftype.w2 - ftype.w1)/2p[i]
pm = sqrt(muladd(-ftype.w2, ftype.w1, b^2))
newp[2i-1] = b - pm
newp[2i] = b + pm
end
113 changes: 54 additions & 59 deletions src/Filters/filt.jl
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@


## PolynomialRatio
_zerosi(f::PolynomialRatio{:z,T}, x::AbstractArray{S}) where {T,S} =
_zerosi(f::PolynomialRatio{:z,T}, ::AbstractArray{S}) where {T,S} =
zeros(promote_type(T, S), max(-firstindex(f.a), -firstindex(f.b)))

"""
@@ -35,7 +35,7 @@ selected based on the data and filter length.
filt(f::PolynomialRatio{:z}, x, si=_zerosi(f, x)) = filt(coefb(f), coefa(f), x, si)

## SecondOrderSections
_zerosi(f::SecondOrderSections{:z,T,G}, x::AbstractArray{S}) where {T,G,S} =
_zerosi(f::SecondOrderSections{:z,T,G}, ::AbstractArray{S}) where {T,G,S} =
zeros(promote_type(T, G, S), 2, length(f.biquads))

# filt! algorithm (no checking, returns si)
@@ -44,14 +44,14 @@ function _filt!(out::AbstractArray, si::AbstractArray{S,N}, f::SecondOrderSectio
g = f.g
biquads = f.biquads
n = length(biquads)
@inbounds for i = 1:size(x, 1)
@inbounds for i in axes(x, 1)
yi = x[i, col]
for fi = 1:n
biquad = biquads[fi]
xi = yi
yi = si[1, fi] + biquad.b0*xi
si[1, fi] = si[2, fi] + biquad.b1*xi - biquad.a1*yi
si[2, fi] = biquad.b2*xi - biquad.a2*yi
yi = muladd(biquad.b0, xi, si[1, fi])
si[1, fi] = muladd(biquad.a1, -yi, muladd(biquad.b1, xi, si[2, fi]))
si[2, fi] = muladd(biquad.b2, xi, -biquad.a2 * yi)
end
out[i, col] = yi*g
end
@@ -80,17 +80,17 @@ filt(f::SecondOrderSections{:z,T,G}, x::AbstractArray{S}, si=_zerosi(f, x)) wher
filt!(Array{promote_type(T, G, S)}(undef, size(x)), f, x, si)

## Biquad
_zerosi(f::Biquad{:z,T}, x::AbstractArray{S}) where {T,S} =
_zerosi(::Biquad{:z,T}, ::AbstractArray{S}) where {T,S} =
zeros(promote_type(T, S), 2)

# filt! algorithm (no checking, returns si)
function _filt!(out::AbstractArray, si1::Number, si2::Number, f::Biquad{:z},
x::AbstractArray, col::Int)
@inbounds for i = 1:size(x, 1)
@inbounds for i in axes(x, 1)
xi = x[i, col]
yi = si1 + f.b0*xi
si1 = si2 + f.b1*xi - f.a1*yi
si2 = f.b2*xi - f.a2*yi
yi = muladd(f.b0, xi, si1)
si1 = muladd(f.a1, -yi, muladd(f.b1, xi, si2))
si2 = muladd(f.b2, xi, -f.a2 * yi)
out[i, col] = yi
end
(si1, si2)
@@ -105,9 +105,8 @@ function filt!(out::AbstractArray, f::Biquad{:z}, x::AbstractArray,
(size(si, 1) != 2 || (N > 1 && Base.trailingsize(si, 2) != ncols)) &&
error("si must have two rows and 1 or nsignals columns")

initial_si = si
for col = 1:ncols
_filt!(out, initial_si[1, N > 1 ? col : 1], initial_si[2, N > 1 ? col : 1], f, x, col)
_filt!(out, si[1, N > 1 ? col : 1], si[2, N > 1 ? col : 1], f, x, col)
end
out
end
@@ -170,13 +169,13 @@ function filt!(out::AbstractVector, f::DF2TFilter{<:PolynomialRatio,<:Vector}, x
if n == 1
mul!(out, x, b[1])
else
@inbounds for i=1:length(x)
@inbounds for i in eachindex(x, out)
xi = x[i]
val = si[1] + b[1]*xi
val = muladd(b[1], xi, si[1])
for j=2:n-1
si[j-1] = si[j] + b[j]*xi - a[j]*val
si[j-1] = muladd(a[j], -val, muladd(b[j], xi, si[j]))
end
si[n-1] = b[n]*xi - a[n]*val
si[n-1] = muladd(b[n], xi, -a[n] * val)
out[i] = val
end
end
@@ -200,13 +199,13 @@ DF2TFilter(coef::Biquad{:z,T}, state::Vector{S}=zeros(T, 2)) where {T,S} =
function filt!(out::AbstractVector, f::DF2TFilter{<:Biquad,<:Vector}, x::AbstractVector)
length(x) != length(out) && throw(ArgumentError("out size must match x"))
si = f.state
(si[1], si[2]) =_filt!(out, si[1], si[2], f.coef, x, 1)
(si[1], si[2]) = _filt!(out, si[1], si[2], f.coef, x, 1)
out
end

# Variant that allocates the output
filt(f::DF2TFilter{<:FilterCoefficients{:z},S}, x::AbstractVector) where {S<:Array} =
filt!(Vector{eltype(S)}(undef, length(x)), f, x)
filt(f::DF2TFilter{<:FilterCoefficients{:z},<:Array{T}}, x::AbstractVector) where {T} =
filt!(Vector{T}(undef, length(x)), f, x)

# Fall back to SecondOrderSections
DF2TFilter(coef::FilterCoefficients{:z}) = DF2TFilter(convert(SecondOrderSections, coef))
@@ -346,7 +345,7 @@ filtfilt(f::PolynomialRatio{:z}, x) = filtfilt(coefb(f), coefa(f), x)
# response to a step function is steady state.
function filt_stepstate(b::Union{AbstractVector{T}, T}, a::Union{AbstractVector{T}, T}) where T<:Number
scale_factor = a[1]
if scale_factor != 1.0
if !isone(scale_factor)
a = a ./ scale_factor
b = b ./ scale_factor
end
@@ -362,8 +361,8 @@ function filt_stepstate(b::Union{AbstractVector{T}, T}, a::Union{AbstractVector{
as<sz && (a = copyto!(zeros(eltype(a), sz), a))

# construct the companion matrix A and vector B:
A = [-a[2:end] [I; zeros(T, 1, sz-2)]]
B = b[2:end] - a[2:end] * b[1]
A = [-a[2:end] Matrix{T}(I, sz-1, sz-2)]
B = @views @. muladd(a[2:end], -b[1], b[2:end])
# Solve si = A*si + B
# (I - A)*si = B
scale_factor \ (I - A) \ B
@@ -375,18 +374,18 @@ function filt_stepstate(f::SecondOrderSections{:z,T}) where T
y = one(T)
for i = 1:length(biquads)
biquad = biquads[i]
a1, a2, b0, b1, b2 = biquad.a1, biquad.a2, biquad.b0, biquad.b1, biquad.b2

# At steady state, we have:
# y = s1 + b0*x
# s1 = s2 + b1*x - a1*y
# s2 = b2*x - a2*y
# where x is the input and y is the output. Solving these
# equations yields the following.
si[1, i] = (-(biquad.a1 + biquad.a2)*biquad.b0 + biquad.b1 + biquad.b2)/
(1 + biquad.a1 + biquad.a2)*y
si[2, i] = (biquad.a1*biquad.b2 - biquad.a2*(biquad.b0 + biquad.b1) + biquad.b2)/
(1 + biquad.a1 + biquad.a2)*y
y *= (biquad.b0 + biquad.b1 + biquad.b2)/(1 + biquad.a1 + biquad.a2)
den = (1 + a1 + a2)
si[1, i] = muladd((a1 + a2), -b0, b1 + b2) / den * y
si[2, i] = muladd(a1, b2, muladd(-a2, (b0 + b1), b2)) / den * y
y *= (b0 + b1 + b2) / den
end
si
end
@@ -397,8 +396,8 @@ end
Apply filter or filter coefficients `h` along the first dimension
of array `x` using a naïve time-domain algorithm
"""
function tdfilt(h::AbstractVector, x::AbstractArray{T}) where T<:Real
filt!(Array{T}(undef, size(x)), h, ones(eltype(h), 1), x)
function tdfilt(h::AbstractVector{H}, x::AbstractArray{T}) where {H,T<:Real}
filt(h, one(H), x)
end

"""
@@ -407,12 +406,12 @@ end
Like `tdfilt`, but writes the result into array `out`. Output array `out` may
not be an alias of `x`, i.e. filtering may not be done in place.
"""
function tdfilt!(out::AbstractArray, h::AbstractVector, x::AbstractArray)
filt!(out, h, ones(eltype(h), 1), x)
function tdfilt!(out::AbstractArray, h::AbstractVector{H}, x::AbstractArray) where H
filt!(out, h, one(H), x)
end

filt(h::AbstractArray, x::AbstractArray) =
filt!(Array{eltype(x)}(undef, size(x)), h, x)
filt(h::AbstractVector{H}, x::AbstractArray{T}) where {H,T} =
filt!(Array{promote_type(H, T)}(undef, size(x)), h, x)

#
# fftfilt and filt
@@ -447,48 +446,44 @@ end
# Like fftfilt! but does not check if out and x are the same size
function _fftfilt!(
out::AbstractArray{<:Real},
b::AbstractVector{<:Real},
b::AbstractVector{H},
x::AbstractArray{T},
nfft::Integer
) where T<:Real
) where {T<:Real,H<:Real}
nb = length(b)
nx = size(x, 1)
normfactor = 1/nfft
normfactor = nfft
W = promote_type(H, T)

L = min(nx, nfft - (nb - 1))
tmp1 = Vector{T}(undef, nfft)
tmp2 = Vector{Complex{T}}(undef, nfft >> 1 + 1)
tmp1 = Vector{W}(undef, nfft)
tmp2 = Vector{Complex{W}}(undef, nfft >> 1 + 1)

p1 = plan_rfft(tmp1)
p2 = plan_brfft(tmp2, nfft)

# FFT of filter
filterft = similar(tmp2)
tmp1[1:nb] .= b .* normfactor
tmp1[nb+1:end] .= zero(T)
tmp1[1:nb] .= b ./ normfactor
tmp1[nb+1:end] .= zero(W)
mul!(filterft, p1, tmp1)

# FFT of chunks
for colstart = 0:nx:length(x)-1
off = 1
while off <= nx
npadbefore = max(0, nb - off)
xstart = off - nb + npadbefore + 1
n = min(nfft - npadbefore, nx - xstart + 1)
for colstart = 0:nx:length(x)-1, off = 1:L:nx
npadbefore = max(0, nb - off)
xstart = off - nb + npadbefore + 1
n = min(nfft - npadbefore, nx - xstart + 1)

tmp1[1:npadbefore] .= zero(T)
tmp1[npadbefore+n+1:end] .= zero(T)
tmp1[1:npadbefore] .= zero(W)
tmp1[npadbefore+n+1:end] .= zero(W)

copyto!(tmp1, npadbefore+1, x, colstart+xstart, n)
mul!(tmp2, p1, tmp1)
broadcast!(*, tmp2, tmp2, filterft)
mul!(tmp1, p2, tmp2)
copyto!(tmp1, npadbefore+1, x, colstart+xstart, n)
mul!(tmp2, p1, tmp1)
broadcast!(*, tmp2, tmp2, filterft)
mul!(tmp1, p2, tmp2)

# Copy to output
copyto!(out, colstart+off, tmp1, nb, min(L, nx - off + 1))

off += L
end
# Copy to output
copyto!(out, colstart+off, tmp1, nb, min(L, nx - off + 1))
end

out
@@ -524,6 +519,6 @@ function filt_choose_alg!(
end
end

function filt_choose_alg!(out::AbstractArray, b::AbstractArray, x::AbstractArray)
function filt_choose_alg!(out::AbstractArray, b::AbstractVector, x::AbstractArray)
tdfilt!(out, b, x)
end
Loading

0 comments on commit d2e69ee

Please sign in to comment.