-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathchannel_flow_2D_enzyme_cuda.jl
165 lines (147 loc) · 7.33 KB
/
channel_flow_2D_enzyme_cuda.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
using Plots, Printf
using Plots.PlotMeasures
using CUDA
using Enzyme
@views av(A) = 0.5 .* (A[1:end-1] .+ A[2:end])
macro avy(A) esc(:(0.5 * ($A[iy, iz] + $A[iy+1, iz]))) end
macro avz(A) esc(:(0.5 * ($A[iy, iz] + $A[iy, iz+1]))) end
macro av4(A) esc(:(0.25 * ($A[iy, iz] + $A[iy, iz+1] + $A[iy+1, iz] + $A[iy+1, iz+1]))) end
macro d_ya(A) esc(:($A[iy+1, iz] - $A[iy, iz])) end
macro d_za(A) esc(:($A[iy, iz+1] - $A[iy, iz])) end
macro d_yi(A) esc(:($A[iy+1, iz+1] - $A[iy, iz+1])) end
macro d_zi(A) esc(:($A[iy+1, iz+1] - $A[iy+1, iz])) end
macro eII() esc(:(sqrt((0.5 * ((vx[iy+1, iz+1] - vx[iy, iz+1]) / dy + (vx[iy+1, iz] - vx[iy, iz]) / dy))^2 + (0.5 * ((vx[iy+1, iz+1] - vx[iy+1, iz]) / dz + (vx[iy, iz+1] - vx[iy, iz]) / dz))^2))) end
function update_ηeff!(ηeff, vx, k0, npow, ηreg, ηrel, dy, dz)
iy = (blockIdx().x - 1) * blockDim().x + threadIdx().x
iz = (blockIdx().y - 1) * blockDim().y + threadIdx().y
if (iy <= size(ηeff, 1) && iz <= size(ηeff, 2)) ηeff[iy, iz] = ηeff[iy, iz] * (1.0 - ηrel) + ηrel / (1.0 ./ (k0 * @eII()^(npow - 1.0)) + 1.0 / ηreg) end
return
end
function assign_ηeff!(ηeff, vx, k0, npow, ηreg, dy, dz)
iy = (blockIdx().x - 1) * blockDim().x + threadIdx().x
iz = (blockIdx().y - 1) * blockDim().y + threadIdx().y
if (iy <= size(ηeff, 1) && iz <= size(ηeff, 2)) ηeff[iy, iz] = 1.0 / (1.0 ./ (k0 * @eII()^(npow - 1.0)) + 1.0 / ηreg) end
return
end
function grad_assign_ηeff!(η̄eff,v̄x,ηeff, vx, k0, npow, ηreg, dy, dz)
Enzyme.autodiff_deferred(Reverse,assign_ηeff!,??)
return
end
function update_τ!(ηeff, τxy, τxz, vx, dy, dz)
iy = (blockIdx().x - 1) * blockDim().x + threadIdx().x
iz = (blockIdx().y - 1) * blockDim().y + threadIdx().y
if (iy <= size(τxy, 1) && iz <= size(τxy, 2)) τxy[iy, iz] = @avz(ηeff) * @d_yi(vx) / dy end
if (iy <= size(τxz, 1) && iz <= size(τxz, 2)) τxz[iy, iz] = @avy(ηeff) * @d_zi(vx) / dz end
return
end
function grad_update_τ!(??)
Enzyme.autodiff_deferred(Reverse,update_τ!,??)
return
end
function update_v!(vx, τxy, τxz, ηeff, ρg, sinα, dτ, dy, dz)
iy = (blockIdx().x - 1) * blockDim().x + threadIdx().x
iz = (blockIdx().y - 1) * blockDim().y + threadIdx().y
if (iy <= size(vx, 1) - 2 && iz <= size(vx, 2) - 2) vx[iy+1, iz+1] = vx[iy+1, iz+1] + (@d_ya(τxy) / dy + @d_za(τxz) / dz + ρg * sinα) * dτ / @av4(ηeff) end
return
end
function apply_bc!(vx)
iy = (blockIdx().x - 1) * blockDim().x + threadIdx().x
iz = (blockIdx().y - 1) * blockDim().y + threadIdx().y
if (iy <= size(vx, 1) && iz == size(vx, 2)) vx[iy, iz] = vx[iy, iz-1] end
if (iy == 1 && iz <= size(vx, 2)) vx[iy, iz] = vx[iy+1, iz] end
return
end
function residual!(resv, τxy, τxz, ρg, sinα, dy, dz)
iy = (blockIdx().x - 1) * blockDim().x + threadIdx().x
iz = (blockIdx().y - 1) * blockDim().y + threadIdx().y
if (iy <= size(resv, 1) && iz <= size(resv, 2)) resv[iy, iz] = @d_ya(τxy) / dy + @d_za(τxz) / dz + ρg * sinα end
return
end
function grad_residual!(??)
Enzyme.autodiff_deferred(Reverse,residual!,??)
return
end
@views function main()
# physics
# non-dimensional
npow = 1.0 / 3.0
sinα = sin(π / 12)
# dimensionally independent
ly, lz = 1.0, 1.0 # [m]
k0 = 1.0 # [Pa*s^npow]
ρg = 1.0 # [Pa/m]
# scales
psc = ρg * lz
ηsc = psc * (k0 / psc)^(1.0 / npow)
# dimensionally dependent
ηreg = 1e4 * ηsc
# numerics
nz = 64
ny = ceil(Int, nz * ly / lz)
nthreads = (16, 16)
nblocks = cld.((ny, nz), nthreads)
cfl = 1 / 4.1
ϵtol = 1e-6
ηrel = 5e-1
maxiter = 20000max(ny, nz)
ncheck = 500max(ny, nz)
# preprocessing
dy, dz = ly / ny, lz / nz
yc, zc = LinRange(-ly / 2 + dy / 2, ly / 2 - dy / 2, ny), LinRange(dz / 2, lz - dz / 2, nz)
yv, zv = av(yc), av(zc)
dτ = cfl * min(dy, dz)^2
# init
vx = CUDA.zeros(Float64, ny, nz)
v̄x = CUDA.zeros(Float64, ny, nz)
ηeff = CUDA.zeros(Float64, ny - 1, nz - 1)
η̄eff = CUDA.zeros(Float64, ny - 1, nz - 1)
τxy = CUDA.zeros(Float64, ny - 1, nz - 2)
τxz = CUDA.zeros(Float64, ny - 2, nz - 1)
τ̄xy = CUDA.zeros(Float64, ny - 1, nz - 2)
τ̄xz = CUDA.zeros(Float64, ny - 2, nz - 1)
r = CUDA.zeros(Float64, ny - 2, nz - 2)
r̄ = CUDA.zeros(Float64, ny - 2, nz - 2)
# action (forward)
iters_evo = Float64[]; errs_evo = Float64[]; err = 2ϵtol; iter = 1
while err >= ϵtol && iter <= maxiter
CUDA.@sync @cuda threads=nthreads blocks=nblocks update_ηeff!(ηeff, vx, k0, npow, ηreg, ηrel, dy, dz)
CUDA.@sync @cuda threads=nthreads blocks=nblocks update_τ!(ηeff, τxy, τxz, vx, dy, dz)
CUDA.@sync @cuda threads=nthreads blocks=nblocks update_v!(vx, τxy, τxz, ηeff, ρg, sinα, dτ, dy, dz)
CUDA.@sync @cuda threads=nthreads blocks=nblocks apply_bc!(vx)
if iter % ncheck == 0
CUDA.@sync @cuda threads=nthreads blocks=nblocks residual!(r, τxy, τxz, ρg, sinα, dy, dz)
err = maximum(abs.(r)) * lz / psc
push!(iters_evo, iter / nz); push!(errs_evo, err)
p1 = heatmap(yc, zc, Array(vx)'; aspect_ratio=1, xlabel="y", ylabel="z", title="Vx", xlims=(-ly / 2, ly / 2), ylims=(0, lz), c=:turbo, right_margin=10mm)
p2 = heatmap(yv, zv, Array(ηeff)'; aspect_ratio=1, xlabel="y", ylabel="z", title="ηeff", xlims=(-ly / 2, ly / 2), ylims=(0, lz), c=:turbo, colorbar_scale=:log10)
p3 = plot(iters_evo, errs_evo; xlabel="niter/nx", ylabel="err", yscale=:log10, framestyle=:box, legend=false, markershape=:circle)
display(plot(p1, p2, p3; size=(1200, 400), layout=(1, 3), bottom_margin=10mm, left_margin=10mm))
@printf(" #iter/nz=%.1f, err=%1.3e\n", iter / nz, err)
end
iter += 1
end
# action (inverse)
iters_evo = Float64[]; errs_evo = Float64[]; err = 2ϵtol; iter = 1
while err >= ϵtol && iter <= maxiter
τ̄xy .= 0.0
τ̄xz .= 0.0
η̄eff .= 0.0
CUDA.@sync @cuda threads=nthreads blocks=nblocks grad_residual!(??)
CUDA.@sync @cuda threads=nthreads blocks=nblocks grad_update_τ!(??)
CUDA.@sync @cuda threads=nthreads blocks=nblocks grad_assign_ηeff!(??)
CUDA.@sync @cuda threads=nthreads blocks=nblocks apply_bc!(v̄x)
if iter % ncheck == 0
CUDA.@sync @cuda threads=nthreads blocks=nblocks residual!(r, τxy, τxz, ρg, sinα, dy, dz)
err = maximum(abs.(r)) * lz / psc
push!(iters_evo, iter / nz); push!(errs_evo, err)
p1 = heatmap(yc, zc, Array(vx)'; aspect_ratio=1, xlabel="y", ylabel="z", title="Vx", xlims=(-ly / 2, ly / 2), ylims=(0, lz), c=:turbo, right_margin=10mm)
p2 = heatmap(yv, zv, Array(ηeff)'; aspect_ratio=1, xlabel="y", ylabel="z", title="ηeff", xlims=(-ly / 2, ly / 2), ylims=(0, lz), c=:turbo, colorbar_scale=:log10)
p3 = plot(iters_evo, errs_evo; xlabel="niter/nx", ylabel="err", yscale=:log10, framestyle=:box, legend=false, markershape=:circle)
display(plot(p1, p2, p3; size=(1200, 400), layout=(1, 3), bottom_margin=10mm, left_margin=10mm))
@printf(" #iter/nz=%.1f, err=%1.3e\n", iter / nz, err)
end
iter += 1
end
return
end
main()