Optimize volume integral kernels for shock capturing (less common use) #120

Merged · 7 commits · Jan 23, 2025
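The recurring change across every benchmark script below: construct a dedicated GPU solver with `DGSEMGPU` instead of reusing the CPU `DGSEM` solver, and pass that to `SemidiscretizationHyperbolicGPU`. A minimal sketch of the pattern, using the constructors as they appear in this diff (the mesh keywords are illustrative, since those lines are elided here):

```julia
using Trixi, TrixiCUDA, CUDA

RealT = Float32
advection_velocity = 1.0f0
equations = LinearScalarAdvectionEquation1D(advection_velocity)

# CPU and GPU solvers are now built separately; DGSEMGPU mirrors the DGSEM signature.
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs, RealT = RealT)
solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs, RealT = RealT)

mesh = TreeMesh(-1.0f0, 1.0f0, initial_refinement_level = 4, n_cells_max = 10^4)

# The CPU semidiscretization keeps the CPU solver; the GPU one now takes solver_gpu.
semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver)
semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test,
                                           solver_gpu)
```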
3 changes: 2 additions & 1 deletion benchmark/advection_basic_1d.jl
@@ -10,6 +10,7 @@ advection_velocity = 1.0f0
equations = LinearScalarAdvectionEquation1D(advection_velocity)

solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs, RealT = RealT)
+solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs, RealT = RealT)

coordinates_min = -1.0f0
coordinates_max = 1.0f0
@@ -24,7 +25,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
solver)
@info "Time for cache initialization on GPU"
CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test,
-                                                      solver)
+                                                      solver_gpu)

tspan_gpu = (0.0f0, 1.0f0)
t_gpu = 0.0f0
3 changes: 2 additions & 1 deletion benchmark/advection_basic_2d.jl
@@ -10,6 +10,7 @@ advection_velocity = (0.2f0, -0.7f0)
equations = LinearScalarAdvectionEquation2D(advection_velocity)

solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs, RealT = RealT)
+solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs, RealT = RealT)

coordinates_min = (-1.0f0, -1.0f0)
coordinates_max = (1.0f0, 1.0f0)
@@ -24,7 +25,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
solver)
@info "Time for cache initialization on GPU"
CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test,
-                                                      solver)
+                                                      solver_gpu)

tspan_gpu = (0.0f0, 1.0f0)
t_gpu = 0.0f0
3 changes: 2 additions & 1 deletion benchmark/advection_basic_3d.jl
@@ -10,6 +10,7 @@ advection_velocity = (0.2f0, -0.7f0, 0.5f0)
equations = LinearScalarAdvectionEquation3D(advection_velocity)

solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs, RealT = RealT)
+solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs, RealT = RealT)

coordinates_min = (-1.0f0, -1.0f0, -1.0f0)
coordinates_max = (1.0f0, 1.0f0, 1.0f0)
@@ -24,7 +25,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
solver)
@info "Time for cache initialization on GPU"
CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test,
-                                                      solver)
+                                                      solver_gpu)

tspan_gpu = (0.0f0, 1.0f0)
t_gpu = 0.0f0
5 changes: 4 additions & 1 deletion benchmark/euler_ec_1d.jl
@@ -14,6 +14,9 @@ volume_flux = flux_ranocha
solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha,
volume_integral = VolumeIntegralFluxDifferencing(volume_flux),
RealT = RealT)
+solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha,
+                      volume_integral = VolumeIntegralFluxDifferencing(volume_flux),
+                      RealT = RealT)

coordinates_min = (-2.0f0,)
coordinates_max = (2.0f0,)
@@ -25,7 +28,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
@info "Time for cache initialization on CPU"
@time semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
@info "Time for cache initialization on GPU"
-CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)

tspan_gpu = (0.0f0, 0.4f0)
t_gpu = 0.0f0
5 changes: 4 additions & 1 deletion benchmark/euler_ec_2d.jl
@@ -14,6 +14,9 @@ volume_flux = flux_ranocha
solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha,
volume_integral = VolumeIntegralFluxDifferencing(volume_flux),
RealT = RealT)
+solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha,
+                      volume_integral = VolumeIntegralFluxDifferencing(volume_flux),
+                      RealT = RealT)

coordinates_min = (-2.0f0, -2.0f0)
coordinates_max = (2.0f0, 2.0f0)
@@ -27,7 +30,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
@time semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
boundary_conditions = boundary_condition_periodic)
@info "Time for cache initialization on GPU"
-CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
boundary_conditions = boundary_condition_periodic)

tspan_gpu = (0.0f0, 0.4f0)
5 changes: 4 additions & 1 deletion benchmark/euler_ec_3d.jl
@@ -14,6 +14,9 @@ volume_flux = flux_ranocha
solver = DGSEM(polydeg = 2, surface_flux = flux_ranocha,
volume_integral = VolumeIntegralFluxDifferencing(volume_flux),
RealT = RealT)
+solver_gpu = DGSEMGPU(polydeg = 2, surface_flux = flux_ranocha,
+                      volume_integral = VolumeIntegralFluxDifferencing(volume_flux),
+                      RealT = RealT)

coordinates_min = (-2.0f0, -2.0f0, -2.0f0)
coordinates_max = (2.0f0, 2.0f0, 2.0f0)
@@ -25,7 +28,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
@info "Time for cache initialization on CPU"
@time semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
@info "Time for cache initialization on GPU"
-CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)

tspan_gpu = (0.0f0, 0.4f0)
t_gpu = 0.0f0
82 changes: 10 additions & 72 deletions benchmark/euler_shock_1d.jl
@@ -12,7 +12,10 @@ initial_condition = initial_condition_weak_blast_wave

surface_flux = flux_lax_friedrichs
volume_flux = flux_shima_etal
+
basis = LobattoLegendreBasis(RealT, 3)
+basis_gpu = LobattoLegendreBasisGPU(3, RealT)
+
indicator_sc = IndicatorHennemannGassner(equations, basis,
alpha_max = 0.5f0,
alpha_min = 0.001f0,
@@ -21,7 +24,9 @@ indicator_sc = IndicatorHennemannGassner(equations, basis,
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                 volume_flux_dg = volume_flux,
                                                 volume_flux_fv = surface_flux)
+
solver = DGSEM(basis, surface_flux, volume_integral)
+solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)

coordinates_min = -2.0f0
coordinates_max = 2.0f0
@@ -33,7 +38,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
@info "Time for cache initialization on CPU"
@time semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
@info "Time for cache initialization on GPU"
-CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)

tspan = tspan_gpu = (0.0f0, 0.4f0)
t = t_gpu = 0.0f0
@@ -63,74 +68,7 @@ du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cach

# Reset du and volume integral
@info "Time for reset_du! and volume_integral! on GPU"
-CUDA.@time TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
-                                           Trixi.have_nonconservative_terms(equations_gpu),
-                                           equations_gpu, solver_gpu.volume_integral, solver_gpu,
-                                           cache_gpu)
-@info "Time for reset_du! and volume_integral! on CPU"
-@time begin
-    Trixi.reset_du!(du, solver, cache)
-    Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
-                                equations, solver.volume_integral, solver, cache)
-end
-
-# Prolong to interfaces
-@info "Time for prolong2interfaces! on GPU"
-CUDA.@time TrixiCUDA.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
-@info "Time for prolong2interfaces! on CPU"
-@time Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver)
-
-# Interface flux
-@info "Time for interface_flux! on GPU"
-CUDA.@time TrixiCUDA.cuda_interface_flux!(mesh_gpu,
-                                          Trixi.have_nonconservative_terms(equations_gpu),
-                                          equations_gpu, solver_gpu, cache_gpu)
-@info "Time for interface_flux! on CPU"
-@time Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
-                                 Trixi.have_nonconservative_terms(equations), equations,
-                                 solver.surface_integral, solver, cache)
-
-# Prolong to boundaries
-@info "Time for prolong2boundaries! on GPU"
-CUDA.@time TrixiCUDA.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu,
-                                              equations_gpu, cache_gpu)
-@info "Time for prolong2boundaries! on CPU"
-@time Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
-
-# Boundary flux
-@info "Time for boundary_flux! on GPU"
-CUDA.@time TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
-                                         Trixi.have_nonconservative_terms(equations_gpu),
-                                         equations_gpu, solver_gpu, cache_gpu)
-@info "Time for boundary_flux! on CPU"
-@time Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
-                                solver.surface_integral, solver)
-
-# Surface integral
-@info "Time for surface_integral! on GPU"
-CUDA.@time TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
-@info "Time for surface_integral! on CPU"
-@time Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral,
-                                   solver, cache)
-
-# Jacobian
-@info "Time for jacobian! on GPU"
-CUDA.@time TrixiCUDA.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
-@info "Time for jacobian! on CPU"
-@time Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
-
-# Sources terms
-@info "Time for sources! on GPU"
-CUDA.@time TrixiCUDA.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu,
-                                   equations_gpu, cache_gpu)
-@info "Time for sources! on CPU"
-@time Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
-
-# Semidiscretization process
-@info "Time for rhs! on GPU"
-CUDA.@time TrixiCUDA.rhs_gpu!(du_gpu, u_gpu, t_gpu, mesh_gpu, equations_gpu,
-                              boundary_conditions_gpu, source_terms_gpu,
-                              solver_gpu, cache_gpu)
-@info "Time for rhs! on CPU"
-@time Trixi.rhs!(du, u, t, mesh, equations, boundary_conditions, source_terms,
-                 solver, cache)
+@benchmark CUDA.@sync TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
+                                                      Trixi.have_nonconservative_terms(equations_gpu),
+                                                      equations_gpu, solver_gpu.volume_integral, solver_gpu,
+                                                      cache_gpu)
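In both shock-capturing scripts, the old side-by-side CPU/GPU timing of every RHS stage is dropped; each script now ends with a single BenchmarkTools measurement of the volume integral kernel, the kernel this PR optimizes. `CUDA.@sync` waits for the kernel to finish before the timer stops, so the measurement covers kernel execution rather than just the asynchronous launch. A sketch of the assumed usage, reusing the variables set up earlier in the script (`@benchmark` comes from BenchmarkTools.jl, presumably loaded in the elided preamble):

```julia
using BenchmarkTools  # provides @benchmark; assumed to be loaded by the benchmark suite

# CUDA kernels launch asynchronously, so wrap the call in CUDA.@sync to time
# the completed kernel instead of the launch overhead alone.
trial = @benchmark CUDA.@sync TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
                                                              Trixi.have_nonconservative_terms(equations_gpu),
                                                              equations_gpu,
                                                              solver_gpu.volume_integral,
                                                              solver_gpu, cache_gpu)
minimum(trial)  # the minimum time is BenchmarkTools' least noisy summary statistic
```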
101 changes: 10 additions & 91 deletions benchmark/euler_shock_2d.jl
@@ -12,7 +12,10 @@ initial_condition = initial_condition_weak_blast_wave

surface_flux = flux_lax_friedrichs
volume_flux = flux_shima_etal
+
basis = LobattoLegendreBasis(RealT, 3)
+basis_gpu = LobattoLegendreBasisGPU(3, RealT)
+
indicator_sc = IndicatorHennemannGassner(equations, basis,
alpha_max = 0.5f0,
alpha_min = 0.001f0,
@@ -21,7 +24,9 @@ indicator_sc = IndicatorHennemannGassner(equations, basis,
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                 volume_flux_dg = volume_flux,
                                                 volume_flux_fv = surface_flux)
+
solver = DGSEM(basis, surface_flux, volume_integral)
+solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)

coordinates_min = (-2.0f0, -2.0f0)
coordinates_max = (2.0f0, 2.0f0)
@@ -33,7 +38,7 @@ mesh = TreeMesh(coordinates_min, coordinates_max,
@info "Time for cache initialization on CPU"
@time semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
@info "Time for cache initialization on GPU"
-CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+CUDA.@time semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)

tspan = tspan_gpu = (0.0f0, 1.0f0)
t = t_gpu = 0.0f0
@@ -63,93 +68,7 @@ du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cach

# Reset du and volume integral
@info "Time for reset_du! and volume_integral! on GPU"
-CUDA.@time TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
-                                           Trixi.have_nonconservative_terms(equations_gpu),
-                                           equations_gpu, solver_gpu.volume_integral, solver_gpu,
-                                           cache_gpu)
-@info "Time for reset_du! and volume_integral! on CPU"
-@time begin
-    Trixi.reset_du!(du, solver, cache)
-    Trixi.calc_volume_integral!(du, u, mesh, Trixi.have_nonconservative_terms(equations),
-                                equations, solver.volume_integral, solver, cache)
-end
-
-# Prolong to interfaces
-@info "Time for prolong2interfaces! on GPU"
-CUDA.@time TrixiCUDA.cuda_prolong2interfaces!(u_gpu, mesh_gpu, equations_gpu, cache_gpu)
-@info "Time for prolong2interfaces! on CPU"
-@time Trixi.prolong2interfaces!(cache, u, mesh, equations, solver.surface_integral, solver)
-
-# Interface flux
-@info "Time for interface_flux! on GPU"
-CUDA.@time TrixiCUDA.cuda_interface_flux!(mesh_gpu,
-                                          Trixi.have_nonconservative_terms(equations_gpu),
-                                          equations_gpu, solver_gpu, cache_gpu)
-@info "Time for interface_flux! on CPU"
-@time Trixi.calc_interface_flux!(cache.elements.surface_flux_values, mesh,
-                                 Trixi.have_nonconservative_terms(equations), equations,
-                                 solver.surface_integral, solver, cache)
-
-# Prolong to boundaries
-@info "Time for prolong2boundaries! on GPU"
-CUDA.@time TrixiCUDA.cuda_prolong2boundaries!(u_gpu, mesh_gpu, boundary_conditions_gpu,
-                                              equations_gpu, cache_gpu)
-@info "Time for prolong2boundaries! on CPU"
-@time Trixi.prolong2boundaries!(cache, u, mesh, equations, solver.surface_integral, solver)
-
-# Boundary flux
-@info "Time for boundary_flux! on GPU"
-CUDA.@time TrixiCUDA.cuda_boundary_flux!(t_gpu, mesh_gpu, boundary_conditions_gpu,
-                                         Trixi.have_nonconservative_terms(equations_gpu),
-                                         equations_gpu, solver_gpu, cache_gpu)
-@info "Time for boundary_flux! on CPU"
-@time Trixi.calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations,
-                                solver.surface_integral, solver)
-
-# Prolong to mortars
-@info "Time for prolong2mortars! on GPU"
-CUDA.@time TrixiCUDA.cuda_prolong2mortars!(u_gpu, mesh_gpu,
-                                           TrixiCUDA.check_cache_mortars(cache_gpu),
-                                           solver_gpu, cache_gpu)
-@info "Time for prolong2mortars! on CPU"
-@time Trixi.prolong2mortars!(cache, u, mesh, equations,
-                             solver.mortar, solver.surface_integral, solver)
-
-# Mortar flux
-@info "Time for mortar_flux! on GPU"
-CUDA.@time TrixiCUDA.cuda_mortar_flux!(mesh_gpu, TrixiCUDA.check_cache_mortars(cache_gpu),
-                                       Trixi.have_nonconservative_terms(equations_gpu),
-                                       equations_gpu, solver_gpu, cache_gpu)
-@info "Time for mortar_flux! on CPU"
-@time Trixi.calc_mortar_flux!(cache.elements.surface_flux_values, mesh,
-                              Trixi.have_nonconservative_terms(equations), equations,
-                              solver.mortar, solver.surface_integral, solver, cache)
-
-# Surface integral
-@info "Time for surface_integral! on GPU"
-CUDA.@time TrixiCUDA.cuda_surface_integral!(du_gpu, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
-@info "Time for surface_integral! on CPU"
-@time Trixi.calc_surface_integral!(du, u, mesh, equations, solver.surface_integral,
-                                   solver, cache)
-
-# Jacobian
-@info "Time for jacobian! on GPU"
-CUDA.@time TrixiCUDA.cuda_jacobian!(du_gpu, mesh_gpu, equations_gpu, cache_gpu)
-@info "Time for jacobian! on CPU"
-@time Trixi.apply_jacobian!(du, mesh, equations, solver, cache)
-
-# Sources terms
-@info "Time for sources! on GPU"
-CUDA.@time TrixiCUDA.cuda_sources!(du_gpu, u_gpu, t_gpu, source_terms_gpu,
-                                   equations_gpu, cache_gpu)
-@info "Time for sources! on CPU"
-@time Trixi.calc_sources!(du, u, t, source_terms, equations, solver, cache)
-
-# Semidiscretization process
-@info "Time for rhs! on GPU"
-CUDA.@time TrixiCUDA.rhs_gpu!(du_gpu, u_gpu, t_gpu, mesh_gpu, equations_gpu,
-                              boundary_conditions_gpu, source_terms_gpu,
-                              solver_gpu, cache_gpu)
-@info "Time for rhs! on CPU"
-@time Trixi.rhs!(du, u, t, mesh, equations, boundary_conditions, source_terms,
-                 solver, cache)
+@benchmark CUDA.@sync TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
+                                                      Trixi.have_nonconservative_terms(equations_gpu),
+                                                      equations_gpu, solver_gpu.volume_integral, solver_gpu,
+                                                      cache_gpu)
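One easy-to-miss detail when adapting these scripts: the two basis constructors take their arguments in opposite order, as used throughout this diff:

```julia
basis = LobattoLegendreBasis(RealT, 3)        # CPU basis: element type first, then polydeg
basis_gpu = LobattoLegendreBasisGPU(3, RealT) # GPU basis: polydeg first, then element type

# Both solvers share the same shock-capturing volume integral; only the basis differs.
solver = DGSEM(basis, surface_flux, volume_integral)
solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)
```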