From c64665d37bee8cd2c20a1d433f909bdb325b4cf0 Mon Sep 17 00:00:00 2001 From: Huiyu Xie Date: Mon, 20 Jan 2025 13:26:48 -1000 Subject: [PATCH] Adapt `DGSEM` to enable caching of its data on GPU (#117) * Start * Complete * Complete 1D * Fix * Fix * Complete 2D * Complete 3D * Test examples --- README.md | 5 +- examples/advection_basic_1d.jl | 2 +- examples/advection_basic_2d.jl | 2 +- examples/advection_basic_3d.jl | 2 +- examples/advection_mortar_2d.jl | 2 +- examples/advection_mortar_3d.jl | 2 +- examples/euler_ec_1d.jl | 4 +- examples/euler_ec_2d.jl | 4 +- examples/euler_ec_3d.jl | 4 +- examples/euler_shockcapturing_1d.jl | 4 +- examples/euler_shockcapturing_2d.jl | 4 +- examples/euler_shockcapturing_3d.jl | 4 +- examples/euler_source_terms_1d.jl | 2 +- examples/euler_source_terms_2d.jl | 2 +- examples/euler_source_terms_3d.jl | 4 +- examples/eulermulti_ec_1d.jl | 4 +- examples/eulermulti_ec_2d.jl | 4 +- examples/hypdiff_nonperiodic_1d.jl | 2 +- examples/hypdiff_nonperiodic_2d.jl | 2 +- examples/hypdiff_nonperiodic_3d.jl | 2 +- examples/shallowwater_dirichlet_1d.jl | 8 +- examples/shallowwater_dirichlet_2d.jl | 6 +- examples/shallowwater_ec_1d.jl | 6 +- examples/shallowwater_ec_2d.jl | 6 +- examples/shallowwater_source_terms_1d.jl | 6 +- examples/shallowwater_source_terms_2d.jl | 6 +- src/TrixiCUDA.jl | 31 +++-- .../semidiscretization_hyperbolic.jl | 7 +- src/solvers/basis_lobatto_legendre.jl | 108 ++++++++++++++++++ src/solvers/cache.jl | 20 ++-- src/solvers/dg.jl | 4 +- src/solvers/dg_1d.jl | 40 ++++--- src/solvers/dg_2d.jl | 52 +++++---- src/solvers/dg_3d.jl | 64 ++++++----- src/solvers/dgsem.jl | 30 +++++ src/solvers/solvers.jl | 2 + test/tree_dgsem_1d/advection_basic.jl | 8 +- test/tree_dgsem_1d/advection_extended.jl | 8 +- test/tree_dgsem_1d/burgers_basic.jl | 8 +- test/tree_dgsem_1d/burgers_rarefraction.jl | 9 +- test/tree_dgsem_1d/burgers_shock.jl | 9 +- test/tree_dgsem_1d/euler_blast_wave.jl | 9 +- test/tree_dgsem_1d/euler_ec.jl | 9 +- test/tree_dgsem_1d/euler_shock.jl | 9 +- test/tree_dgsem_1d/euler_source_terms.jl | 8 +- .../euler_source_terms_nonperiodic.jl | 8 +- test/tree_dgsem_1d/eulermulti_ec.jl | 9 +- test/tree_dgsem_1d/eulermulti_es.jl | 9 +- test/tree_dgsem_1d/eulerquasi_ec.jl | 9 +- test/tree_dgsem_1d/eulerquasi_source_terms.jl | 9 +- .../hypdiff_harmonic_nonperiodic.jl | 8 +- test/tree_dgsem_1d/hypdiff_nonperiodic.jl | 8 +- test/tree_dgsem_1d/mhd_alfven_wave.jl | 9 +- test/tree_dgsem_1d/mhd_ec.jl | 9 +- test/tree_dgsem_1d/shallowwater_shock.jl | 9 +- test/tree_dgsem_2d/advection_basic.jl | 8 +- test/tree_dgsem_2d/advection_mortar.jl | 8 +- test/tree_dgsem_2d/euler_blob_mortar.jl | 9 +- test/tree_dgsem_2d/euler_ec.jl | 9 +- test/tree_dgsem_2d/euler_shock.jl | 9 +- test/tree_dgsem_2d/euler_source_terms.jl | 8 +- .../euler_source_terms_nonperiodic.jl | 8 +- test/tree_dgsem_2d/eulermulti_ec.jl | 9 +- test/tree_dgsem_2d/eulermulti_es.jl | 9 +- test/tree_dgsem_2d/hypdiff_nonperiodic.jl | 8 +- test/tree_dgsem_2d/mhd_alfven_wave.jl | 10 +- test/tree_dgsem_2d/mhd_alfven_wave_mortar.jl | 11 +- test/tree_dgsem_2d/mhd_ec.jl | 10 +- test/tree_dgsem_2d/mhd_shock.jl | 10 +- test/tree_dgsem_2d/shallowwater_ec.jl | 10 +- .../shallowwater_source_terms.jl | 10 +- .../shawllowwater_source_terms_nonperiodic.jl | 10 +- test/tree_dgsem_3d/advection_basic.jl | 8 +- test/tree_dgsem_3d/advection_mortar.jl | 8 +- test/tree_dgsem_3d/euler_convergence.jl | 9 +- test/tree_dgsem_3d/euler_ec.jl | 9 +- test/tree_dgsem_3d/euler_mortar.jl | 8 +- test/tree_dgsem_3d/euler_shock.jl | 9 +- 
test/tree_dgsem_3d/euler_source_terms.jl | 9 +- test/tree_dgsem_3d/hypdiff_nonperiodic.jl | 8 +- test/tree_dgsem_3d/mhd_alfven_wave.jl | 10 +- test/tree_dgsem_3d/mhd_alfven_wave_mortar.jl | 11 +- test/tree_dgsem_3d/mhd_ec.jl | 10 +- test/tree_dgsem_3d/mhd_shock.jl | 10 +- 84 files changed, 501 insertions(+), 385 deletions(-) create mode 100644 src/solvers/basis_lobatto_legendre.jl create mode 100644 src/solvers/dgsem.jl diff --git a/README.md b/README.md index 95ac591e..13b52377 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,8 @@ using OrdinaryDiffEq # Currently skip the issue of scalar indexing # See issues https://github.com/trixi-gpu/TrixiCUDA.jl/issues/59 -# and https://github.com/trixi-gpu/TrixiCUDA.jl/issues/113 +# https://github.com/trixi-gpu/TrixiCUDA.jl/issues/113 +# https://github.com/trixi-gpu/TrixiCUDA.jl/issues/118 using CUDA CUDA.allowscalar(true) @@ -88,7 +89,7 @@ CUDA.allowscalar(true) advection_velocity = 1.0 equations = LinearScalarAdvectionEquation1D(advection_velocity) -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = -1.0 coordinates_max = 1.0 diff --git a/examples/advection_basic_1d.jl b/examples/advection_basic_1d.jl index 76e02465..0c2255e4 100644 --- a/examples/advection_basic_1d.jl +++ b/examples/advection_basic_1d.jl @@ -14,7 +14,7 @@ advection_velocity = 1.0 equations = LinearScalarAdvectionEquation1D(advection_velocity) # Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = -1.0 # minimum coordinate coordinates_max = 1.0 # maximum coordinate diff --git a/examples/advection_basic_2d.jl b/examples/advection_basic_2d.jl index 6fbd9487..40f3d10d 100644 --- a/examples/advection_basic_2d.jl +++ b/examples/advection_basic_2d.jl @@ -14,7 +14,7 @@ advection_velocity = (0.2, -0.7) equations = LinearScalarAdvectionEquation2D(advection_velocity) # Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y)) coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y)) diff --git a/examples/advection_basic_3d.jl b/examples/advection_basic_3d.jl index 0476ac99..e34f4cf5 100644 --- a/examples/advection_basic_3d.jl +++ b/examples/advection_basic_3d.jl @@ -14,7 +14,7 @@ advection_velocity = (0.2, -0.7, 0.5) equations = LinearScalarAdvectionEquation3D(advection_velocity) # Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z)) coordinates_max = (1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z)) diff --git a/examples/advection_mortar_2d.jl b/examples/advection_mortar_2d.jl index 966973c1..dd902562 100644 --- a/examples/advection_mortar_2d.jl +++ b/examples/advection_mortar_2d.jl @@ -14,7 +14,7 @@ advection_velocity = (0.2, -0.7) equations = LinearScalarAdvectionEquation2D(advection_velocity) initial_condition = initial_condition_convergence_test -solver = DGSEM(polydeg 
= 3, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-1.0, -1.0) coordinates_max = (1.0, 1.0) diff --git a/examples/advection_mortar_3d.jl b/examples/advection_mortar_3d.jl index e0c85245..cba49d64 100644 --- a/examples/advection_mortar_3d.jl +++ b/examples/advection_mortar_3d.jl @@ -15,7 +15,7 @@ advection_velocity = (0.2, -0.7, 0.5) equations = LinearScalarAdvectionEquation3D(advection_velocity) initial_condition = initial_condition_convergence_test -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-1.0, -1.0, -1.0) coordinates_max = (1.0, 1.0, 1.0) diff --git a/examples/euler_ec_1d.jl b/examples/euler_ec_1d.jl index 8d73da8e..a415a766 100644 --- a/examples/euler_ec_1d.jl +++ b/examples/euler_ec_1d.jl @@ -14,8 +14,8 @@ equations = CompressibleEulerEquations1D(1.4) initial_condition = initial_condition_weak_blast_wave volume_flux = flux_ranocha -solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha, - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) coordinates_min = (-2.0,) coordinates_max = (2.0,) diff --git a/examples/euler_ec_2d.jl b/examples/euler_ec_2d.jl index 64810f26..2a70eb01 100644 --- a/examples/euler_ec_2d.jl +++ b/examples/euler_ec_2d.jl @@ -14,8 +14,8 @@ equations = CompressibleEulerEquations2D(1.4) initial_condition = initial_condition_weak_blast_wave volume_flux = flux_ranocha -solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha, - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) coordinates_min = (-2.0, -2.0) coordinates_max = (2.0, 2.0) diff --git a/examples/euler_ec_3d.jl b/examples/euler_ec_3d.jl index 21d1d179..5bff7f93 100644 --- a/examples/euler_ec_3d.jl +++ b/examples/euler_ec_3d.jl @@ -15,8 +15,8 @@ equations = CompressibleEulerEquations3D(1.4) initial_condition = initial_condition_weak_blast_wave volume_flux = flux_ranocha -solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha, - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) coordinates_min = (-2.0, -2.0, -2.0) coordinates_max = (2.0, 2.0, 2.0) diff --git a/examples/euler_shockcapturing_1d.jl b/examples/euler_shockcapturing_1d.jl index 4a0d52b7..fddf27fd 100644 --- a/examples/euler_shockcapturing_1d.jl +++ b/examples/euler_shockcapturing_1d.jl @@ -16,7 +16,7 @@ initial_condition = initial_condition_weak_blast_wave surface_flux = flux_lax_friedrichs volume_flux = flux_shima_etal -basis = LobattoLegendreBasis(3) +basis = LobattoLegendreBasisGPU(3) indicator_sc = IndicatorHennemannGassner(equations, basis, alpha_max = 0.5, alpha_min = 0.001, @@ -25,7 +25,7 @@ indicator_sc = IndicatorHennemannGassner(equations, basis, volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; volume_flux_dg = volume_flux, volume_flux_fv = surface_flux) -solver = DGSEM(basis, surface_flux, volume_integral) +solver = DGSEMGPU(basis, surface_flux, volume_integral) coordinates_min = -2.0 coordinates_max = 2.0 diff --git a/examples/euler_shockcapturing_2d.jl b/examples/euler_shockcapturing_2d.jl index ceebe5ae..e2f8cb54 100644 --- 
a/examples/euler_shockcapturing_2d.jl +++ b/examples/euler_shockcapturing_2d.jl @@ -16,7 +16,7 @@ initial_condition = initial_condition_weak_blast_wave surface_flux = flux_lax_friedrichs volume_flux = flux_shima_etal -basis = LobattoLegendreBasis(3) +basis = LobattoLegendreBasisGPU(3) indicator_sc = IndicatorHennemannGassner(equations, basis, alpha_max = 0.5, alpha_min = 0.001, @@ -25,7 +25,7 @@ indicator_sc = IndicatorHennemannGassner(equations, basis, volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; volume_flux_dg = volume_flux, volume_flux_fv = surface_flux) -solver = DGSEM(basis, surface_flux, volume_integral) +solver = DGSEMGPU(basis, surface_flux, volume_integral) coordinates_min = (-2.0, -2.0) coordinates_max = (2.0, 2.0) diff --git a/examples/euler_shockcapturing_3d.jl b/examples/euler_shockcapturing_3d.jl index 2fdd1e16..0826eef0 100644 --- a/examples/euler_shockcapturing_3d.jl +++ b/examples/euler_shockcapturing_3d.jl @@ -18,7 +18,7 @@ surface_flux = flux_ranocha # OBS! Using a non-dissipative flux is only sensible to test EC, # but not for real shock simulations volume_flux = flux_ranocha polydeg = 3 -basis = LobattoLegendreBasis(polydeg) +basis = LobattoLegendreBasisGPU(polydeg) indicator_sc = IndicatorHennemannGassner(equations, basis, alpha_max = 0.5, alpha_min = 0.001, @@ -27,7 +27,7 @@ indicator_sc = IndicatorHennemannGassner(equations, basis, volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; volume_flux_dg = volume_flux, volume_flux_fv = surface_flux) -solver = DGSEM(basis, surface_flux, volume_integral) +solver = DGSEMGPU(basis, surface_flux, volume_integral) coordinates_min = (-2.0, -2.0, -2.0) coordinates_max = (2.0, 2.0, 2.0) diff --git a/examples/euler_source_terms_1d.jl b/examples/euler_source_terms_1d.jl index 1f09f620..f55a3416 100644 --- a/examples/euler_source_terms_1d.jl +++ b/examples/euler_source_terms_1d.jl @@ -16,7 +16,7 @@ initial_condition = initial_condition_convergence_test # Note that the expected EOC of 5 is not reached with this flux. # Using flux_hll instead yields the expected EOC.
-solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs) coordinates_min = 0.0 coordinates_max = 2.0 diff --git a/examples/euler_source_terms_2d.jl b/examples/euler_source_terms_2d.jl index a39c8f11..5167ca37 100644 --- a/examples/euler_source_terms_2d.jl +++ b/examples/euler_source_terms_2d.jl @@ -13,7 +13,7 @@ CUDA.allowscalar(true) equations = CompressibleEulerEquations2D(1.4) initial_condition = initial_condition_convergence_test -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (0.0, 0.0) coordinates_max = (2.0, 2.0) diff --git a/examples/euler_source_terms_3d.jl b/examples/euler_source_terms_3d.jl index 516fe52e..6f50eccb 100644 --- a/examples/euler_source_terms_3d.jl +++ b/examples/euler_source_terms_3d.jl @@ -14,8 +14,8 @@ equations = CompressibleEulerEquations3D(1.4) initial_condition = initial_condition_convergence_test -solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs, - volume_integral = VolumeIntegralWeakForm()) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs, + volume_integral = VolumeIntegralWeakForm()) coordinates_min = (0.0, 0.0, 0.0) coordinates_max = (2.0, 2.0, 2.0) diff --git a/examples/eulermulti_ec_1d.jl b/examples/eulermulti_ec_1d.jl index 0bf9de79..50603e7a 100644 --- a/examples/eulermulti_ec_1d.jl +++ b/examples/eulermulti_ec_1d.jl @@ -20,8 +20,8 @@ equations = CompressibleEulerMulticomponentEquations1D(gammas = (1.4, 1.4), initial_condition = initial_condition_weak_blast_wave volume_flux = flux_ranocha -solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha, - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) coordinates_min = (-2.0,) coordinates_max = (2.0,) diff --git a/examples/eulermulti_ec_2d.jl b/examples/eulermulti_ec_2d.jl index e78c8557..5313ffba 100644 --- a/examples/eulermulti_ec_2d.jl +++ b/examples/eulermulti_ec_2d.jl @@ -15,8 +15,8 @@ equations = CompressibleEulerMulticomponentEquations2D(gammas = 1.4, initial_condition = initial_condition_weak_blast_wave volume_flux = flux_ranocha -solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha, - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) coordinates_min = (-2.0, -2.0) coordinates_max = (2.0, 2.0) diff --git a/examples/hypdiff_nonperiodic_1d.jl b/examples/hypdiff_nonperiodic_1d.jl index 4f563b89..56247f75 100644 --- a/examples/hypdiff_nonperiodic_1d.jl +++ b/examples/hypdiff_nonperiodic_1d.jl @@ -16,7 +16,7 @@ initial_condition = initial_condition_poisson_nonperiodic boundary_conditions = boundary_condition_poisson_nonperiodic -solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs) coordinates_min = 0.0 coordinates_max = 1.0 diff --git a/examples/hypdiff_nonperiodic_2d.jl b/examples/hypdiff_nonperiodic_2d.jl index 81a66bfb..cf563daa 100644 --- a/examples/hypdiff_nonperiodic_2d.jl +++ b/examples/hypdiff_nonperiodic_2d.jl @@ -19,7 +19,7 @@ boundary_conditions = (x_neg = boundary_condition_poisson_nonperiodic, y_neg = boundary_condition_periodic, y_pos = boundary_condition_periodic) -solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs) 
+solver = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs) coordinates_min = (0.0, 0.0) coordinates_max = (1.0, 1.0) diff --git a/examples/hypdiff_nonperiodic_3d.jl b/examples/hypdiff_nonperiodic_3d.jl index 03531830..1b0bb42b 100644 --- a/examples/hypdiff_nonperiodic_3d.jl +++ b/examples/hypdiff_nonperiodic_3d.jl @@ -20,7 +20,7 @@ boundary_conditions = (x_neg = boundary_condition_poisson_nonperiodic, z_neg = boundary_condition_periodic, z_pos = boundary_condition_periodic) -solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs) +solver = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs) coordinates_min = (0.0, 0.0, 0.0) coordinates_max = (1.0, 1.0, 1.0) diff --git a/examples/shallowwater_dirichlet_1d.jl b/examples/shallowwater_dirichlet_1d.jl index db02662b..c69b858c 100644 --- a/examples/shallowwater_dirichlet_1d.jl +++ b/examples/shallowwater_dirichlet_1d.jl @@ -20,10 +20,10 @@ boundary_condition = BoundaryConditionDirichlet(initial_condition) # Get the DG approximation space volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) -solver = DGSEM(polydeg = 4, - surface_flux = (flux_hll, - flux_nonconservative_fjordholm_etal), - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 4, + surface_flux = (flux_hll, + flux_nonconservative_fjordholm_etal), + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) ############################################################################### # Get the TreeMesh and setup a periodic mesh diff --git a/examples/shallowwater_dirichlet_2d.jl b/examples/shallowwater_dirichlet_2d.jl index bcfd525e..bbeeed6a 100644 --- a/examples/shallowwater_dirichlet_2d.jl +++ b/examples/shallowwater_dirichlet_2d.jl @@ -20,9 +20,9 @@ boundary_condition = BoundaryConditionDirichlet(initial_condition) # Get the DG approximation space volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) -solver = DGSEM(polydeg = 3, - surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal), - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 3, + surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal), + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) ############################################################################### # Get the TreeMesh and setup a periodic mesh diff --git a/examples/shallowwater_ec_1d.jl b/examples/shallowwater_ec_1d.jl index 60fde654..4f230e30 100644 --- a/examples/shallowwater_ec_1d.jl +++ b/examples/shallowwater_ec_1d.jl @@ -49,9 +49,9 @@ initial_condition = initial_condition_ec_discontinuous_bottom # Get the DG approximation space volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) -solver = DGSEM(polydeg = 4, - surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal), - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 4, + surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal), + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) ############################################################################### # Get the TreeMesh and setup a periodic mesh diff --git a/examples/shallowwater_ec_2d.jl b/examples/shallowwater_ec_2d.jl index c140ac02..8e62e48c 100644 --- a/examples/shallowwater_ec_2d.jl +++ b/examples/shallowwater_ec_2d.jl @@ -21,9 +21,9 @@ initial_condition = initial_condition_weak_blast_wave # Get the DG approximation 
space volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) -solver = DGSEM(polydeg = 4, - surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal), - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 4, + surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal), + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) ############################################################################### # Get the TreeMesh and setup a periodic mesh diff --git a/examples/shallowwater_source_terms_1d.jl b/examples/shallowwater_source_terms_1d.jl index 229f5544..02d42aac 100644 --- a/examples/shallowwater_source_terms_1d.jl +++ b/examples/shallowwater_source_terms_1d.jl @@ -18,9 +18,9 @@ initial_condition = initial_condition_convergence_test # Get the DG approximation space volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) -solver = DGSEM(polydeg = 3, - surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal), - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 3, + surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal), + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) ############################################################################### # Get the TreeMesh and setup a periodic mesh diff --git a/examples/shallowwater_source_terms_2d.jl b/examples/shallowwater_source_terms_2d.jl index 80b6cbea..a05d970a 100644 --- a/examples/shallowwater_source_terms_2d.jl +++ b/examples/shallowwater_source_terms_2d.jl @@ -18,9 +18,9 @@ initial_condition = initial_condition_convergence_test # MMS EOC test # Get the DG approximation space volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) -solver = DGSEM(polydeg = 3, - surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal), - volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) +solver = DGSEMGPU(polydeg = 3, + surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal), + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) ############################################################################### # Get the TreeMesh and setup a periodic mesh diff --git a/src/TrixiCUDA.jl b/src/TrixiCUDA.jl index 0a56766e..f25cd397 100644 --- a/src/TrixiCUDA.jl +++ b/src/TrixiCUDA.jl @@ -7,21 +7,30 @@ using CUDA using CUDA: @cuda, CuArray, HostKernel, threadIdx, blockIdx, blockDim, reshape, similar, launch_configuration -using Trixi: AbstractEquations, AbstractContainer, AbstractMesh, AbstractSemidiscretization, - True, False, TreeMesh, DGSEM, SemidiscretizationHyperbolic, - ElementContainer1D, ElementContainer2D, ElementContainer3D, - InterfaceContainer1D, InterfaceContainer2D, InterfaceContainer3D, - BoundaryContainer1D, BoundaryContainer2D, BoundaryContainer3D, - LobattoLegendreMortarL2, L2MortarContainer2D, L2MortarContainer3D, - BoundaryConditionPeriodic, BoundaryConditionDirichlet, - VolumeIntegralWeakForm, VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG, - allocate_coefficients, compute_coefficients, mesh_equations_solver_cache, - flux, ntuple, nvariables, nnodes, nelements, nmortars, +# Trixi.jl methods +using Trixi: allocate_coefficients, compute_coefficients, mesh_equations_solver_cache, + flux, ntuple, nnodes, nvariables, nelements, local_leaf_cells, init_elements, init_interfaces, init_boundaries, init_mortars, have_nonconservative_terms, 
boundary_condition_periodic, digest_boundary_conditions, check_periodicity_mesh_boundary_conditions, + gauss_lobatto_nodes_weights, vandermonde_legendre, + calc_dsplit, calc_dhat, calc_lhat, polynomial_derivative_matrix, + calc_forward_upper, calc_forward_lower, calc_reverse_upper, calc_reverse_lower, set_log_type!, set_sqrt_type! +# Trixi.jl structs +using Trixi: AbstractEquations, AbstractContainer, AbstractMesh, AbstractSemidiscretization, + AbstractSurfaceIntegral, + True, False, TreeMesh, DG, DGSEM, SemidiscretizationHyperbolic, + LobattoLegendreBasis, LobattoLegendreMortarL2, + ElementContainer1D, ElementContainer2D, ElementContainer3D, + InterfaceContainer1D, InterfaceContainer2D, InterfaceContainer3D, + BoundaryContainer1D, BoundaryContainer2D, BoundaryContainer3D, + L2MortarContainer2D, L2MortarContainer3D, + SurfaceIntegralWeakForm, VolumeIntegralWeakForm, + BoundaryConditionPeriodic, + VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG + import Trixi: get_node_vars, get_node_coords, get_surface_node_vars, nelements, ninterfaces, nmortars, wrap_array, wrap_array_native @@ -40,6 +49,8 @@ include("semidiscretization/semidiscretization.jl") include("solvers/solvers.jl") # Export the public APIs +export LobattoLegendreBasisGPU +export DGSEMGPU export SemidiscretizationHyperbolicGPU export semidiscretizeGPU diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index 0af7966d..a7f49521 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -1,6 +1,9 @@ # Everything specific about semidiscretization hyperbolic for PDE solvers. -# Similar to `SemidiscretizationHyperbolic` in Trixi.jl but for GPU cache +# Similar to `SemidiscretizationHyperbolic` in Trixi.jl +# No need to adapt the `SemidiscretizationHyperbolic` struct as it is already GPU compatible + +# Outer constructor for GPU type function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver; source_terms = nothing, boundary_conditions = boundary_condition_periodic, @@ -16,7 +19,7 @@ function SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, sol check_periodicity_mesh_boundary_conditions(mesh, _boundary_conditions) - # Return the CPU type + # Return the CPU type (GPU compatible) SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), diff --git a/src/solvers/basis_lobatto_legendre.jl b/src/solvers/basis_lobatto_legendre.jl new file mode 100644 index 00000000..11127819 --- /dev/null +++ b/src/solvers/basis_lobatto_legendre.jl @@ -0,0 +1,108 @@ +# Everything related to Lobatto-Legendre basis adapted for initialization on the GPU. 
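+#
+# A minimal usage sketch (illustrative; assumes a CUDA-capable device and the
+# default `RealT = Float64`):
+#
+#   basis = LobattoLegendreBasisGPU(3)            # degree-3 basis, operators stored as `CuArray`s
+#   solver = DGSEMGPU(basis, flux_lax_friedrichs) # weak-form DGSEM solver built on the GPU basis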
+ +# Similar to `LobattoLegendreBasis` in Trixi.jl but GPU compatible +# struct LobattoLegendreBasisGPU{RealT <: Real, NNODES, +# VectorT <: AbstractGPUVector{RealT}, +# InverseVandermondeLegendre <: AbstractGPUMatrix{RealT}, +# BoundaryMatrix <: AbstractGPUMatrix{RealT}, +# DerivativeMatrix <: AbstractGPUMatrix{RealT}} <: AbstractBasisSBP{RealT} +# nodes::VectorT +# weights::VectorT +# inverse_weights::VectorT + +# inverse_vandermonde_legendre::InverseVandermondeLegendre +# boundary_interpolation::BoundaryMatrix # lhat + +# derivative_matrix::DerivativeMatrix # strong form derivative matrix +# derivative_split::DerivativeMatrix # strong form derivative matrix minus boundary terms +# derivative_split_transpose::DerivativeMatrix # transpose of `derivative_split` +# derivative_dhat::DerivativeMatrix # weak form matrix dhat +# # negative adjoint wrt the SBP dot product +# end + +# Similar to `LobattoLegendreBasis` in Trixi.jl +function LobattoLegendreBasisGPU(polydeg::Integer, RealT = Float64) # how about setting the default to Float32? + nnodes_ = polydeg + 1 + + # TODO: Use GPU kernels to complete the computation (compare with CPU) + nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_, RealT) + inverse_weights_ = inv.(weights_) + + _, inverse_vandermonde_legendre_ = vandermonde_legendre(nodes_, RealT) + + boundary_interpolation_ = zeros(RealT, nnodes_, 2) + boundary_interpolation_[:, 1] = calc_lhat(-one(RealT), nodes_, weights_) + boundary_interpolation_[:, 2] = calc_lhat(one(RealT), nodes_, weights_) + + derivative_matrix_ = polynomial_derivative_matrix(nodes_) + derivative_split_ = calc_dsplit(nodes_, weights_) + derivative_dhat_ = calc_dhat(nodes_, weights_) + + # Convert to GPU arrays + # TODO: `RealT` can be removed once Trixi.jl can be updated to the latest one + nodes = CuArray{RealT}(nodes_) + weights = CuArray{RealT}(weights_) + inverse_weights = CuArray{RealT}(inverse_weights_) + + inverse_vandermonde_legendre = CuArray{RealT}(inverse_vandermonde_legendre_) + # boundary_interpolation = CuArray(boundary_interpolation_) # avoid scalar indexing + + derivative_matrix = CuArray{RealT}(derivative_matrix_) + derivative_split = CuArray{RealT}(derivative_split_) + derivative_split_transpose = CuArray{RealT}(derivative_split_') + derivative_dhat = CuArray{RealT}(derivative_dhat_) + + # TODO: Implement a custom struct for finer control over data types + return LobattoLegendreBasis{RealT, nnodes_, typeof(nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation_), + typeof(derivative_matrix)}(nodes, weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation_, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) +end + +# @inline Base.real(basis::LobattoLegendreBasisGPU{RealT}) where {RealT} = RealT + +# @inline function nnodes(basis::LobattoLegendreBasisGPU{RealT, NNODES}) where {RealT, NNODES} +# NNODES +# end + +# @inline get_nodes(basis::LobattoLegendreBasisGPU) = basis.nodes + +# # Similar to `LobattoLegendreMortarL2` in Trixi.jl but GPU compatible +# struct LobattoLegendreMortarL2GPU{RealT <: Real, NNODES, +# ForwardMatrix <: AbstractGPUMatrix{RealT}, +# ReverseMatrix <: AbstractGPUMatrix{RealT}} <: AbstractMortarL2{RealT} +# forward_upper::ForwardMatrix +# forward_lower::ForwardMatrix +# reverse_upper::ReverseMatrix +# reverse_lower::ReverseMatrix +# end + +# Similar to `MortarL2` in Trixi.jl +function MortarL2GPU(basis::LobattoLegendreBasis) + RealT = real(basis) + nnodes_ = nnodes(basis) + +
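+ # Assemble the L2 mortar projection operators on the CPU first; they are
+ # uploaded to the GPU as `CuArray`s below.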
forward_upper_ = calc_forward_upper(nnodes_, RealT) + forward_lower_ = calc_forward_lower(nnodes_, RealT) + reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss), RealT) + reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss), RealT) + + # Convert to GPU arrays + # TODO: `RealT` can be removed once Trixi.jl can be updated to the latest one + forward_upper = CuArray{RealT}(forward_upper_) + forward_lower = CuArray{RealT}(forward_lower_) + reverse_upper = CuArray{RealT}(reverse_upper_) + reverse_lower = CuArray{RealT}(reverse_lower_) + + # TODO: Implement a custom struct for finer control over data types + LobattoLegendreMortarL2{RealT, nnodes_, typeof(forward_upper), + typeof(reverse_upper)}(forward_upper, forward_lower, + reverse_upper, reverse_lower) +end diff --git a/src/solvers/cache.jl b/src/solvers/cache.jl index 918cb849..73d267a0 100644 --- a/src/solvers/cache.jl +++ b/src/solvers/cache.jl @@ -6,13 +6,13 @@ # Create cache for general tree mesh function create_cache_gpu(mesh, equations, - volume_integral::VolumeIntegralWeakForm, dg::DGSEM, + volume_integral::VolumeIntegralWeakForm, dg::DG, uEltype, cache) NamedTuple() end # Create cache specialized for 1D tree mesh -function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltype) +function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DG, RealT, uEltype) # Get cells for which an element needs to be created (i.e., all leaf cells) leaf_cell_ids = local_leaf_cells(mesh.tree) @@ -41,13 +41,13 @@ function create_cache_gpu(mesh::TreeMesh{1}, equations, dg::DGSEM, RealT, uEltyp end function create_cache_gpu(mesh::TreeMesh{1}, equations, - volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, + volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype, cache) NamedTuple() end function create_cache_gpu(mesh::TreeMesh{1}, equations, - volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, + volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype, cache) fstar1_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements)) fstar1_R = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nelements(cache.elements)) @@ -62,7 +62,7 @@ end # Create cache specialized for 2D tree mesh function create_cache_gpu(mesh::TreeMesh{2}, equations, - dg::DGSEM, RealT, uEltype) + dg::DG, RealT, uEltype) # Get cells for which an element needs to be created (i.e., all leaf cells) leaf_cell_ids = local_leaf_cells(mesh.tree) @@ -95,13 +95,13 @@ function create_cache_gpu(mesh::TreeMesh{2}, equations, end function create_cache_gpu(mesh::TreeMesh{2}, equations, - volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, + volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype, cache) NamedTuple() end function create_cache_gpu(mesh::TreeMesh{2}, equations, - volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, + volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype, cache) fstar1_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nnodes(dg), nelements(cache.elements)) @@ -135,7 +135,7 @@ end # Create cache specialized for 3D tree mesh function create_cache_gpu(mesh::TreeMesh{3}, equations, - dg::DGSEM, RealT, uEltype) + dg::DG, RealT, uEltype) # Get cells for which an element needs to be created (i.e., all leaf cells) leaf_cell_ids = local_leaf_cells(mesh.tree) @@ -168,13 +168,13 @@ function create_cache_gpu(mesh::TreeMesh{3}, equations, end function create_cache_gpu(mesh::TreeMesh{3}, equations, - 
volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, + volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype, cache) NamedTuple() end function create_cache_gpu(mesh::TreeMesh{3}, equations, - volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, + volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype, cache) fstar1_L = CUDA.zeros(uEltype, nvariables(equations), nnodes(dg) + 1, nnodes(dg), nnodes(dg), nelements(cache.elements)) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index b4d77b52..a96b5c2a 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -1,7 +1,7 @@ # The `wrap_array` function in Trixi.jl is not compatible with GPU arrays, # so here we adapt `wrap_array` to work with GPU arrays. @inline function wrap_array(u_ode::CuArray, mesh::AbstractMesh, equations, - dg::DGSEM, cache) + dg::DG, cache) # TODO: Assert array length before calling `reshape` u_ode = reshape(u_ode, nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)) @@ -12,7 +12,7 @@ end # The `wrap_array_native` function in Trixi.jl is not compatible with GPU arrays, # so here we adapt `wrap_array_native` to work with GPU arrays. @inline function wrap_array_native(u_ode::CuArray, mesh::AbstractMesh, equations, - dg::DGSEM, cache) + dg::DG, cache) # TODO: Assert array length before calling `reshape` u_ode = reshape(u_ode, nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)) diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index 749b0567..f6a4886e 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -1,5 +1,6 @@ # Everything related to a DG semidiscretization in 1D. +################################################################################################# # Functions that end with `_kernel` are CUDA kernels that are going to be launched by # the @cuda macro with parameters from the kernel configurator. They are purely run on # the device (i.e., GPU). @@ -770,6 +771,7 @@ function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEqu return nothing end +################################################################################################# # Functions that begin with `cuda_` are the functions that pack CUDA kernels together to do # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) # and run them on the device (i.e., GPU). @@ -780,7 +782,7 @@ end # Pack kernels for calculating volume integrals function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) - derivative_dhat = CuArray(dg.basis.derivative_dhat) + derivative_dhat = dg.basis.derivative_dhat # The maximum number of threads per block is the dominant factor when choosing the optimization # method. However, there are other factors that may cause a launch failure, such as the maximum @@ -790,7 +792,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms, # TODO: More checks before the kernel launch thread_per_block = size(du, 1) * size(du, 2) if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? + # How to optimize when size is large (less common use)? 
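        # Fallback for large block sizes: stage the pointwise fluxes in a scratch
        # array with one kernel, then apply the weak-form derivative in a second kernel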
flux_arr = similar(u) flux_kernel = @cuda launch=false flux_kernel!(flux_arr, u, equations, flux) @@ -821,14 +823,14 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: volume_flux = volume_integral.volume_flux - # TODO: Move `set_diagonal_to_zero!` outside of loop and cache the result in DG on GPU - derivative_split = dg.basis.derivative_split + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) thread_per_block = size(du, 2) if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? + # How to optimize when size is large (less common use)? volume_flux_arr = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) volume_flux_kernel = @cuda launch=false volume_flux_kernel!(volume_flux_arr, u, equations, @@ -861,15 +863,15 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: symmetric_flux, nonconservative_flux = dg.volume_integral.volume_flux - # TODO: Move `set_diagonal_to_zero!` outside of loop and cache the result in DG on GPU - derivative_split = dg.basis.derivative_split # create copy + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy set_diagonal_to_zero!(derivative_split) derivative_split_zero = CuArray(derivative_split) - derivative_split = CuArray(dg.basis.derivative_split) + derivative_split = dg.basis.derivative_split thread_per_block = size(du, 2) if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? + # How to optimize when size is large (less common use)? symmetric_flux_arr = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) noncons_flux_arr = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) @@ -932,11 +934,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: kernel_configurator_1d(pure_blended_element_count_kernel, length(alpha))...) - derivative_split = dg.basis.derivative_split - set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` - + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy + set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) - inverse_weights = CuArray(dg.basis.inverse_weights) + + inverse_weights = dg.basis.inverse_weights volume_flux_arr = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) fstar1_L = cache.fstar1_L @@ -1001,11 +1004,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: kernel_configurator_1d(pure_blended_element_count_kernel, length(alpha))...) 
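    # At this point `element_ids_dg` and `element_ids_dgfv` partition the elements
    # into pure-DG and blended DG-FV sets based on the indicator values in `alpha`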
- derivative_split = dg.basis.derivative_split - set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` - + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy + set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) - inverse_weights = CuArray(dg.basis.inverse_weights) + + inverse_weights = dg.basis.inverse_weights volume_flux_arr = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) noncons_flux_arr = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 3)) @@ -1027,7 +1031,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{1}, nonconservative_terms:: kernel_configurator_2d(volume_flux_dgfv_kernel, size(u, 2)^2, size(u, 3))...) - derivative_split = CuArray(dg.basis.derivative_split) # use original `derivative_split` + derivative_split = dg.basis.derivative_split # use original `derivative_split` volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 401e8bbc..408d6ba8 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -1,5 +1,6 @@ # Everything related to a DG semidiscretization in 2D. +################################################################################################# # Functions that end with `_kernel` are CUDA kernels that are going to be launched by # the @cuda macro with parameters from the kernel configurator. They are purely run on # the device (i.e., GPU). @@ -1226,6 +1227,7 @@ function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEqu return nothing end +################################################################################################# # Functions that begin with `cuda_` are the functions that pack CUDA kernels together to do # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) # and run them on the device (i.e., GPU). @@ -1236,7 +1238,7 @@ end # Pack kernels for calculating volume integrals function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) - derivative_dhat = CuArray(dg.basis.derivative_dhat) + derivative_dhat = dg.basis.derivative_dhat # The maximum number of threads per block is the dominant factor when choosing the optimization # method. However, there are other factors that may cause a launch failure, such as the maximum @@ -1246,7 +1248,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms, # TODO: More checks before the kernel launch thread_per_block = size(du, 1) * size(du, 2)^2 if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? + # How to optimize when size is large (less common use)? flux_arr1 = similar(u) flux_arr2 = similar(u) @@ -1279,14 +1281,14 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: volume_flux = volume_integral.volume_flux - # TODO: Move `set_diagonal_to_zero!` outside of loop and cache the result in DG on GPU - derivative_split = dg.basis.derivative_split + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) thread_per_block = size(du, 2)^2 if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? 
+ # How to optimize when size is large (less common use)? volume_flux_arr1 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 4)) volume_flux_arr2 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), @@ -1323,15 +1325,15 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: symmetric_flux, nonconservative_flux = dg.volume_integral.volume_flux - # TODO: Move `set_diagonal_to_zero!` outside of loop and cache the result in DG on GPU - derivative_split = dg.basis.derivative_split # create copy + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy set_diagonal_to_zero!(derivative_split) derivative_split_zero = CuArray(derivative_split) - derivative_split = CuArray(dg.basis.derivative_split) + derivative_split = dg.basis.derivative_split thread_per_block = size(du, 2)^2 if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? + # How to optimize when size is large (less common use)? symmetric_flux_arr1 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 4)) symmetric_flux_arr2 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), @@ -1406,11 +1408,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: kernel_configurator_1d(pure_blended_element_count_kernel, length(alpha))...) - derivative_split = dg.basis.derivative_split - set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` - + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy + set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) - inverse_weights = CuArray(dg.basis.inverse_weights) + + inverse_weights = dg.basis.inverse_weights volume_flux_arr1 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 4)) volume_flux_arr2 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), @@ -1488,11 +1491,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: kernel_configurator_1d(pure_blended_element_count_kernel, length(alpha))...) - derivative_split = dg.basis.derivative_split - set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` - + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy + set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) - inverse_weights = CuArray(dg.basis.inverse_weights) + + inverse_weights = dg.basis.inverse_weights volume_flux_arr1 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 4)) volume_flux_arr2 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), @@ -1527,7 +1531,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms:: kernel_configurator_2d(volume_flux_dgfv_kernel, size(u, 2)^3, size(u, 4))...) 
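    # The blended DG-FV volume fluxes above use the zero-diagonal copy of
    # `derivative_split`; the pure-DG update below switches back to the original operator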
- derivative_split = CuArray(dg.basis.derivative_split) # use original `derivative_split` + derivative_split = dg.basis.derivative_split # use original `derivative_split` volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, @@ -1794,8 +1798,8 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{2}, cache_mortars::True, dg::DG # The original CPU arrays hold NaNs u_upper = cache.mortars.u_upper u_lower = cache.mortars.u_lower - forward_upper = CuArray(dg.mortar.forward_upper) - forward_lower = CuArray(dg.mortar.forward_lower) + forward_upper = dg.mortar.forward_upper + forward_lower = dg.mortar.forward_lower prolong_mortars_small2small_kernel = @cuda launch=false prolong_mortars_small2small_kernel!(u_upper, u_lower, @@ -1843,8 +1847,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati # The original CPU arrays hold NaNs u_upper = cache.mortars.u_upper u_lower = cache.mortars.u_lower - reverse_upper = CuArray(dg.mortar.reverse_upper) - reverse_lower = CuArray(dg.mortar.reverse_lower) + reverse_upper = dg.mortar.reverse_upper + reverse_lower = dg.mortar.reverse_lower surface_flux_values = cache.elements.surface_flux_values tmp_surface_flux_values = zero(similar(surface_flux_values)) # Maybe use CUDA.zeros @@ -1900,8 +1904,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{2}, cache_mortars::True, nonconservati # The original CPU arrays hold NaNs u_upper = cache.mortars.u_upper u_lower = cache.mortars.u_lower - reverse_upper = CuArray(dg.mortar.reverse_upper) - reverse_lower = CuArray(dg.mortar.reverse_lower) + reverse_upper = dg.mortar.reverse_upper + reverse_lower = dg.mortar.reverse_lower surface_flux_values = cache.elements.surface_flux_values tmp_surface_flux_values = zero(similar(surface_flux_values)) # Maybe use CUDA.zeros diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index c66ebf31..5939d439 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -1,5 +1,6 @@ # Everything related to a DG semidiscretization in 3D. +################################################################################################# # Functions that end with `_kernel` are CUDA kernels that are going to be launched by # the @cuda macro with parameters from the kernel configurator. They are purely run on # the device (i.e., GPU). @@ -1812,6 +1813,7 @@ function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEqu return nothing end +################################################################################################# # Functions that begin with `cuda_` are the functions that pack CUDA kernels together to do # partial work in semidiscretization. They are used to invoke kernels from the host (i.e., CPU) # and run them on the device (i.e., GPU). @@ -1822,7 +1824,7 @@ end # Pack kernels for calculating volume integrals function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) - derivative_dhat = CuArray(dg.basis.derivative_dhat) + derivative_dhat = dg.basis.derivative_dhat # The maximum number of threads per block is the dominant factor when choosing the optimization # method. 
However, there are other factors that may cause a launch failure, such as the maximum @@ -1832,7 +1834,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms, # TODO: More checks before the kernel launch thread_per_block = size(du, 1) * size(du, 2)^3 if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? + # How to optimize when size is large (less common use)? flux_arr1 = similar(u) flux_arr2 = similar(u) flux_arr3 = similar(u) @@ -1866,14 +1868,14 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: volume_flux = volume_integral.volume_flux - # TODO: Move `set_diagonal_to_zero!` outside of loop and cache the result in DG on GPU - derivative_split = dg.basis.derivative_split + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) thread_per_block = size(du, 2)^3 if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? + # How to optimize when size is large (less common use)? volume_flux_arr1 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 2), size(u, 5)) volume_flux_arr2 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), @@ -1916,15 +1918,15 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: symmetric_flux, nonconservative_flux = dg.volume_integral.volume_flux - # TODO: Move `set_diagonal_to_zero!` outside of loop and cache the result in DG on GPU - derivative_split = dg.basis.derivative_split # create copy + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy set_diagonal_to_zero!(derivative_split) derivative_split_zero = CuArray(derivative_split) - derivative_split = CuArray(dg.basis.derivative_split) + derivative_split = dg.basis.derivative_split thread_per_block = size(du, 2)^3 if thread_per_block > MAX_THREADS_PER_BLOCK - # How to optimize when size is large? + # How to optimize when size is large (less common use)? symmetric_flux_arr1 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 2), size(u, 5)) symmetric_flux_arr2 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), @@ -2008,11 +2010,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: kernel_configurator_1d(pure_blended_element_count_kernel, length(alpha))...) - derivative_split = dg.basis.derivative_split - set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` - + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy + set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) - inverse_weights = CuArray(dg.basis.inverse_weights) + + inverse_weights = dg.basis.inverse_weights volume_flux_arr1 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 2), size(u, 5)) volume_flux_arr2 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), @@ -2099,11 +2102,12 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: kernel_configurator_1d(pure_blended_element_count_kernel, length(alpha))...) 
- derivative_split = dg.basis.derivative_split - set_diagonal_to_zero!(derivative_split) # temporarily set here, maybe move outside `rhs!` - + # TODO: Combine below into the existing kernels + derivative_split = Array(dg.basis.derivative_split) # create copy + set_diagonal_to_zero!(derivative_split) derivative_split = CuArray(derivative_split) - inverse_weights = CuArray(dg.basis.inverse_weights) + + inverse_weights = dg.basis.inverse_weights volume_flux_arr1 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), size(u, 2), size(u, 5)) volume_flux_arr2 = CuArray{RealT}(undef, size(u, 1), size(u, 2), size(u, 2), size(u, 2), @@ -2148,7 +2152,7 @@ function cuda_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms:: kernel_configurator_2d(volume_flux_dgfv_kernel, size(u, 2)^4, size(u, 5))...) - derivative_split = CuArray(dg.basis.derivative_split) # use original `derivative_split` + derivative_split = dg.basis.derivative_split # use original `derivative_split` volume_integral_dg_kernel = @cuda launch=false volume_integral_dg_kernel!(du, element_ids_dg, element_ids_dgfv, @@ -2379,8 +2383,8 @@ end # u_upper_right = cache.mortars.u_upper_right # u_lower_left = cache.mortars.u_lower_left # u_lower_right = cache.mortars.u_lower_right -# forward_upper = CuArray(dg.mortar.forward_upper) -# forward_lower = CuArray(dg.mortar.forward_lower) +# forward_upper = dg.mortar.forward_upper +# forward_lower = dg.mortar.forward_lower # prolong_mortars_small2small_kernel = @cuda launch=false prolong_mortars_small2small_kernel!(u_upper_left, # u_upper_right, @@ -2453,8 +2457,8 @@ function cuda_prolong2mortars!(u, mesh::TreeMesh{3}, cache_mortars::True, dg::DG u_upper_right = cache.mortars.u_upper_right u_lower_left = cache.mortars.u_lower_left u_lower_right = cache.mortars.u_lower_right - forward_upper = CuArray(dg.mortar.forward_upper) - forward_lower = CuArray(dg.mortar.forward_lower) + forward_upper = dg.mortar.forward_upper + forward_lower = dg.mortar.forward_lower prolong_mortars_small2small_kernel = @cuda launch=false prolong_mortars_small2small_kernel!(u_upper_left, u_upper_right, @@ -2521,8 +2525,8 @@ end # u_upper_right = cache.mortars.u_upper_right # u_lower_left = cache.mortars.u_lower_left # u_lower_right = cache.mortars.u_lower_right -# reverse_upper = CuArray(dg.mortar.reverse_upper) -# reverse_lower = CuArray(dg.mortar.reverse_lower) +# reverse_upper = dg.mortar.reverse_upper +# reverse_lower = dg.mortar.reverse_lower # surface_flux_values = cache.elements.surface_flux_values # tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero @@ -2630,8 +2634,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati u_upper_right = cache.mortars.u_upper_right u_lower_left = cache.mortars.u_lower_left u_lower_right = cache.mortars.u_lower_right - reverse_upper = CuArray(dg.mortar.reverse_upper) - reverse_lower = CuArray(dg.mortar.reverse_lower) + reverse_upper = dg.mortar.reverse_upper + reverse_lower = dg.mortar.reverse_lower surface_flux_values = cache.elements.surface_flux_values tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero @@ -2719,8 +2723,8 @@ end # u_upper_right = cache.mortars.u_upper_right # u_lower_left = cache.mortars.u_lower_left # u_lower_right = cache.mortars.u_lower_right -# reverse_upper = CuArray(dg.mortar.reverse_upper) -# reverse_lower = CuArray(dg.mortar.reverse_lower) +# reverse_upper = dg.mortar.reverse_upper +# reverse_lower = dg.mortar.reverse_lower # surface_flux_values = 
cache.elements.surface_flux_values # tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero @@ -2829,8 +2833,8 @@ function cuda_mortar_flux!(mesh::TreeMesh{3}, cache_mortars::True, nonconservati u_upper_right = cache.mortars.u_upper_right u_lower_left = cache.mortars.u_lower_left u_lower_right = cache.mortars.u_lower_right - reverse_upper = CuArray(dg.mortar.reverse_upper) - reverse_lower = CuArray(dg.mortar.reverse_lower) + reverse_upper = dg.mortar.reverse_upper + reverse_lower = dg.mortar.reverse_lower surface_flux_values = cache.elements.surface_flux_values tmp_surface_flux_values = zero(similar(surface_flux_values)) # undef to zero diff --git a/src/solvers/dgsem.jl b/src/solvers/dgsem.jl new file mode 100644 index 00000000..b36287ad --- /dev/null +++ b/src/solvers/dgsem.jl @@ -0,0 +1,30 @@ +# Rewrite `DGSEM` in Trixi.jl so that the solver is constructed with GPU-specialized +# parts, i.e., its basis and mortar operator arrays are copied from CPU to GPU. + +# Similar to `DGSEM` in Trixi.jl +function DGSEMGPU(basis::LobattoLegendreBasis, + surface_flux = flux_central, + volume_integral = VolumeIntegralWeakForm(), + mortar = MortarL2GPU(basis)) + surface_integral = SurfaceIntegralWeakForm(surface_flux) + return DG{typeof(basis), typeof(mortar), typeof(surface_integral), + typeof(volume_integral)}(basis, mortar, surface_integral, volume_integral) +end + +function DGSEMGPU(basis::LobattoLegendreBasis, + surface_integral::AbstractSurfaceIntegral, + volume_integral = VolumeIntegralWeakForm(), + mortar = MortarL2GPU(basis)) + return DG{typeof(basis), typeof(mortar), typeof(surface_integral), + typeof(volume_integral)}(basis, mortar, surface_integral, volume_integral) +end + +function DGSEMGPU(; RealT = Float64, # how about setting the default to Float32? + polydeg::Integer, + surface_flux = flux_central, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = VolumeIntegralWeakForm()) + basis = LobattoLegendreBasisGPU(polydeg, RealT) + + return DGSEMGPU(basis, surface_integral, volume_integral) +end diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl index c8ff738a..85a2b66f 100644 --- a/src/solvers/solvers.jl +++ b/src/solvers/solvers.jl @@ -1,3 +1,4 @@ +include("basis_lobatto_legendre.jl") include("cache.jl") include("common.jl") include("containers_1d.jl") @@ -7,6 +8,7 @@ include("dg_1d.jl") include("dg_2d.jl") include("dg_3d.jl") include("dg.jl") +include("dgsem.jl") # See also `rhs!` function in Trixi.jl function rhs_gpu!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) diff --git a/test/tree_dgsem_1d/advection_basic.jl b/test/tree_dgsem_1d/advection_basic.jl index 7fc7cd6a..e25c320e 100644 --- a/test/tree_dgsem_1d/advection_basic.jl +++ b/test/tree_dgsem_1d/advection_basic.jl @@ -10,6 +10,7 @@ equations = LinearScalarAdvectionEquation1D(advection_velocity) solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = -1.0 coordinates_max = 1.0 @@ -21,7 +22,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver) semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test, - solver) + solver_gpu) tspan = tspan_gpu = (0.0, 1.0) t = t_gpu = 0.0 @@ -49,11 +50,8 @@ u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu,
diff --git a/test/tree_dgsem_1d/advection_basic.jl b/test/tree_dgsem_1d/advection_basic.jl
index 7fc7cd6a..e25c320e 100644
--- a/test/tree_dgsem_1d/advection_basic.jl
+++ b/test/tree_dgsem_1d/advection_basic.jl
@@ -10,6 +10,7 @@ include("../test_macros.jl")
     equations = LinearScalarAdvectionEquation1D(advection_velocity)
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = -1.0
     coordinates_max = 1.0
@@ -21,7 +22,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
                                         solver)
     semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test,
-                                               solver)
+                                               solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -49,11 +50,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/advection_extended.jl b/test/tree_dgsem_1d/advection_extended.jl
index 977a8575..77be6f86 100644
--- a/test/tree_dgsem_1d/advection_extended.jl
+++ b/test/tree_dgsem_1d/advection_extended.jl
@@ -14,6 +14,7 @@ include("../test_macros.jl")
     boundary_conditions = boundary_condition_periodic
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = -1.0
     coordinates_max = 1.0
@@ -25,7 +26,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         boundary_conditions = boundary_conditions)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                boundary_conditions = boundary_conditions)
 
     tspan = tspan_gpu = (0.0, 1.0)
@@ -54,11 +55,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/burgers_basic.jl b/test/tree_dgsem_1d/burgers_basic.jl
index b38074de..221ae5a3 100644
--- a/test/tree_dgsem_1d/burgers_basic.jl
+++ b/test/tree_dgsem_1d/burgers_basic.jl
@@ -11,6 +11,7 @@ include("../test_macros.jl")
     initial_condition = initial_condition_convergence_test
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = 0.0
     coordinates_max = 1.0
@@ -20,7 +21,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_convergence_test)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_convergence_test)
 
     tspan = tspan_gpu = (0.0, 2.0)
@@ -49,11 +50,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
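Every test file follows the same check: initialize the CPU and GPU semidiscretizations side by side, then compare device state against the CPU reference. `@test_approx` is defined in test/test_macros.jl, which this patch does not touch; a plausible stand-in, assuming it copies to the host and compares approximately (names and tolerance here are hypothetical):

    using Test, CUDA

    # hypothetical equivalent of the `@test_approx` macro from test_macros.jl
    function test_approx(u_gpu, u)
        @test isapprox(Array(u_gpu), u; rtol = sqrt(eps(Float64)))
    end

    test_approx(u_gpu, u)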
include("../test_macros.jl") equations = InviscidBurgersEquation1D() basis = LobattoLegendreBasis(3) + basis_gpu = LobattoLegendreBasisGPU(3) # Use shock capturing techniques to suppress oscillations at discontinuities indicator_sc = IndicatorHennemannGassner(equations, basis, alpha_max = 1.0, @@ -24,6 +25,7 @@ include("../test_macros.jl") volume_flux_fv = surface_flux) solver = DGSEM(basis, surface_flux, volume_integral) + solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral) coordinate_min = 0.0 coordinate_max = 1.0 @@ -58,7 +60,7 @@ include("../test_macros.jl") semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, boundary_conditions = boundary_conditions) - semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver, + semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu, boundary_conditions = boundary_conditions) tspan = tspan_gpu = (0.0, 0.2) @@ -87,11 +89,8 @@ include("../test_macros.jl") u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) - # Tests for components initialization - @test_approx (u_gpu, u) - # du is initlaizaed as undefined, cannot test now - # Tests for semidiscretization process + @test_approx (u_gpu, u) # du is initlaizaed as undefined, cannot test now Trixi.reset_du!(du, solver, cache) TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, diff --git a/test/tree_dgsem_1d/burgers_shock.jl b/test/tree_dgsem_1d/burgers_shock.jl index b7abe34f..d2d3ba3c 100644 --- a/test/tree_dgsem_1d/burgers_shock.jl +++ b/test/tree_dgsem_1d/burgers_shock.jl @@ -9,6 +9,7 @@ include("../test_macros.jl") equations = InviscidBurgersEquation1D() basis = LobattoLegendreBasis(3) + basis_gpu = LobattoLegendreBasisGPU(3) # Use shock capturing techniques to suppress oscillations at discontinuities indicator_sc = IndicatorHennemannGassner(equations, basis, alpha_max = 1.0, @@ -24,6 +25,7 @@ include("../test_macros.jl") volume_flux_fv = surface_flux) solver = DGSEM(basis, surface_flux, volume_integral) + solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral) coordinate_min = 0.0 coordinate_max = 1.0 @@ -59,7 +61,7 @@ include("../test_macros.jl") semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, boundary_conditions = boundary_conditions) - semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver, + semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu, boundary_conditions = boundary_conditions) tspan = tspan_gpu = (0.0, 0.2) @@ -88,11 +90,8 @@ include("../test_macros.jl") u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) - # Tests for components initialization - @test_approx (u_gpu, u) - # du is initlaizaed as undefined, cannot test now - # Tests for semidiscretization process + @test_approx (u_gpu, u) # du is initlaizaed as undefined, cannot test now Trixi.reset_du!(du, solver, cache) TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, diff --git a/test/tree_dgsem_1d/euler_blast_wave.jl b/test/tree_dgsem_1d/euler_blast_wave.jl index 3bea2602..f56ff178 100644 --- a/test/tree_dgsem_1d/euler_blast_wave.jl +++ b/test/tree_dgsem_1d/euler_blast_wave.jl @@ -27,6 +27,7 @@ include("../test_macros.jl") surface_flux = flux_lax_friedrichs volume_flux = 
diff --git a/test/tree_dgsem_1d/euler_blast_wave.jl b/test/tree_dgsem_1d/euler_blast_wave.jl
index 3bea2602..f56ff178 100644
--- a/test/tree_dgsem_1d/euler_blast_wave.jl
+++ b/test/tree_dgsem_1d/euler_blast_wave.jl
@@ -27,6 +27,7 @@ include("../test_macros.jl")
     surface_flux = flux_lax_friedrichs
     volume_flux = flux_ranocha
     basis = LobattoLegendreBasis(3)
+    basis_gpu = LobattoLegendreBasisGPU(3)
     indicator_sc = IndicatorHennemannGassner(equations, basis,
                                              alpha_max = 0.5,
                                              alpha_min = 0.001,
@@ -36,6 +37,7 @@ include("../test_macros.jl")
                                                      volume_flux_dg = volume_flux,
                                                      volume_flux_fv = surface_flux)
     solver = DGSEM(basis, surface_flux, volume_integral)
+    solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)
 
     coordinates_min = (-2.0,)
     coordinates_max = (2.0,)
@@ -44,7 +46,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 12.5)
     t = t_gpu = 0.0
@@ -72,11 +74,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/euler_ec.jl b/test/tree_dgsem_1d/euler_ec.jl
index b07f9ae6..60d7f091 100644
--- a/test/tree_dgsem_1d/euler_ec.jl
+++ b/test/tree_dgsem_1d/euler_ec.jl
@@ -13,6 +13,8 @@ include("../test_macros.jl")
     volume_flux = flux_ranocha
     solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-2.0,)
     coordinates_max = (2.0,)
@@ -21,7 +23,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -49,11 +51,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/euler_shock.jl b/test/tree_dgsem_1d/euler_shock.jl
index dd13ba52..f58ff56c 100644
--- a/test/tree_dgsem_1d/euler_shock.jl
+++ b/test/tree_dgsem_1d/euler_shock.jl
@@ -13,6 +13,7 @@ include("../test_macros.jl")
     surface_flux = flux_lax_friedrichs
     volume_flux = flux_shima_etal
     basis = LobattoLegendreBasis(3)
+    basis_gpu = LobattoLegendreBasisGPU(3)
     indicator_sc = IndicatorHennemannGassner(equations, basis,
                                              alpha_max = 0.5,
                                              alpha_min = 0.001,
@@ -22,6 +23,7 @@ include("../test_macros.jl")
                                                      volume_flux_dg = volume_flux,
                                                      volume_flux_fv = surface_flux)
     solver = DGSEM(basis, surface_flux, volume_integral)
+    solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)
 
     coordinates_min = -2.0
     coordinates_max = 2.0
@@ -30,7 +32,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -58,11 +60,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/euler_source_terms.jl b/test/tree_dgsem_1d/euler_source_terms.jl
index 9e78bac2..8aa43e30 100644
--- a/test/tree_dgsem_1d/euler_source_terms.jl
+++ b/test/tree_dgsem_1d/euler_source_terms.jl
@@ -11,6 +11,7 @@ include("../test_macros.jl")
     initial_condition = initial_condition_convergence_test
 
     solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = 0.0
     coordinates_max = 2.0
@@ -20,7 +21,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_convergence_test)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_convergence_test)
 
     tspan = tspan_gpu = (0.0, 2.0)
@@ -49,11 +50,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/euler_source_terms_nonperiodic.jl b/test/tree_dgsem_1d/euler_source_terms_nonperiodic.jl
index 2ef36f09..0a20c2bb 100644
--- a/test/tree_dgsem_1d/euler_source_terms_nonperiodic.jl
+++ b/test/tree_dgsem_1d/euler_source_terms_nonperiodic.jl
@@ -15,6 +15,7 @@ include("../test_macros.jl")
                            x_pos = boundary_condition)
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (0.0,)
     coordinates_max = (2.0,)
@@ -26,7 +27,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_convergence_test,
                                         boundary_conditions = boundary_conditions)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_convergence_test,
                                                boundary_conditions = boundary_conditions)
 
@@ -56,11 +57,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/eulermulti_ec.jl b/test/tree_dgsem_1d/eulermulti_ec.jl
index c5c8eb80..2d36da79 100644
--- a/test/tree_dgsem_1d/eulermulti_ec.jl
+++ b/test/tree_dgsem_1d/eulermulti_ec.jl
@@ -19,6 +19,8 @@ include("../test_macros.jl")
     volume_flux = flux_ranocha
     solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-2.0,)
     coordinates_max = (2.0,)
@@ -27,7 +29,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -55,11 +57,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/eulermulti_es.jl b/test/tree_dgsem_1d/eulermulti_es.jl
index 0fe0d94b..fb8dcb34 100644
--- a/test/tree_dgsem_1d/eulermulti_es.jl
+++ b/test/tree_dgsem_1d/eulermulti_es.jl
@@ -14,6 +14,8 @@ include("../test_macros.jl")
     volume_flux = flux_ranocha
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-2.0,)
     coordinates_max = (2.0,)
@@ -22,7 +24,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -50,11 +52,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/eulerquasi_ec.jl b/test/tree_dgsem_1d/eulerquasi_ec.jl
index a2e23797..4c439883 100644
--- a/test/tree_dgsem_1d/eulerquasi_ec.jl
+++ b/test/tree_dgsem_1d/eulerquasi_ec.jl
@@ -23,6 +23,8 @@ include("../test_macros.jl")
     volume_flux = surface_flux
     solver = DGSEM(polydeg = 4, surface_flux = surface_flux,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 4, surface_flux = surface_flux,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-1.0,)
     coordinates_max = (1.0,)
@@ -31,7 +33,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -59,11 +61,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/eulerquasi_source_terms.jl b/test/tree_dgsem_1d/eulerquasi_source_terms.jl
index 3a86f2a8..f9bc6ac7 100644
--- a/test/tree_dgsem_1d/eulerquasi_source_terms.jl
+++ b/test/tree_dgsem_1d/eulerquasi_source_terms.jl
@@ -14,6 +14,8 @@ include("../test_macros.jl")
     volume_flux = surface_flux
     solver = DGSEM(polydeg = 4, surface_flux = surface_flux,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 4, surface_flux = surface_flux,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = -1.0
     coordinates_max = 1.0
@@ -23,7 +25,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_convergence_test)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_convergence_test)
 
     tspan = tspan_gpu = (0.0, 2.0)
@@ -52,11 +54,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/hypdiff_harmonic_nonperiodic.jl b/test/tree_dgsem_1d/hypdiff_harmonic_nonperiodic.jl
index 6c47c780..cabe492d 100644
--- a/test/tree_dgsem_1d/hypdiff_harmonic_nonperiodic.jl
+++ b/test/tree_dgsem_1d/hypdiff_harmonic_nonperiodic.jl
@@ -27,6 +27,7 @@ include("../test_macros.jl")
     boundary_conditions = BoundaryConditionDirichlet(initial_condition)
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = -1.0
     coordinates_max = 2.0
@@ -38,7 +39,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         boundary_conditions = boundary_conditions,
                                         source_terms = source_terms_harmonic)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                boundary_conditions = boundary_conditions,
                                                source_terms = source_terms_harmonic)
 
@@ -68,11 +69,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/hypdiff_nonperiodic.jl b/test/tree_dgsem_1d/hypdiff_nonperiodic.jl
index f4b56b98..6f31453f 100644
--- a/test/tree_dgsem_1d/hypdiff_nonperiodic.jl
+++ b/test/tree_dgsem_1d/hypdiff_nonperiodic.jl
@@ -13,6 +13,7 @@ include("../test_macros.jl")
     boundary_conditions = boundary_condition_poisson_nonperiodic
 
     solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = 0.0
     coordinates_max = 1.0
@@ -24,7 +25,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         boundary_conditions = boundary_conditions,
                                         source_terms = source_terms_poisson_nonperiodic)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                boundary_conditions = boundary_conditions,
                                                source_terms = source_terms_poisson_nonperiodic)
 
@@ -54,11 +55,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
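For orientation, the full GPU path these tests exercise, condensed into one sketch. `rhs_gpu!` is the entry point added in src/solvers/solvers.jl; obtaining `u_ode` via `Trixi.compute_coefficients` is an assumption about the test setup, which this patch does not show:

    using Trixi, TrixiCUDA

    equations = LinearScalarAdvectionEquation1D(1.0)
    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
    mesh = TreeMesh((-1.0,), (1.0,), initial_refinement_level = 4, n_cells_max = 10_000)

    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations,
                                               initial_condition_convergence_test, solver_gpu)

    # hypothetical driver: evaluate the GPU right-hand side once at t = 0
    u_ode = Trixi.compute_coefficients(0.0, semi_gpu)
    du_ode = similar(u_ode)
    TrixiCUDA.rhs_gpu!(du_ode, u_ode, semi_gpu, 0.0)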
diff --git a/test/tree_dgsem_1d/mhd_alfven_wave.jl b/test/tree_dgsem_1d/mhd_alfven_wave.jl
index 15b35cf6..c0d964f8 100644
--- a/test/tree_dgsem_1d/mhd_alfven_wave.jl
+++ b/test/tree_dgsem_1d/mhd_alfven_wave.jl
@@ -14,6 +14,8 @@ include("../test_macros.jl")
     volume_flux = flux_hindenlang_gassner
     solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = 0.0
     coordinates_max = 1.0
@@ -22,7 +24,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 2.0)
     t = t_gpu = 0.0
@@ -50,11 +52,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/mhd_ec.jl b/test/tree_dgsem_1d/mhd_ec.jl
index 306e6fd3..0e8b120c 100644
--- a/test/tree_dgsem_1d/mhd_ec.jl
+++ b/test/tree_dgsem_1d/mhd_ec.jl
@@ -14,6 +14,8 @@ include("../test_macros.jl")
     volume_flux = flux_hindenlang_gassner
     solver = DGSEM(polydeg = 3, surface_flux = flux_hindenlang_gassner,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_hindenlang_gassner,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = 0.0
     coordinates_max = 1.0
@@ -22,7 +24,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -50,11 +52,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_1d/shallowwater_shock.jl b/test/tree_dgsem_1d/shallowwater_shock.jl
index 39b25c74..7c08c014 100644
--- a/test/tree_dgsem_1d/shallowwater_shock.jl
+++ b/test/tree_dgsem_1d/shallowwater_shock.jl
@@ -41,6 +41,7 @@ include("../test_macros.jl")
                                                hydrostatic_reconstruction_audusse_etal),
                     flux_nonconservative_audusse_etal)
 
     basis = LobattoLegendreBasis(4)
+    basis_gpu = LobattoLegendreBasisGPU(4)
 
     indicator_sc = IndicatorHennemannGassner(equations, basis,
                                              alpha_max = 0.5,
@@ -52,6 +53,7 @@ include("../test_macros.jl")
                                                      volume_flux_fv = surface_flux)
 
     solver = DGSEM(basis, surface_flux, volume_integral)
+    solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)
 
     coordinates_min = -3.0
     coordinates_max = 3.0
@@ -62,7 +64,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         boundary_conditions = boundary_condition)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                boundary_conditions = boundary_condition)
 
     tspan = tspan_gpu = (0.0, 3.0)
@@ -91,11 +93,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/advection_basic.jl b/test/tree_dgsem_2d/advection_basic.jl
index 9a598cd5..e1be9bf6 100644
--- a/test/tree_dgsem_2d/advection_basic.jl
+++ b/test/tree_dgsem_2d/advection_basic.jl
@@ -10,6 +10,7 @@ include("../test_macros.jl")
     equations = LinearScalarAdvectionEquation2D(advection_velocity)
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (-1.0, -1.0)
     coordinates_max = (1.0, 1.0)
@@ -21,7 +22,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
                                         solver)
     semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test,
-                                               solver)
+                                               solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -49,11 +50,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/advection_mortar.jl b/test/tree_dgsem_2d/advection_mortar.jl
index 8cae26cc..f31ac697 100644
--- a/test/tree_dgsem_2d/advection_mortar.jl
+++ b/test/tree_dgsem_2d/advection_mortar.jl
@@ -11,6 +11,7 @@ include("../test_macros.jl")
     initial_condition = initial_condition_convergence_test
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (-1.0, -1.0)
     coordinates_max = (1.0, 1.0)
@@ -22,7 +23,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -50,11 +51,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/euler_blob_mortar.jl b/test/tree_dgsem_2d/euler_blob_mortar.jl
index d215c39d..ac268a69 100644
--- a/test/tree_dgsem_2d/euler_blob_mortar.jl
+++ b/test/tree_dgsem_2d/euler_blob_mortar.jl
@@ -40,6 +40,7 @@ include("../test_macros.jl")
     surface_flux = flux_lax_friedrichs
     volume_flux = flux_ranocha
     basis = LobattoLegendreBasis(3)
+    basis_gpu = LobattoLegendreBasisGPU(3)
 
     indicator_sc = IndicatorHennemannGassner(equations, basis,
                                              alpha_max = 0.05,
@@ -52,6 +53,7 @@ include("../test_macros.jl")
                                                      volume_flux_fv = surface_flux)
 
     solver = DGSEM(basis, surface_flux, volume_integral)
+    solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)
 
     coordinates_min = (-32.0, -32.0)
     coordinates_max = (32.0, 32.0)
@@ -65,7 +67,7 @@ include("../test_macros.jl")
                     n_cells_max = 100_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 16.0)
     t = t_gpu = 0.0
@@ -93,11 +95,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/euler_ec.jl b/test/tree_dgsem_2d/euler_ec.jl
index f0a89e56..22ca8037 100644
--- a/test/tree_dgsem_2d/euler_ec.jl
+++ b/test/tree_dgsem_2d/euler_ec.jl
@@ -13,6 +13,8 @@ include("../test_macros.jl")
     volume_flux = flux_ranocha
     solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-2.0, -2.0)
     coordinates_max = (2.0, 2.0)
@@ -23,7 +25,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         boundary_conditions = boundary_condition_periodic)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                boundary_conditions = boundary_condition_periodic)
 
     tspan = tspan_gpu = (0.0, 0.4)
@@ -52,11 +54,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/euler_shock.jl b/test/tree_dgsem_2d/euler_shock.jl
index 98f0c5ba..1b6ced88 100644
--- a/test/tree_dgsem_2d/euler_shock.jl
+++ b/test/tree_dgsem_2d/euler_shock.jl
@@ -13,6 +13,7 @@ include("../test_macros.jl")
     surface_flux = flux_lax_friedrichs
     volume_flux = flux_shima_etal
    basis = LobattoLegendreBasis(3)
+    basis_gpu = LobattoLegendreBasisGPU(3)
     indicator_sc = IndicatorHennemannGassner(equations, basis,
                                              alpha_max = 0.5,
                                              alpha_min = 0.001,
@@ -22,6 +23,7 @@ include("../test_macros.jl")
                                                      volume_flux_dg = volume_flux,
                                                      volume_flux_fv = surface_flux)
     solver = DGSEM(basis, surface_flux, volume_integral)
+    solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)
 
     coordinates_min = (-2.0, -2.0)
     coordinates_max = (2.0, 2.0)
@@ -30,7 +32,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -58,11 +60,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/euler_source_terms.jl b/test/tree_dgsem_2d/euler_source_terms.jl
index 815287ed..680c9a6b 100644
--- a/test/tree_dgsem_2d/euler_source_terms.jl
+++ b/test/tree_dgsem_2d/euler_source_terms.jl
@@ -10,6 +10,7 @@ include("../test_macros.jl")
     initial_condition = initial_condition_convergence_test
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (0.0, 0.0)
     coordinates_max = (2.0, 2.0)
@@ -19,7 +20,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_convergence_test)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_convergence_test)
 
     tspan = tspan_gpu = (0.0, 2.0)
@@ -48,11 +49,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/euler_source_terms_nonperiodic.jl b/test/tree_dgsem_2d/euler_source_terms_nonperiodic.jl
index 4a64bca8..13c9deb0 100644
--- a/test/tree_dgsem_2d/euler_source_terms_nonperiodic.jl
+++ b/test/tree_dgsem_2d/euler_source_terms_nonperiodic.jl
@@ -17,6 +17,7 @@ include("../test_macros.jl")
                            y_pos = boundary_condition)
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (0.0, 0.0)
     coordinates_max = (2.0, 2.0)
@@ -28,7 +29,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_convergence_test,
                                         boundary_conditions = boundary_conditions)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_convergence_test,
                                                boundary_conditions = boundary_conditions)
 
@@ -58,11 +59,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/eulermulti_ec.jl b/test/tree_dgsem_2d/eulermulti_ec.jl
index 608365f3..574d1dd2 100644
--- a/test/tree_dgsem_2d/eulermulti_ec.jl
+++ b/test/tree_dgsem_2d/eulermulti_ec.jl
@@ -14,6 +14,8 @@ include("../test_macros.jl")
     volume_flux = flux_ranocha
     solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-2.0, -2.0)
     coordinates_max = (2.0, 2.0)
@@ -22,7 +24,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -50,11 +52,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/eulermulti_es.jl b/test/tree_dgsem_2d/eulermulti_es.jl
index 58b55106..c46e1b96 100644
--- a/test/tree_dgsem_2d/eulermulti_es.jl
+++ b/test/tree_dgsem_2d/eulermulti_es.jl
@@ -19,6 +19,8 @@ include("../test_macros.jl")
     volume_flux = flux_ranocha
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs,
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-2.0, -2.0)
     coordinates_max = (2.0, 2.0)
@@ -27,7 +29,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -55,11 +57,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/hypdiff_nonperiodic.jl b/test/tree_dgsem_2d/hypdiff_nonperiodic.jl
index 87648203..9a0a1436 100644
--- a/test/tree_dgsem_2d/hypdiff_nonperiodic.jl
+++ b/test/tree_dgsem_2d/hypdiff_nonperiodic.jl
@@ -16,6 +16,7 @@ include("../test_macros.jl")
                            y_pos = boundary_condition_periodic)
 
     solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (0.0, 0.0)
     coordinates_max = (1.0, 1.0)
@@ -27,7 +28,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         boundary_conditions = boundary_conditions,
                                         source_terms = source_terms_poisson_nonperiodic)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                boundary_conditions = boundary_conditions,
                                                source_terms = source_terms_poisson_nonperiodic)
 
@@ -57,11 +58,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/mhd_alfven_wave.jl b/test/tree_dgsem_2d/mhd_alfven_wave.jl
index d8494802..5cb40e62 100644
--- a/test/tree_dgsem_2d/mhd_alfven_wave.jl
+++ b/test/tree_dgsem_2d/mhd_alfven_wave.jl
@@ -15,6 +15,9 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 3,
                    surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3,
+                          surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (0.0, 0.0)
     coordinates_max = (sqrt(2.0), sqrt(2.0))
@@ -23,7 +26,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 2.0)
     t = t_gpu = 0.0
@@ -51,11 +54,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/mhd_alfven_wave_mortar.jl b/test/tree_dgsem_2d/mhd_alfven_wave_mortar.jl
index 1392e004..bf038855 100644
--- a/test/tree_dgsem_2d/mhd_alfven_wave_mortar.jl
+++ b/test/tree_dgsem_2d/mhd_alfven_wave_mortar.jl
@@ -16,6 +16,10 @@ include("../test_macros.jl")
                    surface_flux = (flux_hlle,
                                    flux_nonconservative_powell),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3,
+                          surface_flux = (flux_hlle,
+                                          flux_nonconservative_powell),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (0.0, 0.0)
     coordinates_max = (sqrt(2.0), sqrt(2.0))
@@ -27,7 +31,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 2.0)
     t = t_gpu = 0.0
@@ -55,11 +59,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/mhd_ec.jl b/test/tree_dgsem_2d/mhd_ec.jl
index 66e96a1d..dd4340a0 100644
--- a/test/tree_dgsem_2d/mhd_ec.jl
+++ b/test/tree_dgsem_2d/mhd_ec.jl
@@ -14,6 +14,9 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 3,
                    surface_flux = (flux_hindenlang_gassner, flux_nonconservative_powell),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3,
+                          surface_flux = (flux_hindenlang_gassner, flux_nonconservative_powell),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-2.0, -2.0)
     coordinates_max = (2.0, 2.0)
@@ -22,7 +25,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -50,11 +53,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
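Systems with nonconservative terms (MHD, shallow water) pass `surface_flux` as a `(conservative, nonconservative)` pair, and `DGSEMGPU` accepts it unchanged. Condensed from the `mhd_ec` test above:

    volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
    solver_gpu = DGSEMGPU(polydeg = 3,
                          surface_flux = (flux_hindenlang_gassner, flux_nonconservative_powell),
                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))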
diff --git a/test/tree_dgsem_2d/mhd_shock.jl b/test/tree_dgsem_2d/mhd_shock.jl
index 6a96be40..e1a02165 100644
--- a/test/tree_dgsem_2d/mhd_shock.jl
+++ b/test/tree_dgsem_2d/mhd_shock.jl
@@ -14,6 +14,7 @@ include("../test_macros.jl")
     volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
     polydeg = 4
     basis = LobattoLegendreBasis(polydeg)
+    basis_gpu = LobattoLegendreBasisGPU(polydeg)
     indicator_sc = IndicatorHennemannGassner(equations, basis,
                                              alpha_max = 0.5,
                                              alpha_min = 0.001,
@@ -25,6 +26,8 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux,
                    volume_integral = volume_integral)
+    solver_gpu = DGSEMGPU(polydeg = polydeg, surface_flux = surface_flux,
+                          volume_integral = volume_integral)
 
     coordinates_min = (-2.0, -2.0)
     coordinates_max = (2.0, 2.0)
@@ -33,7 +36,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -61,11 +64,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/shallowwater_ec.jl b/test/tree_dgsem_2d/shallowwater_ec.jl
index 6b0dd199..51b02c66 100644
--- a/test/tree_dgsem_2d/shallowwater_ec.jl
+++ b/test/tree_dgsem_2d/shallowwater_ec.jl
@@ -14,6 +14,9 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 4,
                    surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 4,
+                          surface_flux = (flux_fjordholm_etal, flux_nonconservative_fjordholm_etal),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-1.0, -1.0)
     coordinates_max = (1.0, 1.0)
@@ -22,7 +25,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 2.0)
     t = t_gpu = 0.0
@@ -50,11 +53,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/shallowwater_source_terms.jl b/test/tree_dgsem_2d/shallowwater_source_terms.jl
index 687646db..d19123c9 100644
--- a/test/tree_dgsem_2d/shallowwater_source_terms.jl
+++ b/test/tree_dgsem_2d/shallowwater_source_terms.jl
@@ -14,6 +14,9 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 3,
                    surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3,
+                          surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (0.0, 0.0)
     coordinates_max = (sqrt(2.0), sqrt(2.0))
@@ -24,7 +27,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_convergence_test)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_convergence_test)
 
     tspan = tspan_gpu = (0.0, 1.0)
@@ -53,11 +56,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_2d/shawllowwater_source_terms_nonperiodic.jl b/test/tree_dgsem_2d/shawllowwater_source_terms_nonperiodic.jl
index ac36131a..20b956c6 100644
--- a/test/tree_dgsem_2d/shawllowwater_source_terms_nonperiodic.jl
+++ b/test/tree_dgsem_2d/shawllowwater_source_terms_nonperiodic.jl
@@ -16,6 +16,9 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 3,
                    surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3,
+                          surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (0.0, 0.0)
     coordinates_max = (sqrt(2.0), sqrt(2.0))
@@ -27,7 +30,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         boundary_conditions = boundary_condition,
                                         source_terms = source_terms_convergence_test)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                boundary_conditions = boundary_condition,
                                                source_terms = source_terms_convergence_test)
 
@@ -57,11 +60,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/advection_basic.jl b/test/tree_dgsem_3d/advection_basic.jl
index 4e06fca8..ba04f10f 100644
--- a/test/tree_dgsem_3d/advection_basic.jl
+++ b/test/tree_dgsem_3d/advection_basic.jl
@@ -10,6 +10,7 @@ include("../test_macros.jl")
     equations = LinearScalarAdvectionEquation3D(advection_velocity)
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (-1.0, -1.0, -1.0)
     coordinates_max = (1.0, 1.0, 1.0)
@@ -21,7 +22,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
                                         solver)
     semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition_convergence_test,
-                                               solver)
+                                               solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -49,11 +50,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/advection_mortar.jl b/test/tree_dgsem_3d/advection_mortar.jl
index 392e22b7..baa496b8 100644
--- a/test/tree_dgsem_3d/advection_mortar.jl
+++ b/test/tree_dgsem_3d/advection_mortar.jl
@@ -11,6 +11,7 @@ include("../test_macros.jl")
     initial_condition = initial_condition_convergence_test
 
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (-1.0, -1.0, -1.0)
     coordinates_max = (1.0, 1.0, 1.0)
@@ -24,7 +25,7 @@ include("../test_macros.jl")
                     n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 5.0)
     t = t_gpu = 0.0
@@ -52,11 +53,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/euler_convergence.jl b/test/tree_dgsem_3d/euler_convergence.jl
index 072a7b59..405dfab8 100644
--- a/test/tree_dgsem_3d/euler_convergence.jl
+++ b/test/tree_dgsem_3d/euler_convergence.jl
@@ -12,6 +12,8 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 3, surface_flux = flux_hll,
                    volume_integral = VolumeIntegralWeakForm())
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_hll,
+                          volume_integral = VolumeIntegralWeakForm())
 
     coordinates_min = (0.0, 0.0, 0.0)
     coordinates_max = (2.0, 2.0, 2.0)
@@ -21,7 +23,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_eoc_test_euler)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_eoc_test_euler)
 
     tspan = tspan_gpu = (0.0, 1.0)
@@ -50,11 +52,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
 
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/euler_ec.jl b/test/tree_dgsem_3d/euler_ec.jl
index c69429d7..e185185b 100644
--- a/test/tree_dgsem_3d/euler_ec.jl
+++ b/test/tree_dgsem_3d/euler_ec.jl
@@ -13,6 +13,8 @@ include("../test_macros.jl")
     volume_flux = flux_ranocha
     solver = DGSEM(polydeg = 3, surface_flux = flux_ranocha,
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
VolumeIntegralFluxDifferencing(volume_flux)) + solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_ranocha, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) coordinates_min = (-2.0, -2.0, -2.0) coordinates_max = (2.0, 2.0, 2.0) @@ -21,7 +23,7 @@ include("../test_macros.jl") n_cells_max = 100_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) - semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver) + semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu) tspan = tspan_gpu = (0.0, 0.4) t = t_gpu = 0.0 @@ -49,11 +51,8 @@ include("../test_macros.jl") u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) - # Tests for components initialization - @test_approx (u_gpu, u) - # du is initlaizaed as undefined, cannot test now - # Tests for semidiscretization process + @test_approx (u_gpu, u) # du is initlaizaed as undefined, cannot test now Trixi.reset_du!(du, solver, cache) TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, diff --git a/test/tree_dgsem_3d/euler_mortar.jl b/test/tree_dgsem_3d/euler_mortar.jl index 4d5653d3..72af4efe 100644 --- a/test/tree_dgsem_3d/euler_mortar.jl +++ b/test/tree_dgsem_3d/euler_mortar.jl @@ -10,6 +10,7 @@ include("../test_macros.jl") initial_condition = initial_condition_convergence_test solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (0.0, 0.0, 0.0) coordinates_max = (2.0, 2.0, 2.0) @@ -22,7 +23,7 @@ include("../test_macros.jl") semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, source_terms = source_terms_convergence_test) - semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver, + semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu, source_terms = source_terms_convergence_test) tspan = tspan_gpu = (0.0, 1.0) @@ -51,11 +52,8 @@ include("../test_macros.jl") u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu) - # Tests for components initialization - @test_approx (u_gpu, u) - # du is initlaizaed as undefined, cannot test now - # Tests for semidiscretization process + @test_approx (u_gpu, u) # du is initlaizaed as undefined, cannot test now Trixi.reset_du!(du, solver, cache) TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu, diff --git a/test/tree_dgsem_3d/euler_shock.jl b/test/tree_dgsem_3d/euler_shock.jl index 9752bb26..b6dc7fab 100644 --- a/test/tree_dgsem_3d/euler_shock.jl +++ b/test/tree_dgsem_3d/euler_shock.jl @@ -15,6 +15,7 @@ include("../test_macros.jl") polydeg = 3 basis = LobattoLegendreBasis(polydeg) + basis_gpu = LobattoLegendreBasisGPU(polydeg) indicator_sc = IndicatorHennemannGassner(equations, basis, alpha_max = 0.5, alpha_min = 0.001, @@ -24,6 +25,7 @@ include("../test_macros.jl") volume_flux_dg = volume_flux, volume_flux_fv = surface_flux) solver = DGSEM(basis, surface_flux, volume_integral) + solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral) coordinates_min = (-2.0, -2.0, -2.0) coordinates_max = (2.0, 2.0, 2.0) @@ -32,7 +34,7 @@ include("../test_macros.jl") n_cells_max = 100_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) - semi_gpu = 
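The euler_shock.jl diff below (and mhd_shock.jl at the end) extends the same pairing to shock-capturing setups: the CPU LobattoLegendreBasis still feeds the indicator, while the new LobattoLegendreBasisGPU is what DGSEMGPU consumes. A minimal sketch of that construction; the equations, the flux choices, and the indicator keywords alpha_smooth and variable are filled in from common Trixi usage and are assumptions, since they are cut off in the hunks below:

    using Trixi, TrixiCUDA

    equations = CompressibleEulerEquations3D(1.4)  # illustrative equations
    surface_flux = flux_lax_friedrichs             # illustrative flux choices
    volume_flux = flux_ranocha

    polydeg = 3
    basis = LobattoLegendreBasis(polydeg)          # CPU basis, still used by the indicator
    basis_gpu = LobattoLegendreBasisGPU(polydeg)   # GPU basis for the GPU solver

    indicator_sc = IndicatorHennemannGassner(equations, basis,
                                             alpha_max = 0.5,
                                             alpha_min = 0.001,
                                             alpha_smooth = true,
                                             variable = density_pressure)
    volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                     volume_flux_dg = volume_flux,
                                                     volume_flux_fv = surface_flux)

    # Identical surface flux and volume integral; only the basis differs.
    solver = DGSEM(basis, surface_flux, volume_integral)
    solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)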
diff --git a/test/tree_dgsem_3d/euler_shock.jl b/test/tree_dgsem_3d/euler_shock.jl
index 9752bb26..b6dc7fab 100644
--- a/test/tree_dgsem_3d/euler_shock.jl
+++ b/test/tree_dgsem_3d/euler_shock.jl
@@ -15,6 +15,7 @@ include("../test_macros.jl")
     polydeg = 3
     basis = LobattoLegendreBasis(polydeg)
+    basis_gpu = LobattoLegendreBasisGPU(polydeg)
     indicator_sc = IndicatorHennemannGassner(equations, basis,
                                              alpha_max = 0.5,
                                              alpha_min = 0.001,
@@ -24,6 +25,7 @@ include("../test_macros.jl")
                                                      volume_flux_dg = volume_flux,
                                                      volume_flux_fv = surface_flux)
     solver = DGSEM(basis, surface_flux, volume_integral)
+    solver_gpu = DGSEMGPU(basis_gpu, surface_flux, volume_integral)
 
     coordinates_min = (-2.0, -2.0, -2.0)
     coordinates_max = (2.0, 2.0, 2.0)
@@ -32,7 +34,7 @@ include("../test_macros.jl")
                    n_cells_max = 100_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -60,11 +62,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
+
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/euler_source_terms.jl b/test/tree_dgsem_3d/euler_source_terms.jl
index eeace118..503993c9 100644
--- a/test/tree_dgsem_3d/euler_source_terms.jl
+++ b/test/tree_dgsem_3d/euler_source_terms.jl
@@ -12,6 +12,8 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs,
                    volume_integral = VolumeIntegralWeakForm())
+    solver_gpu = DGSEMGPU(polydeg = 3, surface_flux = flux_lax_friedrichs,
+                          volume_integral = VolumeIntegralWeakForm())
 
     coordinates_min = (0.0, 0.0, 0.0)
     coordinates_max = (2.0, 2.0, 2.0)
@@ -21,7 +23,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_convergence_test)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_convergence_test)
 
     tspan = tspan_gpu = (0.0, 5.0)
@@ -50,11 +52,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
+
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/hypdiff_nonperiodic.jl b/test/tree_dgsem_3d/hypdiff_nonperiodic.jl
index a95cc07e..e8f629ff 100644
--- a/test/tree_dgsem_3d/hypdiff_nonperiodic.jl
+++ b/test/tree_dgsem_3d/hypdiff_nonperiodic.jl
@@ -17,6 +17,7 @@ include("../test_macros.jl")
                                            z_pos = boundary_condition_periodic)
 
     solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs)
+    solver_gpu = DGSEMGPU(polydeg = 4, surface_flux = flux_lax_friedrichs)
 
     coordinates_min = (0.0, 0.0, 0.0)
     coordinates_max = (1.0, 1.0, 1.0)
@@ -28,7 +29,7 @@ include("../test_macros.jl")
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                         source_terms = source_terms_poisson_nonperiodic,
                                         boundary_conditions = boundary_conditions)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver,
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu,
                                                source_terms = source_terms_poisson_nonperiodic,
                                                boundary_conditions = boundary_conditions)
 
@@ -58,11 +59,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
+
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/mhd_alfven_wave.jl b/test/tree_dgsem_3d/mhd_alfven_wave.jl
index 3c24c459..81496c35 100644
--- a/test/tree_dgsem_3d/mhd_alfven_wave.jl
+++ b/test/tree_dgsem_3d/mhd_alfven_wave.jl
@@ -14,6 +14,9 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 3,
                    surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3,
+                          surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-1.0, -1.0, -1.0)
     coordinates_max = (1.0, 1.0, 1.0)
@@ -22,7 +25,7 @@ include("../test_macros.jl")
                    n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -50,11 +53,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
+
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/mhd_alfven_wave_mortar.jl b/test/tree_dgsem_3d/mhd_alfven_wave_mortar.jl
index 84ad362d..456d05f8 100644
--- a/test/tree_dgsem_3d/mhd_alfven_wave_mortar.jl
+++ b/test/tree_dgsem_3d/mhd_alfven_wave_mortar.jl
@@ -15,6 +15,10 @@ include("../test_macros.jl")
                    surface_flux = (flux_hlle, flux_nonconservative_powell),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3,
+                          surface_flux = (flux_hlle,
+                                          flux_nonconservative_powell),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-1.0, -1.0, -1.0)
     coordinates_max = (1.0, 1.0, 1.0)
@@ -26,7 +30,7 @@ include("../test_macros.jl")
                    n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -54,11 +58,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
+
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/mhd_ec.jl b/test/tree_dgsem_3d/mhd_ec.jl
index 991e7338..f4d0bcd4 100644
--- a/test/tree_dgsem_3d/mhd_ec.jl
+++ b/test/tree_dgsem_3d/mhd_ec.jl
@@ -14,6 +14,9 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = 3,
                    surface_flux = (flux_hindenlang_gassner, flux_nonconservative_powell),
                    volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+    solver_gpu = DGSEMGPU(polydeg = 3,
+                          surface_flux = (flux_hindenlang_gassner, flux_nonconservative_powell),
+                          volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
 
     coordinates_min = (-2.0, -2.0, -2.0)
     coordinates_max = (2.0, 2.0, 2.0)
@@ -22,7 +25,7 @@ include("../test_macros.jl")
                    n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 0.4)
     t = t_gpu = 0.0
@@ -50,11 +53,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
+
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,
diff --git a/test/tree_dgsem_3d/mhd_shock.jl b/test/tree_dgsem_3d/mhd_shock.jl
index 56e7ee20..0e14cd3b 100644
--- a/test/tree_dgsem_3d/mhd_shock.jl
+++ b/test/tree_dgsem_3d/mhd_shock.jl
@@ -14,6 +14,7 @@ include("../test_macros.jl")
     volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
     polydeg = 4
     basis = LobattoLegendreBasis(polydeg)
+    basis_gpu = LobattoLegendreBasisGPU(polydeg)
     indicator_sc = IndicatorHennemannGassner(equations, basis,
                                              alpha_max = 0.5,
                                              alpha_min = 0.001,
@@ -25,6 +26,8 @@ include("../test_macros.jl")
     solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux,
                    volume_integral = volume_integral)
+    solver_gpu = DGSEMGPU(polydeg = polydeg, surface_flux = surface_flux,
+                          volume_integral = volume_integral)
 
     coordinates_min = (-2.0, -2.0, -2.0)
     coordinates_max = (2.0, 2.0, 2.0)
@@ -33,7 +36,7 @@ include("../test_macros.jl")
                    n_cells_max = 10_000)
 
     semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
-    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver)
+    semi_gpu = SemidiscretizationHyperbolicGPU(mesh, equations, initial_condition, solver_gpu)
 
     tspan = tspan_gpu = (0.0, 1.0)
     t = t_gpu = 0.0
@@ -61,11 +64,8 @@ include("../test_macros.jl")
     u_gpu = TrixiCUDA.wrap_array(u_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
     du_gpu = TrixiCUDA.wrap_array(du_gpu_, mesh_gpu, equations_gpu, solver_gpu, cache_gpu)
 
-    # Tests for components initialization
-    @test_approx (u_gpu, u)
-    # du is initlaizaed as undefined, cannot test now
-
-    # Tests for semidiscretization process
+    @test_approx (u_gpu, u) # du is initialized as undefined, cannot test now
+
     Trixi.reset_du!(du, solver, cache)
     TrixiCUDA.cuda_volume_integral!(du_gpu, u_gpu, mesh_gpu,